# Module: apache_beam/io/external/xlang_parquetio_test.py
# (github.com/apache/beam/sdks/v2@v2.48.2)
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for cross-language parquet io read/write."""

# pytype: skip-file

import logging
import os
import re
import unittest

import apache_beam as beam
from apache_beam import coders
from apache_beam.coders.avro_record import AvroRecord
from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.transforms.external import ImplicitSchemaPayloadBuilder

PARQUET_WRITE_URN = "beam:transforms:xlang:test:parquet_write"


# TODO: enable test_xlang_parquetio_write after fixing BEAM-10507
# @pytest.mark.uses_java_expansion_service
@unittest.skipUnless(
    os.environ.get('EXPANSION_JAR'),
    "EXPANSION_JAR environment variable is not set.")
@unittest.skipUnless(
    os.environ.get('EXPANSION_PORT'),
    "EXPANSION_PORT environment var is not provided.")
class XlangParquetIOTest(unittest.TestCase):
  """Exercises the external parquet-write transform via an expansion service.

  The whole class is skipped unless both the ``EXPANSION_JAR`` and
  ``EXPANSION_PORT`` environment variables are set.
  """

  # TODO: add verification for the file written by external transform
  #  after fixing BEAM-7612
  def test_xlang_parquetio_write(self):
    """Pipe three Avro records into the external parquet-write transform.

    The expansion service is addressed as ``localhost:<EXPANSION_PORT>`` and
    the jar named by ``EXPANSION_JAR`` is handed to the pipeline through the
    ``jar_packages`` experiment.

    If the pipeline raises a ``RuntimeError`` whose message mentions
    ``PARQUET_WRITE_URN``, the test is reported as skipped; any other
    ``RuntimeError`` propagates unchanged.
    """
    expansion_jar = os.environ.get('EXPANSION_JAR')
    port = os.environ.get('EXPANSION_PORT')
    address = f'localhost:{port}'
    try:
      with TestPipeline() as p:
        # add_experiment() is used instead of appending to ``experiments``
        # directly so that this also works when no experiments list has been
        # populated on the options yet.
        p.get_pipeline_options().view_as(DebugOptions).add_experiment(
            'jar_packages=' + expansion_jar)
        p.not_use_test_runner_api = True
        _ = (
            p
            | beam.Create([
                AvroRecord({"name": "abc"}),
                AvroRecord({"name": "def"}),
                AvroRecord({"name": "ghi"}),
            ])
            | beam.ExternalTransform(
                PARQUET_WRITE_URN,
                ImplicitSchemaPayloadBuilder({'data': '/tmp/test.parquet'}),
                address))
    except RuntimeError as e:
      # The URN is matched literally; escape it so that it is never
      # interpreted as a regular-expression pattern.
      if not re.search(re.escape(PARQUET_WRITE_URN), str(e)):
        # Bare raise keeps the original traceback intact.
        raise
      # Report an explicit skip rather than printing and silently passing.
      self.skipTest(
          "looks like URN not implemented in expansion service, skipping.")


class AvroTestCoder(coders.AvroGenericCoder):
  """Avro coder bound to a one-field (``name``: string) record schema."""

  # Avro schema (JSON) describing the records created by the test above.
  SCHEMA = """
  {
    "type": "record", "name": "testrecord",
    "fields": [ {"name": "name", "type": "string"} ]
  }
  """

  def __init__(self):
    """Initialise the parent coder with ``SCHEMA``."""
    super().__init__(self.SCHEMA)


# Make AvroTestCoder the registered coder for AvroRecord elements.
coders.registry.register_coder(AvroRecord, AvroTestCoder)

if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()