github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/external/xlang_parquetio_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Unit tests for cross-language parquet io read/write."""
    19  
    20  # pytype: skip-file
    21  
    22  import logging
    23  import os
    24  import re
    25  import unittest
    26  
    27  import apache_beam as beam
    28  from apache_beam import coders
    29  from apache_beam.coders.avro_record import AvroRecord
    30  from apache_beam.options.pipeline_options import DebugOptions
    31  from apache_beam.testing.test_pipeline import TestPipeline
    32  from apache_beam.transforms.external import ImplicitSchemaPayloadBuilder
    33  
    34  PARQUET_WRITE_URN = "beam:transforms:xlang:test:parquet_write"
    35  
    36  
    37  # TODO: enable test_xlang_parquetio_write after fixing BEAM-10507
    38  # @pytest.mark.uses_java_expansion_service
    39  @unittest.skipUnless(
    40      os.environ.get('EXPANSION_JAR'),
    41      "EXPANSION_JAR environment variable is not set.")
    42  @unittest.skipUnless(
    43      os.environ.get('EXPANSION_PORT'),
    44      "EXPANSION_PORT environment var is not provided.")
    45  class XlangParquetIOTest(unittest.TestCase):
    46    # TODO: add verification for the file written by external transform
    47    #  after fixing BEAM-7612
    48    def test_xlang_parquetio_write(self):
    49      expansion_jar = os.environ.get('EXPANSION_JAR')
    50      port = os.environ.get('EXPANSION_PORT')
    51      address = 'localhost:%s' % port
    52      try:
    53        with TestPipeline() as p:
    54          p.get_pipeline_options().view_as(DebugOptions).experiments.append(
    55              'jar_packages=' + expansion_jar)
    56          p.not_use_test_runner_api = True
    57          _ = p \
    58            | beam.Create([
    59                AvroRecord({"name": "abc"}), AvroRecord({"name": "def"}),
    60                AvroRecord({"name": "ghi"})]) \
    61            | beam.ExternalTransform(
    62                PARQUET_WRITE_URN,
    63                ImplicitSchemaPayloadBuilder({'data': u'/tmp/test.parquet'}),
    64                address)
    65      except RuntimeError as e:
    66        if re.search(PARQUET_WRITE_URN, str(e)):
    67          print("looks like URN not implemented in expansion service, skipping.")
    68        else:
    69          raise e
    70  
    71  
    72  class AvroTestCoder(coders.AvroGenericCoder):
    73    SCHEMA = """
    74    {
    75      "type": "record", "name": "testrecord",
    76      "fields": [ {"name": "name", "type": "string"} ]
    77    }
    78    """
    79  
    80    def __init__(self):
    81      super().__init__(self.SCHEMA)
    82  
    83  
    84  coders.registry.register_coder(AvroRecord, AvroTestCoder)
    85  
    86  if __name__ == '__main__':
    87    logging.getLogger().setLevel(logging.INFO)
    88    unittest.main()