github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/bigquery_avro_tools_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  import logging
    18  import unittest
    19  
    20  from fastavro.schema import parse_schema
    21  
    22  from apache_beam.io.gcp import bigquery_avro_tools
    23  from apache_beam.io.gcp import bigquery_tools
    24  from apache_beam.io.gcp.bigquery_test import HttpError
    25  from apache_beam.io.gcp.internal.clients import bigquery
    26  
    27  
    28  @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
    29  class TestBigQueryToAvroSchema(unittest.TestCase):
    30    def test_convert_bigquery_schema_to_avro_schema(self):
    31      subfields = [
    32          bigquery.TableFieldSchema(
    33              name="species", type="STRING", mode="NULLABLE"),
    34      ]
    35  
    36      fields = [
    37          bigquery.TableFieldSchema(
    38              name="number", type="INTEGER", mode="REQUIRED"),
    39          bigquery.TableFieldSchema(
    40              name="species", type="STRING", mode="NULLABLE"),
    41          bigquery.TableFieldSchema(name="quality",
    42                                    type="FLOAT"),  # default to NULLABLE
    43          bigquery.TableFieldSchema(name="grade",
    44                                    type="FLOAT64"),  # default to NULLABLE
    45          bigquery.TableFieldSchema(name="quantity",
    46                                    type="INTEGER"),  # default to NULLABLE
    47          bigquery.TableFieldSchema(name="dependents",
    48                                    type="INT64"),  # default to NULLABLE
    49          bigquery.TableFieldSchema(
    50              name="birthday", type="TIMESTAMP", mode="NULLABLE"),
    51          bigquery.TableFieldSchema(
    52              name="birthdayMoney", type="NUMERIC", mode="NULLABLE"),
    53          bigquery.TableFieldSchema(
    54              name="flighted", type="BOOL", mode="NULLABLE"),
    55          bigquery.TableFieldSchema(
    56              name="flighted2", type="BOOLEAN", mode="NULLABLE"),
    57          bigquery.TableFieldSchema(name="sound", type="BYTES", mode="NULLABLE"),
    58          bigquery.TableFieldSchema(
    59              name="anniversaryDate", type="DATE", mode="NULLABLE"),
    60          bigquery.TableFieldSchema(
    61              name="anniversaryDatetime", type="DATETIME", mode="NULLABLE"),
    62          bigquery.TableFieldSchema(
    63              name="anniversaryTime", type="TIME", mode="NULLABLE"),
    64          bigquery.TableFieldSchema(
    65              name="scion", type="RECORD", mode="NULLABLE", fields=subfields),
    66          bigquery.TableFieldSchema(
    67              name="family", type="STRUCT", mode="NULLABLE", fields=subfields),
    68          bigquery.TableFieldSchema(
    69              name="associates", type="RECORD", mode="REPEATED",
    70              fields=subfields),
    71          bigquery.TableFieldSchema(
    72              name="geoPositions", type="GEOGRAPHY", mode="NULLABLE"),
    73      ]
    74  
    75      table_schema = bigquery.TableSchema(fields=fields)
    76      avro_schema = bigquery_avro_tools.get_record_schema_from_dict_table_schema(
    77          "root", bigquery_tools.get_dict_table_schema(table_schema))
    78  
    79      parsed_schema = parse_schema(avro_schema)
    80      self.assertEqual(type(parsed_schema), dict)
    81      # names: key -> name, value ->  different types allowed
    82      names = {
    83          "number": 4,
    84          "species": 2,
    85          "quality": 2,
    86          "grade": 2,
    87          "quantity": 2,
    88          "dependents": 2,
    89          "birthday": 2,
    90          "birthdayMoney": 2,
    91          "flighted": 2,
    92          "flighted2": 2,
    93          "sound": 2,
    94          "anniversaryDate": 2,
    95          "anniversaryDatetime": 2,
    96          "anniversaryTime": 2,
    97          "scion": 2,
    98          "family": 2,
    99          "associates": 2,
   100          "geoPositions": 2,
   101      }
   102      # simple test case to check if the schema is parsed right.
   103      fields = parsed_schema["fields"]
   104      for i in range(len(fields)):
   105        field_ = fields[i]
   106        assert 'name' in field_ and field_['name'] in names
   107        self.assertEqual(len(field_['type']), names[field_['name']])
   108  
   109  
   110  if __name__ == '__main__':
   111    logging.getLogger().setLevel(logging.INFO)
   112    unittest.main()