#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for the BigQuery-to-Avro schema conversion helpers."""

import logging
import unittest

from fastavro.schema import parse_schema

from apache_beam.io.gcp import bigquery_avro_tools
from apache_beam.io.gcp import bigquery_tools
from apache_beam.io.gcp.bigquery_test import HttpError
from apache_beam.io.gcp.internal.clients import bigquery


@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
class TestBigQueryToAvroSchema(unittest.TestCase):
  """Checks that a BigQuery table schema converts to a parseable Avro schema."""
  def test_convert_bigquery_schema_to_avro_schema(self):
    """Convert a mixed-type table schema and sanity-check each Avro field.

    The table schema is turned into a dict, converted with
    ``get_record_schema_from_dict_table_schema``, and parsed with fastavro's
    ``parse_schema``. Every field of the parsed record must have a known
    name, and the ``len()`` of its ``type`` entry must match the expected
    value recorded in ``names``.
    """
    subfields = [
        bigquery.TableFieldSchema(
            name="species", type="STRING", mode="NULLABLE"),
    ]

    table_fields = [
        bigquery.TableFieldSchema(
            name="number", type="INTEGER", mode="REQUIRED"),
        bigquery.TableFieldSchema(
            name="species", type="STRING", mode="NULLABLE"),
        bigquery.TableFieldSchema(name="quality",
                                  type="FLOAT"),  # default to NULLABLE
        bigquery.TableFieldSchema(name="grade",
                                  type="FLOAT64"),  # default to NULLABLE
        bigquery.TableFieldSchema(name="quantity",
                                  type="INTEGER"),  # default to NULLABLE
        bigquery.TableFieldSchema(name="dependents",
                                  type="INT64"),  # default to NULLABLE
        bigquery.TableFieldSchema(
            name="birthday", type="TIMESTAMP", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="birthdayMoney", type="NUMERIC", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="flighted", type="BOOL", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="flighted2", type="BOOLEAN", mode="NULLABLE"),
        bigquery.TableFieldSchema(name="sound", type="BYTES", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="anniversaryDate", type="DATE", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="anniversaryDatetime", type="DATETIME", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="anniversaryTime", type="TIME", mode="NULLABLE"),
        bigquery.TableFieldSchema(
            name="scion", type="RECORD", mode="NULLABLE", fields=subfields),
        bigquery.TableFieldSchema(
            name="family", type="STRUCT", mode="NULLABLE", fields=subfields),
        bigquery.TableFieldSchema(
            name="associates", type="RECORD", mode="REPEATED",
            fields=subfields),
        bigquery.TableFieldSchema(
            name="geoPositions", type="GEOGRAPHY", mode="NULLABLE"),
    ]

    table_schema = bigquery.TableSchema(fields=table_fields)
    avro_schema = bigquery_avro_tools.get_record_schema_from_dict_table_schema(
        "root", bigquery_tools.get_dict_table_schema(table_schema))

    parsed_schema = parse_schema(avro_schema)
    self.assertIsInstance(parsed_schema, dict)
    # names: key -> field name, value -> expected len() of that field's
    # parsed 'type' entry.
    # NOTE(review): presumably 2 is the size of a two-element type (e.g. a
    # nullable union) and 4 is the string length of the bare type name of the
    # REQUIRED field -- confirm against bigquery_avro_tools.
    names = {
        "number": 4,
        "species": 2,
        "quality": 2,
        "grade": 2,
        "quantity": 2,
        "dependents": 2,
        "birthday": 2,
        "birthdayMoney": 2,
        "flighted": 2,
        "flighted2": 2,
        "sound": 2,
        "anniversaryDate": 2,
        "anniversaryDatetime": 2,
        "anniversaryTime": 2,
        "scion": 2,
        "family": 2,
        "associates": 2,
        "geoPositions": 2,
    }
    # simple test case to check if the schema is parsed right.
    parsed_fields = parsed_schema["fields"]
    for parsed_field in parsed_fields:
      # unittest assertions (not bare ``assert``) so the checks still run
      # under ``python -O`` and report the offending value on failure.
      self.assertIn('name', parsed_field)
      self.assertIn(parsed_field['name'], names)
      self.assertEqual(
          len(parsed_field['type']), names[parsed_field['name']])


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()