#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Unit tests for ``apache_beam.io.gcp.bigquery_schema_tools``.

The tests cover two things:

* ``generate_user_type_from_bq_schema``: the annotations of the generated
  user type for a given ``bigquery.TableSchema``, and the errors raised for
  unsupported field types and modes.
* ``ReadFromBigQuery(output_type='BEAM_ROW')``: the argument combinations
  that are rejected with an error.
"""

import logging
import typing
import unittest.mock

import mock
import numpy as np

import apache_beam.io.gcp.bigquery
# Imported explicitly because a test below references
# ``apache_beam.utils.timestamp.Timestamp`` by its dotted path.
import apache_beam.utils.timestamp
from apache_beam.io.gcp import bigquery_schema_tools
from apache_beam.io.gcp.bigquery_tools import BigQueryWrapper
from apache_beam.io.gcp.internal.clients import bigquery
from apache_beam.options import value_provider

try:
  from apitools.base.py.exceptions import HttpError
except ImportError:
  HttpError = None


@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
class TestBigQueryToSchema(unittest.TestCase):
  """Schema-conversion and ``BEAM_ROW`` argument-validation tests.

  The whole class is skipped when ``apitools`` cannot be imported.
  """

  # ---------------------------------------------------------------- helpers

  @staticmethod
  def _table_with_unsupported_type():
    """Return a ``bigquery.Table`` whose first field has type ``DOUBLE``.

    Used as the return value of the mocked ``BigQueryWrapper.get_table``.
    """
    fields = [
        bigquery.TableFieldSchema(name='stn', type='DOUBLE', mode="NULLABLE"),
        bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
        bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
    ]
    return bigquery.Table(schema=bigquery.TableSchema(fields=fields))

  def _assert_beam_row_read_raises(
      self, error_type, message_regex, **read_args):
    """Assert that applying a ``BEAM_ROW`` read to a pipeline raises.

    Args:
      error_type: exception class expected to be raised.
      message_regex: regular expression the exception message must match.
      **read_args: keyword arguments forwarded to ``ReadFromBigQuery`` in
        addition to ``output_type='BEAM_ROW'``.
    """
    with self.assertRaisesRegex(error_type, message_regex):
      # The pipeline is created inside the context manager, and never run:
      # the error is expected while constructing/applying the transform.
      p = apache_beam.Pipeline()
      _ = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
          output_type='BEAM_ROW', **read_args)

  # ------------------------------------- generate_user_type_from_bq_schema

  def test_check_schema_conversions(self):
    """NULLABLE / REPEATED / unset modes map to the expected annotations."""
    fields = [
        bigquery.TableFieldSchema(name='stn', type='STRING', mode="NULLABLE"),
        bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
        bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
    ]
    schema = bigquery.TableSchema(fields=fields)

    usertype = bigquery_schema_tools.generate_user_type_from_bq_schema(
        the_table_schema=schema)
    self.assertEqual(
        usertype.__annotations__,
        {
            'stn': typing.Optional[str],
            'temp': typing.Sequence[np.float64],
            'count': typing.Optional[np.int64]
        })

  def test_check_conversion_with_empty_schema(self):
    """A schema without fields yields a user type with no annotations."""
    schema = bigquery.TableSchema(fields=[])

    usertype = bigquery_schema_tools.generate_user_type_from_bq_schema(
        the_table_schema=schema)
    self.assertEqual(usertype.__annotations__, {})

  def test_check_schema_conversions_with_timestamp(self):
    """A NULLABLE TIMESTAMP field maps to ``Optional[Timestamp]``."""
    fields = [
        bigquery.TableFieldSchema(name='stn', type='STRING', mode="NULLABLE"),
        bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
        bigquery.TableFieldSchema(
            name='times', type='TIMESTAMP', mode="NULLABLE")
    ]
    schema = bigquery.TableSchema(fields=fields)

    usertype = bigquery_schema_tools.generate_user_type_from_bq_schema(
        the_table_schema=schema)
    self.assertEqual(
        usertype.__annotations__,
        {
            'stn': typing.Optional[str],
            'temp': typing.Sequence[np.float64],
            'times': typing.Optional[apache_beam.utils.timestamp.Timestamp]
        })

  def test_unsupported_type(self):
    """A field of type ``DOUBLE`` is rejected with a ``ValueError``."""
    fields = [
        bigquery.TableFieldSchema(
            name='number', type='DOUBLE', mode="NULLABLE"),
        bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
        bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
    ]
    schema = bigquery.TableSchema(fields=fields)
    with self.assertRaisesRegex(ValueError,
                                "Encountered an unsupported type: 'DOUBLE'"):
      bigquery_schema_tools.generate_user_type_from_bq_schema(
          the_table_schema=schema)

  def test_unsupported_mode(self):
    """A field with mode ``NESTED`` is rejected with a ``ValueError``."""
    fields = [
        bigquery.TableFieldSchema(name='number', type='INTEGER', mode="NESTED"),
        bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
        bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
    ]
    schema = bigquery.TableSchema(fields=fields)
    with self.assertRaisesRegex(ValueError,
                                "Encountered an unsupported mode: 'NESTED'"):
      bigquery_schema_tools.generate_user_type_from_bq_schema(
          the_table_schema=schema)

  # --------------------------------- ReadFromBigQuery(output_type=BEAM_ROW)

  @mock.patch.object(BigQueryWrapper, 'get_table')
  def test_bad_schema_public_api_export(self, get_table):
    """EXPORT read of a table with a ``DOUBLE`` field raises ``ValueError``."""
    get_table.return_value = self._table_with_unsupported_type()

    self._assert_beam_row_read_raises(
        ValueError,
        "Encountered an unsupported type: 'DOUBLE'",
        table="dataset.sample_table",
        method="EXPORT",
        project="project")

  @mock.patch.object(BigQueryWrapper, 'get_table')
  def test_bad_schema_public_api_direct_read(self, get_table):
    """DIRECT_READ of a table with a ``DOUBLE`` field raises ``ValueError``."""
    get_table.return_value = self._table_with_unsupported_type()

    self._assert_beam_row_read_raises(
        ValueError,
        "Encountered an unsupported type: 'DOUBLE'",
        table="dataset.sample_table",
        method="DIRECT_READ",
        project="project")

  def test_unsupported_value_provider(self):
    """A ``ValueProvider`` table argument raises ``TypeError``."""
    self._assert_beam_row_read_raises(
        TypeError,
        'ReadFromBigQuery: table must be of type string'
        '; got ValueProvider instead',
        table=value_provider.ValueProvider())

  def test_unsupported_callable(self):
    """A callable table argument raises ``TypeError``."""
    def filter_table(table):
      if table is not None:
        return table

    self._assert_beam_row_read_raises(
        TypeError,
        'ReadFromBigQuery: table must be of type string'
        '; got a callable instead',
        table=filter_table)

  def test_unsupported_query_export(self):
    """Passing a query with EXPORT and ``BEAM_ROW`` raises ``ValueError``."""
    self._assert_beam_row_read_raises(
        ValueError,
        "Both a query and an output type of 'BEAM_ROW' were specified. "
        "'BEAM_ROW' is not currently supported with queries.",
        table="project:dataset.sample_table",
        method="EXPORT",
        query='SELECT name FROM dataset.sample_table')

  def test_unsupported_query_direct_read(self):
    """Passing a query with DIRECT_READ and ``BEAM_ROW`` raises ``ValueError``."""
    self._assert_beam_row_read_raises(
        ValueError,
        "Both a query and an output type of 'BEAM_ROW' were specified. "
        "'BEAM_ROW' is not currently supported with queries.",
        table="project:dataset.sample_table",
        method="DIRECT_READ",
        query='SELECT name FROM dataset.sample_table')


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()