github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/bigquery_schema_tools_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  import logging
    18  import typing
    19  import unittest.mock
    20  
    21  import mock
    22  import numpy as np
    23  
    24  import apache_beam.io.gcp.bigquery
    25  from apache_beam.io.gcp import bigquery_schema_tools
    26  from apache_beam.io.gcp.bigquery_tools import BigQueryWrapper
    27  from apache_beam.io.gcp.internal.clients import bigquery
    28  from apache_beam.options import value_provider
    29  
    30  try:
    31    from apitools.base.py.exceptions import HttpError
    32  except ImportError:
    33    HttpError = None
    34  
    35  
    36  @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
    37  class TestBigQueryToSchema(unittest.TestCase):
    38    def test_check_schema_conversions(self):
    39      fields = [
    40          bigquery.TableFieldSchema(name='stn', type='STRING', mode="NULLABLE"),
    41          bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
    42          bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
    43      ]
    44      schema = bigquery.TableSchema(fields=fields)
    45  
    46      usertype = bigquery_schema_tools.generate_user_type_from_bq_schema(
    47          the_table_schema=schema)
    48      self.assertEqual(
    49          usertype.__annotations__,
    50          {
    51              'stn': typing.Optional[str],
    52              'temp': typing.Sequence[np.float64],
    53              'count': typing.Optional[np.int64]
    54          })
    55  
    56    def test_check_conversion_with_empty_schema(self):
    57      fields = []
    58      schema = bigquery.TableSchema(fields=fields)
    59  
    60      usertype = bigquery_schema_tools.generate_user_type_from_bq_schema(
    61          the_table_schema=schema)
    62      self.assertEqual(usertype.__annotations__, {})
    63  
    64    def test_check_schema_conversions_with_timestamp(self):
    65      fields = [
    66          bigquery.TableFieldSchema(name='stn', type='STRING', mode="NULLABLE"),
    67          bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
    68          bigquery.TableFieldSchema(
    69              name='times', type='TIMESTAMP', mode="NULLABLE")
    70      ]
    71      schema = bigquery.TableSchema(fields=fields)
    72  
    73      usertype = bigquery_schema_tools.generate_user_type_from_bq_schema(
    74          the_table_schema=schema)
    75      self.assertEqual(
    76          usertype.__annotations__,
    77          {
    78              'stn': typing.Optional[str],
    79              'temp': typing.Sequence[np.float64],
    80              'times': typing.Optional[apache_beam.utils.timestamp.Timestamp]
    81          })
    82  
    83    def test_unsupported_type(self):
    84      fields = [
    85          bigquery.TableFieldSchema(
    86              name='number', type='DOUBLE', mode="NULLABLE"),
    87          bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
    88          bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
    89      ]
    90      schema = bigquery.TableSchema(fields=fields)
    91      with self.assertRaisesRegex(ValueError,
    92                                  "Encountered an unsupported type: 'DOUBLE'"):
    93        bigquery_schema_tools.generate_user_type_from_bq_schema(
    94            the_table_schema=schema)
    95  
    96    def test_unsupported_mode(self):
    97      fields = [
    98          bigquery.TableFieldSchema(name='number', type='INTEGER', mode="NESTED"),
    99          bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
   100          bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
   101      ]
   102      schema = bigquery.TableSchema(fields=fields)
   103      with self.assertRaisesRegex(ValueError,
   104                                  "Encountered an unsupported mode: 'NESTED'"):
   105        bigquery_schema_tools.generate_user_type_from_bq_schema(
   106            the_table_schema=schema)
   107  
   108    @mock.patch.object(BigQueryWrapper, 'get_table')
   109    def test_bad_schema_public_api_export(self, get_table):
   110      fields = [
   111          bigquery.TableFieldSchema(name='stn', type='DOUBLE', mode="NULLABLE"),
   112          bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
   113          bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
   114      ]
   115      schema = bigquery.TableSchema(fields=fields)
   116      table = apache_beam.io.gcp.internal.clients.bigquery.\
   117          bigquery_v2_messages.Table(
   118          schema=schema)
   119      get_table.return_value = table
   120  
   121      with self.assertRaisesRegex(ValueError,
   122                                  "Encountered an unsupported type: 'DOUBLE'"):
   123        p = apache_beam.Pipeline()
   124        pipeline = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
   125            table="dataset.sample_table",
   126            method="EXPORT",
   127            project="project",
   128            output_type='BEAM_ROW')
   129        pipeline
   130  
   131    @mock.patch.object(BigQueryWrapper, 'get_table')
   132    def test_bad_schema_public_api_direct_read(self, get_table):
   133      fields = [
   134          bigquery.TableFieldSchema(name='stn', type='DOUBLE', mode="NULLABLE"),
   135          bigquery.TableFieldSchema(name='temp', type='FLOAT64', mode="REPEATED"),
   136          bigquery.TableFieldSchema(name='count', type='INTEGER', mode=None)
   137      ]
   138      schema = bigquery.TableSchema(fields=fields)
   139      table = apache_beam.io.gcp.internal.clients.bigquery. \
   140          bigquery_v2_messages.Table(
   141          schema=schema)
   142      get_table.return_value = table
   143  
   144      with self.assertRaisesRegex(ValueError,
   145                                  "Encountered an unsupported type: 'DOUBLE'"):
   146        p = apache_beam.Pipeline()
   147        pipeline = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
   148            table="dataset.sample_table",
   149            method="DIRECT_READ",
   150            project="project",
   151            output_type='BEAM_ROW')
   152        pipeline
   153  
   154    def test_unsupported_value_provider(self):
   155      with self.assertRaisesRegex(TypeError,
   156                                  'ReadFromBigQuery: table must be of type string'
   157                                  '; got ValueProvider instead'):
   158        p = apache_beam.Pipeline()
   159        pipeline = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
   160            table=value_provider.ValueProvider(), output_type='BEAM_ROW')
   161        pipeline
   162  
   163    def test_unsupported_callable(self):
   164      def filterTable(table):
   165        if table is not None:
   166          return table
   167  
   168      res = filterTable
   169      with self.assertRaisesRegex(TypeError,
   170                                  'ReadFromBigQuery: table must be of type string'
   171                                  '; got a callable instead'):
   172        p = apache_beam.Pipeline()
   173        pipeline = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
   174            table=res, output_type='BEAM_ROW')
   175        pipeline
   176  
   177    def test_unsupported_query_export(self):
   178      with self.assertRaisesRegex(
   179          ValueError,
   180          "Both a query and an output type of 'BEAM_ROW' were specified. "
   181          "'BEAM_ROW' is not currently supported with queries."):
   182        p = apache_beam.Pipeline()
   183        pipeline = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
   184            table="project:dataset.sample_table",
   185            method="EXPORT",
   186            query='SELECT name FROM dataset.sample_table',
   187            output_type='BEAM_ROW')
   188        pipeline
   189  
   190    def test_unsupported_query_direct_read(self):
   191      with self.assertRaisesRegex(
   192          ValueError,
   193          "Both a query and an output type of 'BEAM_ROW' were specified. "
   194          "'BEAM_ROW' is not currently supported with queries."):
   195        p = apache_beam.Pipeline()
   196        pipeline = p | apache_beam.io.gcp.bigquery.ReadFromBigQuery(
   197            table="project:dataset.sample_table",
   198            method="DIRECT_READ",
   199            query='SELECT name FROM dataset.sample_table',
   200            output_type='BEAM_ROW')
   201        pipeline
   202  
   203    if __name__ == '__main__':
   204      logging.getLogger().setLevel(logging.INFO)
   205      unittest.main()