github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Unit test for Bigquery verifier"""
    19  
    20  # pytype: skip-file
    21  
    22  import logging
    23  import unittest
    24  
    25  import mock
    26  from hamcrest import assert_that as hc_assert_that
    27  
    28  from apache_beam.io.gcp import bigquery_tools
    29  from apache_beam.io.gcp.tests import bigquery_matcher as bq_verifier
    30  from apache_beam.testing.test_utils import patch_retry
    31  
    32  # Protect against environments where bigquery library is not available.
    33  # pylint: disable=wrong-import-order, wrong-import-position
    34  try:
    35    from google.cloud import bigquery
    36    from google.cloud.exceptions import NotFound
    37  except ImportError:
    38    bigquery = None
    39    NotFound = None
    40  # pylint: enable=wrong-import-order, wrong-import-position
    41  
    42  
    43  @unittest.skipIf(bigquery is None, 'Bigquery dependencies are not installed.')
    44  @mock.patch.object(bigquery, 'Client')
    45  class BigqueryMatcherTest(unittest.TestCase):
    46    def setUp(self):
    47      self._mock_result = mock.Mock()
    48      patch_retry(self, bq_verifier)
    49  
    50    def test_bigquery_matcher_success(self, mock_bigquery):
    51      mock_query_result = [mock.Mock(), mock.Mock(), mock.Mock()]
    52      mock_query_result[0].values.return_value = []
    53      mock_query_result[1].values.return_value = None
    54      mock_query_result[2].values.return_value = None
    55  
    56      mock_query = mock_bigquery.return_value.query
    57      mock_query.return_value.result.return_value = mock_query_result
    58  
    59      matcher = bq_verifier.BigqueryMatcher(
    60          'mock_project',
    61          'mock_query',
    62          '59f9d6bdee30d67ea73b8aded121c3a0280f9cd8')
    63      hc_assert_that(self._mock_result, matcher)
    64      self.assertEqual(1, mock_query.call_count)
    65  
    66    def test_bigquery_matcher_success_streaming_retry(self, mock_bigquery):
    67      # Simulate case where a streaming insert takes time to process, such that
    68      # the first query result is incomplete (empty).
    69      empty_query_result = []
    70      mock_query_result = [mock.Mock(), mock.Mock(), mock.Mock()]
    71      mock_query_result[0].values.return_value = []
    72      mock_query_result[1].values.return_value = None
    73      mock_query_result[2].values.return_value = None
    74  
    75      mock_query = mock_bigquery.return_value.query
    76      mock_query.return_value.result.side_effect = [
    77          empty_query_result, mock_query_result
    78      ]
    79  
    80      matcher = bq_verifier.BigqueryMatcher(
    81          'mock_project',
    82          'mock_query',
    83          '59f9d6bdee30d67ea73b8aded121c3a0280f9cd8',
    84          timeout_secs=5,
    85      )
    86      hc_assert_that(self._mock_result, matcher)
    87      self.assertEqual(2, mock_query.call_count)
    88  
    89    def test_bigquery_matcher_query_error_retry(self, mock_bigquery):
    90      mock_query = mock_bigquery.return_value.query
    91      mock_query.side_effect = NotFound('table not found')
    92  
    93      matcher = bq_verifier.BigqueryMatcher(
    94          'mock_project', 'mock_query', 'mock_checksum')
    95      with self.assertRaises(NotFound):
    96        hc_assert_that(self._mock_result, matcher)
    97      self.assertEqual(bq_verifier.MAX_RETRIES + 1, mock_query.call_count)
    98  
    99    def test_bigquery_matcher_query_error_checksum(self, mock_bigquery):
   100      empty_query_result = []
   101  
   102      mock_query = mock_bigquery.return_value.query
   103      mock_query.return_value.result.return_value = empty_query_result
   104  
   105      matcher = bq_verifier.BigqueryMatcher(
   106          'mock_project',
   107          'mock_query',
   108          '59f9d6bdee30d67ea73b8aded121c3a0280f9cd8',
   109      )
   110      with self.assertRaisesRegex(AssertionError, r'Expected checksum'):
   111        hc_assert_that(self._mock_result, matcher)
   112      self.assertEqual(1, mock_query.call_count)
   113  
   114  
   115  @unittest.skipIf(bigquery is None, 'Bigquery dependencies are not installed.')
   116  @mock.patch.object(bigquery_tools, 'BigQueryWrapper')
   117  class BigqueryTableMatcherTest(unittest.TestCase):
   118    def setUp(self):
   119      self._mock_result = mock.Mock()
   120      patch_retry(self, bq_verifier)
   121  
   122    def test_bigquery_table_matcher_success(self, mock_bigquery):
   123      mock_query_result = mock.Mock(
   124          partitioning='a lot of partitioning', clustering={'column': 'FRIENDS'})
   125  
   126      mock_bigquery.return_value.get_table.return_value = mock_query_result
   127  
   128      matcher = bq_verifier.BigQueryTableMatcher(
   129          'mock_project',
   130          'mock_dataset',
   131          'mock_table',
   132          {
   133              'partitioning': 'a lot of partitioning',
   134              'clustering': {
   135                  'column': 'FRIENDS'
   136              }
   137          })
   138      hc_assert_that(self._mock_result, matcher)
   139  
   140    def test_bigquery_table_matcher_query_error_retry(self, mock_bigquery):
   141      mock_query = mock_bigquery.return_value.get_table
   142      mock_query.side_effect = ValueError('table not found')
   143  
   144      matcher = bq_verifier.BigQueryTableMatcher(
   145          'mock_project',
   146          'mock_dataset',
   147          'mock_table',
   148          {
   149              'partitioning': 'a lot of partitioning',
   150              'clustering': {
   151                  'column': 'FRIENDS'
   152              }
   153          })
   154  
   155      with self.assertRaises(ValueError):
   156        hc_assert_that(self._mock_result, matcher)
   157      self.assertEqual(bq_verifier.MAX_RETRIES + 1, mock_query.call_count)
   158  
   159  
   160  @unittest.skipIf(bigquery is None, 'Bigquery dependencies are not installed.')
   161  @mock.patch.object(bigquery, 'Client')
   162  class BigqueryFullResultStreamingMatcherTest(unittest.TestCase):
   163    def setUp(self):
   164      self.timeout = 0.01
   165  
   166    def test__get_query_result_timeout(self, mock_bigquery):
   167      mock_query = mock_bigquery.return_value.query
   168      mock_query.return_value.result.return_value = []
   169  
   170      matcher = bq_verifier.BigqueryFullResultStreamingMatcher(
   171          'some-project', 'some-query', [1, 2, 3], timeout=self.timeout)
   172      with self.assertRaises(TimeoutError):  # noqa: F821
   173        matcher._get_query_result()
   174  
   175  
   176  if __name__ == '__main__':
   177    logging.getLogger().setLevel(logging.INFO)
   178    unittest.main()