#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit test for Bigquery verifier"""

# pytype: skip-file

import logging
import unittest

import mock
from hamcrest import assert_that as hc_assert_that

from apache_beam.io.gcp import bigquery_tools
from apache_beam.io.gcp.tests import bigquery_matcher as bq_verifier
from apache_beam.testing.test_utils import patch_retry

# Protect against environments where bigquery library is not available.
# pylint: disable=wrong-import-order, wrong-import-position
try:
  from google.cloud import bigquery
  from google.cloud.exceptions import NotFound
except ImportError:
  bigquery = None
  NotFound = None
# pylint: enable=wrong-import-order, wrong-import-position

# Checksum the tests below expect for the three fake rows from _fake_rows().
_EXPECTED_CHECKSUM = '59f9d6bdee30d67ea73b8aded121c3a0280f9cd8'


def _fake_rows():
  """Returns three mock rows whose values() yield [], None and None."""
  rows = []
  for row_values in ([], None, None):
    row = mock.Mock()
    row.values.return_value = row_values
    rows.append(row)
  return rows


def _table_properties():
  """Returns a fresh dict of the table properties used by the table tests."""
  return {
      'partitioning': 'a lot of partitioning',
      'clustering': {
          'column': 'FRIENDS'
      },
  }


@unittest.skipIf(bigquery is None, 'Bigquery dependencies are not installed.')
@mock.patch.object(bigquery, 'Client')
class BigqueryMatcherTest(unittest.TestCase):
  """Tests for BigqueryMatcher with bigquery.Client replaced by a mock."""
  def setUp(self):
    self._mock_result = mock.Mock()
    patch_retry(self, bq_verifier)

  def test_bigquery_matcher_success(self, mock_bigquery):
    """The matcher passes after a single query returning the fake rows."""
    client = mock_bigquery.return_value
    query_fn = client.query
    query_fn.return_value.result.return_value = _fake_rows()

    checksum_matcher = bq_verifier.BigqueryMatcher(
        'mock_project', 'mock_query', _EXPECTED_CHECKSUM)

    hc_assert_that(self._mock_result, checksum_matcher)
    self.assertEqual(1, query_fn.call_count)

  def test_bigquery_matcher_success_streaming_retry(self, mock_bigquery):
    """The matcher passes on the second query when the first one is empty."""
    # Simulate case where a streaming insert takes time to process, such that
    # the first query result is incomplete (empty).
    incomplete_rows = []
    complete_rows = _fake_rows()

    client = mock_bigquery.return_value
    query_fn = client.query
    query_fn.return_value.result.side_effect = [incomplete_rows, complete_rows]

    checksum_matcher = bq_verifier.BigqueryMatcher(
        'mock_project',
        'mock_query',
        _EXPECTED_CHECKSUM,
        timeout_secs=5,
    )

    hc_assert_that(self._mock_result, checksum_matcher)
    self.assertEqual(2, query_fn.call_count)

  def test_bigquery_matcher_query_error_retry(self, mock_bigquery):
    """A query raising NotFound is attempted MAX_RETRIES + 1 times."""
    client = mock_bigquery.return_value
    query_fn = client.query
    query_fn.side_effect = NotFound('table not found')

    checksum_matcher = bq_verifier.BigqueryMatcher(
        'mock_project', 'mock_query', 'mock_checksum')

    with self.assertRaises(NotFound):
      hc_assert_that(self._mock_result, checksum_matcher)
    self.assertEqual(bq_verifier.MAX_RETRIES + 1, query_fn.call_count)

  def test_bigquery_matcher_query_error_checksum(self, mock_bigquery):
    """An empty query result fails the assertion after a single query."""
    no_rows = []

    client = mock_bigquery.return_value
    query_fn = client.query
    query_fn.return_value.result.return_value = no_rows

    checksum_matcher = bq_verifier.BigqueryMatcher(
        'mock_project',
        'mock_query',
        _EXPECTED_CHECKSUM,
    )

    with self.assertRaisesRegex(AssertionError, r'Expected checksum'):
      hc_assert_that(self._mock_result, checksum_matcher)
    self.assertEqual(1, query_fn.call_count)


@unittest.skipIf(bigquery is None, 'Bigquery dependencies are not installed.')
@mock.patch.object(bigquery_tools, 'BigQueryWrapper')
class BigqueryTableMatcherTest(unittest.TestCase):
  """Tests for BigQueryTableMatcher with BigQueryWrapper replaced by a mock."""
  def setUp(self):
    self._mock_result = mock.Mock()
    patch_retry(self, bq_verifier)

  def test_bigquery_table_matcher_success(self, mock_bigquery):
    """The matcher passes when get_table returns the expected properties."""
    # The fake table exposes the properties as attributes.
    fake_table = mock.Mock(**_table_properties())
    wrapper = mock_bigquery.return_value
    wrapper.get_table.return_value = fake_table

    table_matcher = bq_verifier.BigQueryTableMatcher(
        'mock_project', 'mock_dataset', 'mock_table', _table_properties())

    hc_assert_that(self._mock_result, table_matcher)

  def test_bigquery_table_matcher_query_error_retry(self, mock_bigquery):
    """A get_table raising ValueError is attempted MAX_RETRIES + 1 times."""
    wrapper = mock_bigquery.return_value
    get_table_fn = wrapper.get_table
    get_table_fn.side_effect = ValueError('table not found')

    table_matcher = bq_verifier.BigQueryTableMatcher(
        'mock_project', 'mock_dataset', 'mock_table', _table_properties())

    with self.assertRaises(ValueError):
      hc_assert_that(self._mock_result, table_matcher)
    self.assertEqual(bq_verifier.MAX_RETRIES + 1, get_table_fn.call_count)


@unittest.skipIf(bigquery is None, 'Bigquery dependencies are not installed.')
@mock.patch.object(bigquery, 'Client')
class BigqueryFullResultStreamingMatcherTest(unittest.TestCase):
  """Tests for BigqueryFullResultStreamingMatcher with a mocked client."""
  def setUp(self):
    self.timeout = 0.01

  def test__get_query_result_timeout(self, mock_bigquery):
    """_get_query_result raises TimeoutError when queries keep coming back empty."""
    client = mock_bigquery.return_value
    query_fn = client.query
    query_fn.return_value.result.return_value = []

    streaming_matcher = bq_verifier.BigqueryFullResultStreamingMatcher(
        'some-project', 'some-query', [1, 2, 3], timeout=self.timeout)

    with self.assertRaises(TimeoutError):  # noqa: F821
      streaming_matcher._get_query_result()


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()