github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/bigquery_file_loads_test.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for BigQuery file loads utilities."""

# pytype: skip-file

import logging
import os
import secrets
import time
import unittest

import mock
import pytest
from hamcrest.core import assert_that as hamcrest_assert
from hamcrest.core.core.allof import all_of
from hamcrest.core.core.is_ import is_
from parameterized import param
from parameterized import parameterized

import apache_beam as beam
from apache_beam.io.filebasedsink_test import _TestCaseWithTempDirCleanUp
from apache_beam.io.gcp import bigquery_file_loads as bqfl
from apache_beam.io.gcp import bigquery
from apache_beam.io.gcp import bigquery_tools
from apache_beam.io.gcp.bigquery import BigQueryDisposition
from apache_beam.io.gcp.internal.clients import bigquery as bigquery_api
from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryFullResultMatcher
from apache_beam.io.gcp.tests.bigquery_matcher import BigqueryFullResultStreamingMatcher
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.runners.dataflow.test_dataflow_runner import TestDataflowRunner
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.test_stream import TestStream
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.transforms import combiners
from apache_beam.transforms.window import TimestampedValue
from apache_beam.typehints.typehints import Tuple
from apache_beam.utils import timestamp

try:
  from apitools.base.py.exceptions import HttpError
except ImportError:
  raise unittest.SkipTest('GCP dependencies are not installed')

_LOGGER = logging.getLogger(__name__)

_DESTINATION_ELEMENT_PAIRS = [
    # DESTINATION 1
    ('project1:dataset1.table1', {
        'name': 'beam', 'language': 'py'
    }),
    ('project1:dataset1.table1', {
        'name': 'beam', 'language': 'java'
    }),
    ('project1:dataset1.table1', {
        'name': 'beam', 'language': 'go'
    }),
    ('project1:dataset1.table1', {
        'name': 'flink', 'language': 'java'
    }),
    ('project1:dataset1.table1', {
        'name': 'flink', 'language': 'scala'
    }),

    # DESTINATION 3
    ('project1:dataset1.table3', {
        'name': 'spark', 'language': 'scala'
    }),

    # DESTINATION 1
    ('project1:dataset1.table1', {
        'name': 'spark', 'language': 'py'
    }),
    ('project1:dataset1.table1', {
        'name': 'spark', 'language': 'scala'
    }),

    # DESTINATION 2
    ('project1:dataset1.table2', {
        'name': 'beam', 'foundation': 'apache'
    }),
    ('project1:dataset1.table2', {
        'name': 'flink', 'foundation': 'apache'
    }),
    ('project1:dataset1.table2', {
        'name': 'spark', 'foundation': 'apache'
    }),
]

_DISTINCT_DESTINATIONS = list({elm[0] for elm in _DESTINATION_ELEMENT_PAIRS})

_ELEMENTS = [elm[1] for elm in _DESTINATION_ELEMENT_PAIRS]

_ELEMENTS_SCHEMA = bigquery.WriteToBigQuery.get_dict_table_schema(
    bigquery_api.TableSchema(
        fields=[
            bigquery_api.TableFieldSchema(
                name="name", type="STRING", mode="REQUIRED"),
            bigquery_api.TableFieldSchema(name="language", type="STRING"),
            bigquery_api.TableFieldSchema(name="foundation", type="STRING"),
        ]))

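# WriteRecordsToFile is the DoFn that stages incoming (destination, row) pairs
# into temporary files before any load job is triggered. The assertions below
# index into its outputs, whose shape is roughly (a sketch inferred from the
# tests, not an API contract):
#
#   WRITTEN_FILE_TAG:     (destination, (file_path, file_size))
#   UNWRITTEN_RECORD_TAG: (destination, row)  # spilled once the per-bundle
#                                             # file limit is reached
#
# e.g. ('project1:dataset1.table1', ('/tmp/bq_load/xyz', 1024)) for a written
# file, where the path and size are illustrative values only.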
class TestWriteRecordsToFile(_TestCaseWithTempDirCleanUp):
  maxDiff = None

  def _consume_input(self, fn, checks=None):
    if checks is None:
      return

    with TestPipeline() as p:
      output_pcs = (
          p
          | beam.Create(_DESTINATION_ELEMENT_PAIRS, reshuffle=False)
          | beam.ParDo(fn, self.tmpdir).with_outputs(
              fn.WRITTEN_FILE_TAG, fn.UNWRITTEN_RECORD_TAG))

      checks(output_pcs)
      return output_pcs

  @parameterized.expand([
      param(file_format=bigquery_tools.FileFormat.AVRO),
      param(file_format=bigquery_tools.FileFormat.JSON),
      param(file_format=None),
  ])
  def test_files_created(self, file_format):
    """Test that the files are created and written."""

    fn = bqfl.WriteRecordsToFile(
        schema=_ELEMENTS_SCHEMA, file_format=file_format)
    self.tmpdir = self._new_tempdir()

    def check_files_created(output_pcs):
      dest_file_pc = output_pcs[bqfl.WriteRecordsToFile.WRITTEN_FILE_TAG]

      files = dest_file_pc | "GetFiles" >> beam.Map(lambda x: x[1][0])
      file_count = files | "CountFiles" >> combiners.Count.Globally()

      _ = files | "FilesExist" >> beam.Map(
          lambda x: hamcrest_assert(os.path.exists(x), is_(True)))
      assert_that(file_count, equal_to([3]), label='check file count')

      destinations = (
          dest_file_pc
          | "GetDests" >>
          beam.Map(lambda x: bigquery_tools.get_hashable_destination(x[0])))
      assert_that(
          destinations,
          equal_to(list(_DISTINCT_DESTINATIONS)),
          label='check destinations')

    self._consume_input(fn, check_files_created)

  def test_many_files(self):
    """Forces records to be written to many files.

    For each destination, multiple files are necessary. This is because the
    max file size is very small, so only a couple of records fit in each file.
    """

    fn = bqfl.WriteRecordsToFile(schema=_ELEMENTS_SCHEMA, max_file_size=50)
    self.tmpdir = self._new_tempdir()

    def check_many_files(output_pcs):
      dest_file_pc = output_pcs[bqfl.WriteRecordsToFile.WRITTEN_FILE_TAG]

      files_per_dest = (
          dest_file_pc
          | beam.Map(lambda x: x).with_output_types(
              beam.typehints.KV[str, Tuple[str, int]])
          | combiners.Count.PerKey())
      files_per_dest = (
          files_per_dest
          | "GetDests" >> beam.Map(
              lambda x: (bigquery_tools.get_hashable_destination(x[0]), x[1])))
      assert_that(
          files_per_dest,
          equal_to([('project1:dataset1.table1', 4),
                    ('project1:dataset1.table2', 2),
                    ('project1:dataset1.table3', 1)]))

      # Check that the files exist
      _ = dest_file_pc | beam.Map(lambda x: x[1][0]) | beam.Map(
          lambda x: hamcrest_assert(os.path.exists(x), is_(True)))

    self._consume_input(fn, check_many_files)

  @parameterized.expand([
      param(file_format=bigquery_tools.FileFormat.AVRO),
      param(file_format=bigquery_tools.FileFormat.JSON),
  ])
  def test_records_are_spilled(self, file_format):
    """Forces records to be written to many files.

    For each destination, multiple files are necessary, and at most two files
    can be created per bundle. This forces records to be spilled to the next
    stage of processing.
    """

    fn = bqfl.WriteRecordsToFile(
        schema=_ELEMENTS_SCHEMA,
        max_files_per_bundle=2,
        file_format=file_format)
    self.tmpdir = self._new_tempdir()

    def check_many_files(output_pcs):
      dest_file_pc = output_pcs[bqfl.WriteRecordsToFile.WRITTEN_FILE_TAG]
      spilled_records_pc = output_pcs[
          bqfl.WriteRecordsToFile.UNWRITTEN_RECORD_TAG]

      spilled_records_count = (spilled_records_pc | combiners.Count.Globally())
      assert_that(spilled_records_count, equal_to([3]), label='spilled count')

      files_per_dest = (
          dest_file_pc
          | beam.Map(lambda x: x).with_output_types(
              beam.typehints.KV[str, Tuple[str, int]])
          | combiners.Count.PerKey())
      files_per_dest = (
          files_per_dest
          | "GetDests" >> beam.Map(
              lambda x: (bigquery_tools.get_hashable_destination(x[0]), x[1])))

      # Only table1 and table3 get files. table2 records get spilled.
      assert_that(
          files_per_dest,
          equal_to([('project1:dataset1.table1', 1),
                    ('project1:dataset1.table3', 1)]),
          label='file count')

      # Check that the files exist
      _ = dest_file_pc | beam.Map(lambda x: x[1][0]) | beam.Map(
          lambda x: hamcrest_assert(os.path.exists(x), is_(True)))

    self._consume_input(fn, check_many_files)


class TestWriteGroupedRecordsToFile(_TestCaseWithTempDirCleanUp):
  def _consume_input(self, fn, input, checks):
    if checks is None:
      return

    with TestPipeline() as p:
      res = (
          p
          | beam.Create(input)
          | beam.GroupByKey()
          | beam.ParDo(fn, self.tmpdir))

      checks(res)
      return res

  @parameterized.expand([
      param(file_format=bigquery_tools.FileFormat.AVRO),
      param(file_format=bigquery_tools.FileFormat.JSON),
      param(file_format=None),
  ])
  def test_files_are_created(self, file_format):
    """Test that the files are created and written."""

    fn = bqfl.WriteGroupedRecordsToFile(
        schema=_ELEMENTS_SCHEMA, file_format=file_format)
    self.tmpdir = self._new_tempdir()

    def check_files_created(output_pc):
      files = output_pc | "GetFiles" >> beam.Map(lambda x: x[1][0])
      file_count = files | "CountFiles" >> combiners.Count.Globally()

      _ = files | "FilesExist" >> beam.Map(
          lambda x: hamcrest_assert(os.path.exists(x), is_(True)))
      assert_that(file_count, equal_to([3]), label='check file count')

      destinations = (
          output_pc
          | "GetDests" >>
          beam.Map(lambda x: bigquery_tools.get_hashable_destination(x[0])))
      assert_that(
          destinations,
          equal_to(list(_DISTINCT_DESTINATIONS)),
          label='check destinations')

    self._consume_input(fn, _DESTINATION_ELEMENT_PAIRS, check_files_created)

  def test_multiple_files(self):
    """Forces records to be written to many files.

    For each destination, multiple files are necessary. This is because the
    max file size is very small, so only a couple of records fit in each file.
    """
    fn = bqfl.WriteGroupedRecordsToFile(
        schema=_ELEMENTS_SCHEMA, max_file_size=50)
    self.tmpdir = self._new_tempdir()

    def check_multiple_files(output_pc):
      files_per_dest = output_pc | combiners.Count.PerKey()
      files_per_dest = (
          files_per_dest
          | "GetDests" >> beam.Map(
              lambda x: (bigquery_tools.get_hashable_destination(x[0]), x[1])))
      assert_that(
          files_per_dest,
          equal_to([
              ('project1:dataset1.table1', 4),
              ('project1:dataset1.table2', 2),
              ('project1:dataset1.table3', 1),
          ]))

      # Check that the files exist
      _ = output_pc | beam.Map(lambda x: x[1][0]) | beam.Map(os.path.exists)

    self._consume_input(fn, _DESTINATION_ELEMENT_PAIRS, check_multiple_files)

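# PartitionFiles groups the files written for each destination into partitions
# that respect two limits: a maximum total partition size and a maximum number
# of files per partition. Destinations that fit in a single partition are
# emitted under SINGLE_PARTITION_TAG; destinations that need several partitions
# are emitted under MULTIPLE_PARTITIONS_TAG and are expected to go through
# temporary tables and copy jobs (see TestBigQueryFileLoads below). A rough
# sketch of the Partition bookkeeping exercised by test_partition:
#
#   partition = bqfl.PartitionFiles.Partition(1000, 1)  # size limit, file limit
#   partition.can_accept(50)   # True while the partition is empty
#   partition.add('file1', 50)
#   partition.can_accept(50)   # False: the one-file limit has been reached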
class TestPartitionFiles(unittest.TestCase):

  _ELEMENTS = [(
      'destination0', [('file0', 50), ('file1', 50), ('file2', 50),
                       ('file3', 50)]),
               ('destination1', [('file0', 50), ('file1', 50)])]

  def test_partition(self):
    partition = bqfl.PartitionFiles.Partition(1000, 1)
    self.assertEqual(partition.can_accept(50), True)
    self.assertEqual(partition.can_accept(2000), False)
    self.assertEqual(partition.can_accept(1000), True)

    partition.add('file1', 50)
    self.assertEqual(partition.files, ['file1'])
    self.assertEqual(partition.size, 50)
    self.assertEqual(partition.can_accept(50), False)
    self.assertEqual(partition.can_accept(0), False)

  def test_partition_files_dofn_file_split(self):
    """Force partitions to split based on max_files"""
    multiple_partitions_result = [('destination0', ['file0', 'file1']),
                                  ('destination0', ['file2', 'file3'])]
    single_partition_result = [('destination1', ['file0', 'file1'])]
    with TestPipeline() as p:
      destination_file_pairs = p | beam.Create(self._ELEMENTS, reshuffle=False)
      partitioned_files = (
          destination_file_pairs
          | beam.ParDo(bqfl.PartitionFiles(1000, 2)).with_outputs(
              bqfl.PartitionFiles.MULTIPLE_PARTITIONS_TAG,
              bqfl.PartitionFiles.SINGLE_PARTITION_TAG))
      multiple_partitions = partitioned_files[
          bqfl.PartitionFiles.MULTIPLE_PARTITIONS_TAG]
      single_partition = partitioned_files[
          bqfl.PartitionFiles.SINGLE_PARTITION_TAG]

      assert_that(
          multiple_partitions,
          equal_to(multiple_partitions_result),
          label='CheckMultiplePartitions')
      assert_that(
          single_partition,
          equal_to(single_partition_result),
          label='CheckSinglePartition')

  def test_partition_files_dofn_size_split(self):
    """Force partitions to split based on max_partition_size"""
    multiple_partitions_result = [('destination0', ['file0', 'file1', 'file2']),
                                  ('destination0', ['file3'])]
    single_partition_result = [('destination1', ['file0', 'file1'])]
    with TestPipeline() as p:
      destination_file_pairs = p | beam.Create(self._ELEMENTS, reshuffle=False)
      partitioned_files = (
          destination_file_pairs
          | beam.ParDo(bqfl.PartitionFiles(150, 10)).with_outputs(
              bqfl.PartitionFiles.MULTIPLE_PARTITIONS_TAG,
              bqfl.PartitionFiles.SINGLE_PARTITION_TAG))
      multiple_partitions = partitioned_files[
          bqfl.PartitionFiles.MULTIPLE_PARTITIONS_TAG]
      single_partition = partitioned_files[
          bqfl.PartitionFiles.SINGLE_PARTITION_TAG]

      assert_that(
          multiple_partitions,
          equal_to(multiple_partitions_result),
          label='CheckMultiplePartitions')
      assert_that(
          single_partition,
          equal_to(single_partition_result),
          label='CheckSinglePartition')

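# BigQueryBatchFileLoads is the composite transform behind WriteToBigQuery's
# FILE_LOADS method: rows are staged into temporary files, the files are
# grouped into partitions, each partition is loaded with a BigQuery load job,
# and destinations that needed more than one partition are consolidated with
# copy jobs from temporary tables. The unit tests below drive the transform
# against a mocked BigQuery client; a minimal sketch of that setup, mirroring
# the test code (tmp_dir is an illustrative placeholder):
#
#   bq_client = mock.Mock()
#   bq_client.jobs.Insert.return_value = result_job  # a Job with a JobReference
#   bq_client.jobs.Get.return_value = mock_job       # reports state 'DONE'
#   transform = bqfl.BigQueryBatchFileLoads(
#       'project1:dataset1.table1',
#       custom_gcs_temp_location=tmp_dir,
#       test_client=bq_client,
#       validate=False,
#       temp_file_format=bigquery_tools.FileFormat.JSON)
#   outputs = p | beam.Create(_ELEMENTS) | transform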
class TestBigQueryFileLoads(_TestCaseWithTempDirCleanUp):
  def test_trigger_load_jobs_with_empty_files(self):
    destination = "project:dataset.table"
    empty_files = []
    load_job_prefix = "test_prefix"

    with beam.Pipeline() as p:
      partitions = (
          p
          | beam.Create([(destination, empty_files)])
          | beam.ParDo(bqfl.PartitionFiles(1000, 10)).with_outputs(
              bqfl.PartitionFiles.MULTIPLE_PARTITIONS_TAG,
              bqfl.PartitionFiles.SINGLE_PARTITION_TAG))

      _ = (
          partitions[bqfl.PartitionFiles.SINGLE_PARTITION_TAG]
          | beam.ParDo(bqfl.TriggerLoadJobs(), load_job_prefix))

  def test_records_traverse_transform_with_mocks(self):
    destination = 'project1:dataset1.table1'

    job_reference = bigquery_api.JobReference()
    job_reference.projectId = 'project1'
    job_reference.jobId = 'job_name1'
    result_job = bigquery_api.Job()
    result_job.jobReference = job_reference

    mock_job = mock.Mock()
    mock_job.status.state = 'DONE'
    mock_job.status.errorResult = None
    mock_job.jobReference = job_reference

    bq_client = mock.Mock()
    bq_client.jobs.Get.return_value = mock_job

    bq_client.jobs.Insert.return_value = result_job

    transform = bqfl.BigQueryBatchFileLoads(
        destination,
        custom_gcs_temp_location=self._new_tempdir(),
        test_client=bq_client,
        validate=False,
        temp_file_format=bigquery_tools.FileFormat.JSON)

    # Need to test this with the DirectRunner to avoid serializing mocks
    with TestPipeline('DirectRunner') as p:
      outputs = p | beam.Create(_ELEMENTS) | transform

      dest_files = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_FILE_PAIRS]
      dest_job = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS]

      jobs = dest_job | "GetJobs" >> beam.Map(lambda x: x[1])

      files = dest_files | "GetFiles" >> beam.Map(lambda x: x[1][0])
      destinations = (
          dest_files
          | "GetDests" >> beam.Map(
              lambda x: (bigquery_tools.get_hashable_destination(x[0]), x[1]))
          | "GetUniques" >> combiners.Count.PerKey()
          | "GetFinalDests" >> beam.Keys())

      # All files exist
      _ = (
          files
          | beam.Map(lambda x: hamcrest_assert(os.path.exists(x), is_(True))))

      # One file per destination
      assert_that(
          files | combiners.Count.Globally(), equal_to([1]), label='CountFiles')

      assert_that(
          destinations, equal_to([destination]), label='CheckDestinations')

      assert_that(jobs, equal_to([job_reference]), label='CheckJobs')

  def test_load_job_id_used(self):
    job_reference = bigquery_api.JobReference()
    job_reference.projectId = 'loadJobProject'
    job_reference.jobId = 'job_name1'

    result_job = bigquery_api.Job()
    result_job.jobReference = job_reference

    mock_job = mock.Mock()
    mock_job.status.state = 'DONE'
    mock_job.status.errorResult = None
    mock_job.jobReference = job_reference

    bq_client = mock.Mock()
    bq_client.jobs.Get.return_value = mock_job

    bq_client.jobs.Insert.return_value = result_job

    transform = bqfl.BigQueryBatchFileLoads(
        'project1:dataset1.table1',
        custom_gcs_temp_location=self._new_tempdir(),
        test_client=bq_client,
        validate=False,
        load_job_project_id='loadJobProject')

    with TestPipeline('DirectRunner') as p:
      outputs = p | beam.Create(_ELEMENTS) | transform
      jobs = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS] \
          | "GetJobs" >> beam.Map(lambda x: x[1])

      assert_that(jobs, equal_to([job_reference]), label='CheckJobProjectIds')

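  # When a destination needs more than one partition, BigQueryBatchFileLoads is
  # expected to load each partition into a temporary table and then consolidate
  # the results into the final destination with copy jobs; the small
  # max_file_size / max_partition_size / max_files_per_partition values used
  # below are what force that path. The next test checks that
  # load_job_project_id is applied to those copy jobs as well, not only to the
  # initial load jobs.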
  def test_load_job_id_use_for_copy_job(self):
    destination = 'project1:dataset1.table1'

    job_reference = bigquery_api.JobReference()
    job_reference.projectId = 'loadJobProject'
    job_reference.jobId = 'job_name1'
    result_job = mock.Mock()
    result_job.jobReference = job_reference

    mock_job = mock.Mock()
    mock_job.status.state = 'DONE'
    mock_job.status.errorResult = None
    mock_job.jobReference = job_reference

    bq_client = mock.Mock()
    bq_client.jobs.Get.return_value = mock_job

    bq_client.jobs.Insert.return_value = result_job
    bq_client.tables.Delete.return_value = None

    with TestPipeline('DirectRunner') as p:
      outputs = (
          p
          | beam.Create(_ELEMENTS, reshuffle=False)
          | bqfl.BigQueryBatchFileLoads(
              destination,
              custom_gcs_temp_location=self._new_tempdir(),
              test_client=bq_client,
              validate=False,
              temp_file_format=bigquery_tools.FileFormat.JSON,
              max_file_size=45,
              max_partition_size=80,
              max_files_per_partition=2,
              load_job_project_id='loadJobProject'))

      dest_copy_jobs = outputs[
          bqfl.BigQueryBatchFileLoads.DESTINATION_COPY_JOBID_PAIRS]

      copy_jobs = dest_copy_jobs | "GetCopyJobs" >> beam.Map(lambda x: x[1])

      assert_that(
          copy_jobs,
          equal_to([
              job_reference,
              job_reference,
              job_reference,
              job_reference,
              job_reference,
              job_reference
          ]),
          label='CheckCopyJobProjectIds')

  @mock.patch('time.sleep')
  def test_wait_for_load_job_completion(self, sleep_mock):
    job_1 = bigquery_api.Job()
    job_1.jobReference = bigquery_api.JobReference()
    job_1.jobReference.projectId = 'project1'
    job_1.jobReference.jobId = 'jobId1'
    job_2 = bigquery_api.Job()
    job_2.jobReference = bigquery_api.JobReference()
    job_2.jobReference.projectId = 'project1'
    job_2.jobReference.jobId = 'jobId2'

    job_1_waiting = mock.Mock()
    job_1_waiting.status.state = 'RUNNING'
    job_2_done = mock.Mock()
    job_2_done.status.state = 'DONE'
    job_2_done.status.errorResult = None

    job_1_done = mock.Mock()
    job_1_done.status.state = 'DONE'
    job_1_done.status.errorResult = None

    bq_client = mock.Mock()
    bq_client.jobs.Get.side_effect = [
        job_1_waiting, job_2_done, job_1_done, job_2_done
    ]
    partition_1 = ('project:dataset.table0', ['file0'])
    partition_2 = ('project:dataset.table1', ['file1'])
    bq_client.jobs.Insert.side_effect = [job_1, job_2]
    test_job_prefix = "test_job"

    expected_dest_jobref_list = [(partition_1[0], job_1.jobReference),
                                 (partition_2[0], job_2.jobReference)]
    with TestPipeline('DirectRunner') as p:
      partitions = p | beam.Create([partition_1, partition_2])
      outputs = (
          partitions
          | beam.ParDo(
              bqfl.TriggerLoadJobs(test_client=bq_client), test_job_prefix))

      assert_that(outputs, equal_to(expected_dest_jobref_list))

    sleep_mock.assert_called_once()

  @mock.patch('time.sleep')
  def test_one_load_job_failed_after_waiting(self, sleep_mock):
    job_1 = bigquery_api.Job()
    job_1.jobReference = bigquery_api.JobReference()
    job_1.jobReference.projectId = 'project1'
    job_1.jobReference.jobId = 'jobId1'
    job_2 = bigquery_api.Job()
    job_2.jobReference = bigquery_api.JobReference()
    job_2.jobReference.projectId = 'project1'
    job_2.jobReference.jobId = 'jobId2'

    job_1_waiting = mock.Mock()
    job_1_waiting.status.state = 'RUNNING'
    job_2_done = mock.Mock()
    job_2_done.status.state = 'DONE'
    job_2_done.status.errorResult = None

    job_1_error = mock.Mock()
    job_1_error.status.state = 'DONE'
    job_1_error.status.errorResult = 'Some problems happened'

    bq_client = mock.Mock()
    bq_client.jobs.Get.side_effect = [
        job_1_waiting, job_2_done, job_1_error, job_2_done
    ]
    partition_1 = ('project:dataset.table0', ['file0'])
    partition_2 = ('project:dataset.table1', ['file1'])
    bq_client.jobs.Insert.side_effect = [job_1, job_2]
    test_job_prefix = "test_job"

    with self.assertRaises(Exception):
      with TestPipeline('DirectRunner') as p:
        partitions = p | beam.Create([partition_1, partition_2])
        _ = (
            partitions
            | beam.ParDo(
                bqfl.TriggerLoadJobs(test_client=bq_client), test_job_prefix))

    sleep_mock.assert_called_once()

  def test_multiple_partition_files(self):
    destination = 'project1:dataset1.table1'

    job_reference = bigquery_api.JobReference()
    job_reference.projectId = 'project1'
    job_reference.jobId = 'job_name1'
    result_job = mock.Mock()
    result_job.jobReference = job_reference

    mock_job = mock.Mock()
    mock_job.status.state = 'DONE'
    mock_job.status.errorResult = None
    mock_job.jobReference = job_reference

    bq_client = mock.Mock()
    bq_client.jobs.Get.return_value = mock_job

    bq_client.jobs.Insert.return_value = result_job
    bq_client.tables.Delete.return_value = None

    with TestPipeline('DirectRunner') as p:
      outputs = (
          p
          | beam.Create(_ELEMENTS, reshuffle=False)
          | bqfl.BigQueryBatchFileLoads(
              destination,
              custom_gcs_temp_location=self._new_tempdir(),
              test_client=bq_client,
              validate=False,
              temp_file_format=bigquery_tools.FileFormat.JSON,
              max_file_size=45,
              max_partition_size=80,
              max_files_per_partition=2))

      dest_files = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_FILE_PAIRS]
      dest_load_jobs = outputs[
          bqfl.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS]
      dest_copy_jobs = outputs[
          bqfl.BigQueryBatchFileLoads.DESTINATION_COPY_JOBID_PAIRS]

      load_jobs = dest_load_jobs | "GetLoadJobs" >> beam.Map(lambda x: x[1])
      copy_jobs = dest_copy_jobs | "GetCopyJobs" >> beam.Map(lambda x: x[1])

      files = dest_files | "GetFiles" >> beam.Map(lambda x: x[1][0])
      destinations = (
          dest_files
          | "GetDests" >> beam.Map(
              lambda x: (bigquery_tools.get_hashable_destination(x[0]), x[1]))
          | "GetUniques" >> combiners.Count.PerKey()
          | "GetFinalDests" >> beam.Keys())

      # All files exist
      _ = (
          files
          | beam.Map(lambda x: hamcrest_assert(os.path.exists(x), is_(True))))

      # Multiple files are created for the single destination
      assert_that(
          files | "CountFiles" >> combiners.Count.Globally(),
          equal_to([6]),
          label='CheckFileCount')

      assert_that(
          destinations, equal_to([destination]), label='CheckDestinations')

      assert_that(
          load_jobs | "CountLoadJobs" >> combiners.Count.Globally(),
          equal_to([6]),
          label='CheckLoadJobCount')
      assert_that(
          copy_jobs | "CountCopyJobs" >> combiners.Count.Globally(),
          equal_to([6]),
          label='CheckCopyJobCount')

  @parameterized.expand([
      param(write_disposition=BigQueryDisposition.WRITE_TRUNCATE),
      param(write_disposition=BigQueryDisposition.WRITE_EMPTY)
  ])
  @mock.patch(
      'apache_beam.io.gcp.bigquery_file_loads.TriggerCopyJobs.process',
      wraps=lambda *x: None)
  def test_multiple_partition_files_write_dispositions(
      self, mock_call_process, write_disposition):
    destination = 'project1:dataset1.table1'

    job_reference = bigquery_api.JobReference()
    job_reference.projectId = 'project1'
    job_reference.jobId = 'job_name1'
    result_job = mock.Mock()
    result_job.jobReference = job_reference

    mock_job = mock.Mock()
    mock_job.status.state = 'DONE'
    mock_job.status.errorResult = None
    mock_job.jobReference = job_reference

    bq_client = mock.Mock()
    bq_client.jobs.Get.return_value = mock_job

    bq_client.jobs.Insert.return_value = result_job
    bq_client.tables.Delete.return_value = None

    with TestPipeline('DirectRunner') as p:
      _ = (
          p
          | beam.Create(_ELEMENTS, reshuffle=False)
          | bqfl.BigQueryBatchFileLoads(
              destination,
              custom_gcs_temp_location=self._new_tempdir(),
              test_client=bq_client,
              validate=False,
              temp_file_format=bigquery_tools.FileFormat.JSON,
              max_file_size=45,
              max_partition_size=80,
              max_files_per_partition=2,
              write_disposition=write_disposition))
    # TriggerCopyJobs only processes once
    self.assertEqual(mock_call_process.call_count, 1)

  @parameterized.expand([
      param(is_streaming=False, with_auto_sharding=False),
      param(is_streaming=True, with_auto_sharding=False),
      param(is_streaming=True, with_auto_sharding=True),
  ])
  def test_triggering_frequency(self, is_streaming, with_auto_sharding):
    destination = 'project1:dataset1.table1'

    job_reference = bigquery_api.JobReference()
    job_reference.projectId = 'project1'
    job_reference.jobId = 'job_name1'
    result_job = bigquery_api.Job()
    result_job.jobReference = job_reference

    mock_job = mock.Mock()
    mock_job.status.state = 'DONE'
    mock_job.status.errorResult = None
    mock_job.jobReference = job_reference

    bq_client = mock.Mock()
    bq_client.jobs.Get.return_value = mock_job
    bq_client.jobs.Insert.return_value = result_job

    # Insert a fake clock to work with auto-sharding which needs a processing
    # time timer.
    class _FakeClock(object):
      def __init__(self, now=time.time()):
        self._now = now

      def __call__(self):
        return self._now

    start_time = timestamp.Timestamp(0)
    bq_client.test_clock = _FakeClock(now=start_time)

    triggering_frequency = 20 if is_streaming else None
    transform = bqfl.BigQueryBatchFileLoads(
        destination,
        custom_gcs_temp_location=self._new_tempdir(),
        test_client=bq_client,
        validate=False,
        temp_file_format=bigquery_tools.FileFormat.JSON,
        is_streaming_pipeline=is_streaming,
        triggering_frequency=triggering_frequency,
        with_auto_sharding=with_auto_sharding)

    # Need to test this with the DirectRunner to avoid serializing mocks
    test_options = PipelineOptions(flags=['--allow_unsafe_triggers'])
    test_options.view_as(StandardOptions).streaming = is_streaming
    with TestPipeline(runner='BundleBasedDirectRunner',
                      options=test_options) as p:
      if is_streaming:
        _SIZE = len(_ELEMENTS)
        first_batch = [
            TimestampedValue(value, start_time + i + 1) for i,
            value in enumerate(_ELEMENTS[:_SIZE // 2])
        ]
        second_batch = [
            TimestampedValue(value, start_time + _SIZE // 2 + i + 1) for i,
            value in enumerate(_ELEMENTS[_SIZE // 2:])
        ]
        # Advance processing time between batches of input elements to fire
        # the user triggers. Intentionally advance the processing time twice
        # for the auto-sharding case since we need to first fire the timer
        # and then fire the trigger.
        test_stream = (
            TestStream().advance_watermark_to(start_time).add_elements(
                first_batch).advance_processing_time(
                    30).advance_processing_time(30).add_elements(second_batch).
            advance_processing_time(30).advance_processing_time(
                30).advance_watermark_to_infinity())
        input = p | test_stream
      else:
        input = p | beam.Create(_ELEMENTS)
      outputs = input | transform

      dest_files = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_FILE_PAIRS]
      dest_job = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS]

      files = dest_files | "GetFiles" >> beam.Map(lambda x: x[1][0])
      destinations = (
          dest_files
          | "GetDests" >> beam.Map(
              lambda x: (bigquery_tools.get_hashable_destination(x[0]), x[1]))
          | "GetUniques" >> combiners.Count.PerKey()
          | "GetFinalDests" >> beam.Keys())
      jobs = dest_job | "GetJobs" >> beam.Map(lambda x: x[1])

      # Check that all files exist.
      _ = (
          files
          | beam.Map(lambda x: hamcrest_assert(os.path.exists(x), is_(True))))

      # Expect two load jobs are generated in the streaming case due to the
      # triggering frequency. Grouping is per trigger so we expect two entries
      # in the output as opposed to one.
      file_count = files | combiners.Count.Globally().without_defaults()
      expected_file_count = [1, 1] if is_streaming else [1]
      expected_destinations = [destination, destination
                               ] if is_streaming else [destination]
      expected_jobs = [job_reference, job_reference
                       ] if is_streaming else [job_reference]
      assert_that(file_count, equal_to(expected_file_count), label='CountFiles')
      assert_that(
          destinations,
          equal_to(expected_destinations),
          label='CheckDestinations')
      assert_that(jobs, equal_to(expected_jobs), label='CheckJobs')

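
# The remaining tests are integration tests (marked it_postcommit): they run
# against a real BigQuery project, creating a throwaway dataset in setUp and
# deleting it in tearDown. The streaming cases drive WriteToBigQuery's
# FILE_LOADS method with a TestStream, roughly as follows (mirroring the test
# code below; output_table and schema come from the test fixtures):
#
#   _ = (p
#        | test_stream
#        | bigquery.WriteToBigQuery(
#            output_table,
#            schema=schema,
#            method=bigquery.WriteToBigQuery.Method.FILE_LOADS,
#            triggering_frequency=100))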
class BigQueryFileLoadsIT(unittest.TestCase):

  BIG_QUERY_DATASET_ID = 'python_bq_file_loads_'
  BIG_QUERY_SCHEMA = (
      '{"fields": [{"name": "name","type": "STRING"},'
      '{"name": "language","type": "STRING"}]}')

  BIG_QUERY_SCHEMA_2 = (
      '{"fields": [{"name": "name","type": "STRING"},'
      '{"name": "foundation","type": "STRING"}]}')

  BIG_QUERY_STREAMING_SCHEMA = ({
      'fields': [{
          'name': 'Integr', 'type': 'INTEGER', 'mode': 'NULLABLE'
      }]
  })

  def setUp(self):
    self.test_pipeline = TestPipeline(is_integration_test=True)
    self.runner_name = type(self.test_pipeline.runner).__name__
    self.project = self.test_pipeline.get_option('project')

    self.dataset_id = '%s%d%s' % (
        self.BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
    self.bigquery_client = bigquery_tools.BigQueryWrapper()
    self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
    self.output_table = "%s.output_table" % (self.dataset_id)
    _LOGGER.info(
        "Created dataset %s in project %s", self.dataset_id, self.project)

  @pytest.mark.it_postcommit
  def test_multiple_destinations_transform(self):
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)
    output_table_3 = '%s%s' % (self.output_table, 3)
    output_table_4 = '%s%s' % (self.output_table, 4)
    schema1 = bigquery.WriteToBigQuery.get_dict_table_schema(
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA))
    schema2 = bigquery.WriteToBigQuery.get_dict_table_schema(
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA_2))

    schema_kv_pairs = [(output_table_1, schema1), (output_table_2, schema2),
                       (output_table_3, schema1), (output_table_4, schema2)]
    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, language FROM %s" % output_table_1,
            data=[(d['name'], d['language']) for d in _ELEMENTS
                  if 'language' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, foundation FROM %s" % output_table_2,
            data=[(d['name'], d['foundation']) for d in _ELEMENTS
                  if 'foundation' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, language FROM %s" % output_table_3,
            data=[(d['name'], d['language']) for d in _ELEMENTS
                  if 'language' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, foundation FROM %s" % output_table_4,
            data=[(d['name'], d['foundation']) for d in _ELEMENTS
                  if 'foundation' in d])
    ]

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=all_of(*pipeline_verifiers))

    with beam.Pipeline(argv=args) as p:
      input = p | beam.Create(_ELEMENTS, reshuffle=False)

      schema_map_pcv = beam.pvalue.AsDict(
          p | "MakeSchemas" >> beam.Create(schema_kv_pairs))

      table_record_pcv = beam.pvalue.AsDict(
          p | "MakeTables" >> beam.Create([('table1', output_table_1),
                                           ('table2', output_table_2)]))

      # Get all input onto the same machine
      input = (
          input
          | beam.Map(lambda x: (None, x))
          | beam.GroupByKey()
          | beam.FlatMap(lambda elm: elm[1]))

      _ = (
          input | "WriteWithMultipleDestsFreely" >> bigquery.WriteToBigQuery(
              table=lambda x,
              tables:
              (tables['table1'] if 'language' in x else tables['table2']),
              table_side_inputs=(table_record_pcv, ),
              schema=lambda dest,
              schema_map: schema_map.get(dest, None),
              schema_side_inputs=(schema_map_pcv, ),
              create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
              write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY))

      _ = (
          input | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
              table=lambda x:
              (output_table_3 if 'language' in x else output_table_4),
              schema=lambda dest,
              schema_map: schema_map.get(dest, None),
              schema_side_inputs=(schema_map_pcv, ),
              create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
              write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY,
              max_file_size=20,
              max_files_per_bundle=-1))

  @pytest.mark.it_postcommit
  def test_bqfl_streaming(self):
    if isinstance(self.test_pipeline.runner, TestDataflowRunner):
      self.skipTest("TestStream is not supported on TestDataflowRunner")
    output_table = '%s_%s' % (self.output_table, 'ints')
    _SIZE = 100
    schema = self.BIG_QUERY_STREAMING_SCHEMA
    l = [{'Integr': i} for i in range(_SIZE)]

    bq_matcher = BigqueryFullResultStreamingMatcher(
        project=self.project,
        query="SELECT Integr FROM %s" % output_table,
        data=[(i, ) for i in range(100)])

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=bq_matcher,
        streaming=True,
        allow_unsafe_triggers=True)
    with beam.Pipeline(argv=args) as p:
      stream_source = (
          TestStream().advance_watermark_to(0).advance_processing_time(
              100).add_elements(l[:_SIZE // 4]).
          advance_processing_time(100).advance_watermark_to(100).add_elements(
              l[_SIZE // 4:2 * _SIZE // 4]).advance_processing_time(
                  100).advance_watermark_to(200).add_elements(
                      l[2 * _SIZE // 4:3 * _SIZE // 4]).advance_processing_time(
                          100).advance_watermark_to(300).add_elements(
                              l[3 * _SIZE // 4:]).advance_processing_time(
                                  100).advance_watermark_to_infinity())
      _ = (p
           | stream_source
           | bigquery.WriteToBigQuery(output_table,
                                      schema=schema,
                                      method=bigquery.WriteToBigQuery \
                                          .Method.FILE_LOADS,
                                      triggering_frequency=100))

    hamcrest_assert(p, bq_matcher)

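  # The next test lowers bqfl._DEFAULT_MAX_FILE_SIZE and bqfl._MAXIMUM_LOAD_SIZE
  # so that a single trigger produces multiple partitions, which should push
  # the load through temporary tables and copy jobs rather than a direct load.
  # Note that these module-level overrides are not restored after the test.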
  @pytest.mark.it_postcommit
  def test_bqfl_streaming_with_copy_jobs(self):
    if isinstance(self.test_pipeline.runner, TestDataflowRunner):
      self.skipTest("TestStream is not supported on TestDataflowRunner")
    output_table = '%s_%s' % (self.output_table, 'with_copy_jobs')
    _SIZE = 100
    schema = self.BIG_QUERY_STREAMING_SCHEMA
    l = [{'Integr': i} for i in range(_SIZE)]

    bq_matcher = BigqueryFullResultStreamingMatcher(
        project=self.project,
        query="SELECT Integr FROM %s" % output_table,
        data=[(i, ) for i in range(100)])

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=bq_matcher,
        streaming=True,
        allow_unsafe_triggers=True)

    # Override these parameters to induce copy jobs
    bqfl._DEFAULT_MAX_FILE_SIZE = 100
    bqfl._MAXIMUM_LOAD_SIZE = 400

    with beam.Pipeline(argv=args) as p:
      stream_source = (
          TestStream().advance_watermark_to(0).advance_processing_time(
              100).add_elements(l[:_SIZE // 4]).
          advance_processing_time(100).advance_watermark_to(100).add_elements(
              l[_SIZE // 4:2 * _SIZE // 4]).advance_processing_time(
                  100).advance_watermark_to(200).add_elements(
                      l[2 * _SIZE // 4:3 * _SIZE // 4]).advance_processing_time(
                          100).advance_watermark_to(300).add_elements(
                              l[3 * _SIZE // 4:]).advance_processing_time(100).
          advance_watermark_to_infinity().advance_processing_time(100))

      _ = (p
           | stream_source
           | bigquery.WriteToBigQuery(output_table,
                                      schema=schema,
                                      method=bigquery.WriteToBigQuery \
                                          .Method.FILE_LOADS,
                                      triggering_frequency=100))

    hamcrest_assert(p, bq_matcher)

  @pytest.mark.it_postcommit
  def test_bqfl_streaming_with_dynamic_destinations(self):
    if isinstance(self.test_pipeline.runner, TestDataflowRunner):
      self.skipTest("TestStream is not supported on TestDataflowRunner")
    even_table = '%s_%s' % (self.output_table, "dynamic_dest_0")
    odd_table = '%s_%s' % (self.output_table, "dynamic_dest_1")
    output_table = lambda row: even_table if (
        row['Integr'] % 2 == 0) else odd_table
    _SIZE = 100
    schema = self.BIG_QUERY_STREAMING_SCHEMA
    l = [{'Integr': i} for i in range(_SIZE)]

    pipeline_verifiers = [
        BigqueryFullResultStreamingMatcher(
            project=self.project,
            query="SELECT Integr FROM %s" % even_table,
            data=[(i, ) for i in range(0, 100, 2)]),
        BigqueryFullResultStreamingMatcher(
            project=self.project,
            query="SELECT Integr FROM %s" % odd_table,
            data=[(i, ) for i in range(1, 100, 2)])
    ]

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=all_of(*pipeline_verifiers),
        streaming=True,
        allow_unsafe_triggers=True)

    with beam.Pipeline(argv=args) as p:
      stream_source = (
          TestStream().advance_watermark_to(0).advance_processing_time(
              100).add_elements(l[:_SIZE // 4]).
          advance_processing_time(100).advance_watermark_to(100).add_elements(
              l[_SIZE // 4:2 * _SIZE // 4]).advance_processing_time(
                  100).advance_watermark_to(200).add_elements(
                      l[2 * _SIZE // 4:3 * _SIZE // 4]).advance_processing_time(
                          100).advance_watermark_to(300).add_elements(
                              l[3 * _SIZE // 4:]).advance_processing_time(100).
          advance_watermark_to_infinity().advance_processing_time(100))

      _ = (p
           | stream_source
           | bigquery.WriteToBigQuery(output_table,
                                      schema=schema,
                                      method=bigquery.WriteToBigQuery \
                                          .Method.FILE_LOADS,
                                      triggering_frequency=100))
    hamcrest_assert(p, all_of(*pipeline_verifiers))

  @pytest.mark.it_postcommit
  def test_one_job_fails_all_jobs_fail(self):

    # If one of the import jobs fails, then other jobs must not be performed.
    # This is to avoid reinsertion of some records when a pipeline fails and
    # is rerun.
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)

    self.bigquery_client.get_or_create_table(
        self.project,
        self.dataset_id,
        output_table_1.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA),
        None,
        None)
    self.bigquery_client.get_or_create_table(
        self.project,
        self.dataset_id,
        output_table_2.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA_2),
        None,
        None)

    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, language FROM %s" % output_table_1,
            data=[]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, foundation FROM %s" % output_table_2,
            data=[])
    ]

    args = self.test_pipeline.get_full_options_as_args()

    with self.assertRaises(Exception):
      # The pipeline below fails because neither a schema nor SCHEMA_AUTODETECT
      # is specified.
      with beam.Pipeline(argv=args) as p:
        input = p | beam.Create(_ELEMENTS)
        input2 = p | "Broken record" >> beam.Create(['language_broken_record'])

        input = (input, input2) | beam.Flatten()

        _ = (
            input | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
                table=lambda x:
                (output_table_1 if 'language' in x else output_table_2),
                create_disposition=(
                    beam.io.BigQueryDisposition.CREATE_IF_NEEDED),
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
                temp_file_format=bigquery_tools.FileFormat.JSON))

    hamcrest_assert(p, all_of(*pipeline_verifiers))

  def tearDown(self):
    request = bigquery_api.BigqueryDatasetsDeleteRequest(
        projectId=self.project, datasetId=self.dataset_id, deleteContents=True)
    try:
      _LOGGER.info(
          "Deleting dataset %s in project %s", self.dataset_id, self.project)
      self.bigquery_client.client.datasets.Delete(request)
    except HttpError:
      _LOGGER.debug(
          'Failed to clean up dataset %s in project %s',
          self.dataset_id,
          self.project)


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()