github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/bigtableio_test.py (about) 1 # 2 # Licensed to the Apache Software Foundation (ASF) under one or more 3 # contributor license agreements. See the NOTICE file distributed with 4 # this work for additional information regarding copyright ownership. 5 # The ASF licenses this file to You under the Apache License, Version 2.0 6 # (the "License"); you may not use this file except in compliance with 7 # the License. You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 # 17 18 """Unit tests for BigTable service.""" 19 20 # pytype: skip-file 21 import datetime 22 import string 23 import unittest 24 import uuid 25 from random import choice 26 27 from mock import MagicMock 28 from mock import patch 29 30 from apache_beam.internal.metrics.metric import ServiceCallMetric 31 from apache_beam.io.gcp import bigtableio 32 from apache_beam.io.gcp import resource_identifiers 33 from apache_beam.metrics import monitoring_infos 34 from apache_beam.metrics.execution import MetricsEnvironment 35 36 # Protect against environments where bigtable library is not available. 
try:
  from google.cloud.bigtable import client, row
  from google.cloud.bigtable.instance import Instance
  from google.cloud.bigtable.table import Table
  from google.rpc.code_pb2 import OK, ALREADY_EXISTS
  from google.rpc.status_pb2 import Status
except ImportError:
  # `client` doubles as the "dependencies available" flag for skipIf below.
  client = None


@unittest.skipIf(client is None, 'Bigtable dependencies are not installed')
class TestWriteBigTable(unittest.TestCase):
  """Tests the service-call metrics recorded by ``_BigTableWriteFn``."""

  TABLE_PREFIX = "python-test"
  # Each id is the prefix plus the first 8 characters of a fresh UUID.
  _PROJECT_ID = TABLE_PREFIX + "-" + str(uuid.uuid4())[:8]
  _INSTANCE_ID = TABLE_PREFIX + "-" + str(uuid.uuid4())[:8]
  _TABLE_ID = TABLE_PREFIX + "-" + str(uuid.uuid4())[:8]

  def setUp(self):
    """Build a real ``Table`` object on top of a mocked Bigtable client."""
    # Named `mock_client` so it does not shadow the module-level `client`.
    mock_client = MagicMock()
    instance = Instance(self._INSTANCE_ID, mock_client)
    self.table = Table(self._TABLE_ID, instance)

  def test_write_metrics(self):
    # Writes four rows with `Table.mutate_rows` patched to return alternating
    # ALREADY_EXISTS / OK statuses, then checks that each status was counted
    # twice in the process-wide metrics container.
    MetricsEnvironment.process_wide_container().reset()
    write_fn = bigtableio._BigTableWriteFn(
        self._PROJECT_ID, self._INSTANCE_ID, self._TABLE_ID)
    write_fn.table = self.table
    write_fn.start_bundle()
    number_of_rows = 2
    error = Status()
    error.message = 'Entity already exists.'
    error.code = ALREADY_EXISTS
    success = Status()
    success.message = 'Success'
    success.code = OK
    # [error, success, error, success]: two responses of each kind.
    rows_response = [error, success] * number_of_rows
    with patch.object(Table, 'mutate_rows', return_value=rows_response):
      direct_rows = [self.generate_row(i) for i in range(number_of_rows * 2)]
      for direct_row in direct_rows:
        write_fn.process(direct_row)
      try:
        write_fn.finish_bundle()
      except Exception:  # pylint: disable=broad-except
        # Currently we fail the bundle when there are any failures.
        # TODO(https://github.com/apache/beam/issues/21396): remove after
        # bigtableio can selectively retry.
        pass
      self.verify_write_call_metric(
          self._PROJECT_ID,
          self._INSTANCE_ID,
          self._TABLE_ID,
          ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
              ALREADY_EXISTS),
          2)
      self.verify_write_call_metric(
          self._PROJECT_ID,
          self._INSTANCE_ID,
          self._TABLE_ID,
          ServiceCallMetric.bigtable_error_code_to_grpc_status_string(OK),
          2)

  def generate_row(self, index=0):
    """Return a ``DirectRow`` with ten cells holding a random 100-char value.

    Args:
      index: number embedded, zero-padded to 7 digits, in the row key.

    Returns:
      A ``row.DirectRow`` keyed ``beam_key<index>`` with columns
      ``field0`` .. ``field9`` in column family ``cf1``.
    """
    alphabet = string.ascii_letters + string.digits
    # Draw a fresh character for every position; drawing once and repeating
    # it would yield a value made of a single repeated character.
    value = ''.join(choice(alphabet) for _ in range(100))
    column_family_id = 'cf1'
    key = "beam_key%s" % ('{0:07}'.format(index))
    direct_row = row.DirectRow(row_key=key)
    for column_id in range(10):
      direct_row.set_cell(
          column_family_id, ('field%s' % column_id).encode('utf-8'),
          value,
          datetime.datetime.now())
    return direct_row

  def verify_write_call_metric(
      self, project_id, instance_id, table_id, status, count):
    """Check if a metric was recorded for the Bigtable MutateRows API call.

    Args:
      project_id: value expected in the Bigtable project id label.
      instance_id: value expected in the instance id label.
      table_id: value expected in the table id label.
      status: value expected in the status label.
      count: expected value of the API request counter.

    Fails the test if no process-wide monitoring info equals the expected
    counter once ``start_time`` has been cleared on both sides.
    """
    process_wide_monitoring_infos = list(
        MetricsEnvironment.process_wide_container().
        to_runner_api_monitoring_infos(None).values())
    resource = resource_identifiers.BigtableTable(
        project_id, instance_id, table_id)
    labels = {
        monitoring_infos.SERVICE_LABEL: 'BigTable',
        monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: project_id,
        monitoring_infos.INSTANCE_ID_LABEL: instance_id,
        monitoring_infos.TABLE_ID_LABEL: table_id,
        monitoring_infos.STATUS_LABEL: status
    }
    expected_mi = monitoring_infos.int64_counter(
        monitoring_infos.API_REQUEST_COUNT_URN, count, labels=labels)
    # start_time is cleared on both sides so it cannot affect the comparison.
    expected_mi.ClearField("start_time")

    found = False
    for actual_mi in process_wide_monitoring_infos:
      actual_mi.ClearField("start_time")
      if expected_mi == actual_mi:
        found = True
        break
    self.assertTrue(
        found, "Did not find write call metric with status: %s" % status)


if __name__ == '__main__':
  unittest.main()