#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pytype: skip-file

import argparse
import json
import logging
import os
import sys

from apache_beam.metrics import MetricsFilter
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.runners.runner import PipelineState
from apache_beam.testing.load_tests.load_test_metrics_utils import InfluxDBMetricsPublisherOptions
from apache_beam.testing.load_tests.load_test_metrics_utils import MetricsReader
from apache_beam.testing.test_pipeline import TestPipeline


class LoadTestOptions(PipelineOptions):
  """Pipeline options understood by :class:`LoadTest`.

  Declares the command-line flags that select where metrics are published
  (BigQuery and/or InfluxDB), the JSON input specification and the pipeline
  completion timeout.
  """
  @classmethod
  def _add_argparse_args(cls, parser):
    """Registers the load-test specific flags on ``parser``."""
    parser.add_argument(
        '--publish_to_big_query',
        type=cls._str_to_boolean,
        help='Publishes pipeline metrics to BigQuery table.')
    parser.add_argument(
        '--metrics_dataset',
        help='A BigQuery dataset where metrics should be '
        'written.')
    parser.add_argument(
        '--metrics_table',
        help='A BigQuery table where metrics should be '
        'written.')
    parser.add_argument(
        '--influx_measurement',
        help='An InfluxDB measurement where metrics should be published to. '
        'Measurement can be thought of as a SQL table. If empty, reporting to '
        'InfluxDB will be disabled.')
    parser.add_argument(
        '--influx_db_name',
        help='InfluxDB database name. If empty, reporting to InfluxDB will be '
        'disabled.')
    parser.add_argument(
        '--influx_hostname',
        help='Hostname to connect to InfluxDB. Defaults to '
        '"http://localhost:8086".',
        default='http://localhost:8086')
    parser.add_argument(
        '--input_options',
        type=json.loads,
        help='Input specification of SyntheticSource.')
    parser.add_argument(
        '--timeout_ms',
        type=int,
        default=0,
        help='Waiting time for the completion of the pipeline in milliseconds. '
        'Defaults to waiting forever.')

  @staticmethod
  def _str_to_boolean(value):
    """Parses a case-insensitive ``"true"``/``"false"`` string into a bool.

    Raises:
      argparse.ArgumentTypeError: if ``value`` is neither "true" nor "false".
    """
    known_values = {'false': False, 'true': True}
    try:
      return known_values[value.lower()]
    except KeyError:
      raise argparse.ArgumentTypeError(
          '"true" or "false" expected, got "{}" '
          'instead.'.format(value))


class LoadTest(object):
  """Base class for all integration and performance tests which export
  metrics to external databases: BigQuery or/and InfluxDB.

  Refer to :class:`~apache_beam.testing.load_tests.LoadTestOptions` for more
  information on the required pipeline options.

  If using InfluxDB with Basic HTTP authentication enabled, provide the
  following environment options: `INFLUXDB_USER` and `INFLUXDB_USER_PASSWORD`.
  """
  def __init__(self, metrics_namespace=None):
    """Creates the test pipeline and the metrics reader.

    Exits the process with status 1 when ``--input_options`` is missing.

    Args:
      metrics_namespace: namespace used to filter and publish metrics. When
        empty, the value of ``--metrics_table`` is used, or ``'default'`` if
        that is not set either.
    """
    # Be sure to set blocking to false for timeout_ms to work properly
    self.pipeline = TestPipeline(is_integration_test=True, blocking=False)
    assert not self.pipeline.blocking

    options = self.pipeline.get_pipeline_options().view_as(LoadTestOptions)
    self.timeout_ms = options.timeout_ms
    self.input_options = options.input_options
    self.extra_metrics = {}

    if metrics_namespace:
      self.metrics_namespace = metrics_namespace
    else:
      self.metrics_namespace = options.metrics_table or 'default'

    publish_to_bq = options.publish_to_big_query
    if publish_to_bq is None:
      logging.info(
          'Missing --publish_to_big_query option. Metrics will not '
          'be published to BigQuery.')
    if options.input_options is None:
      logging.error('--input_options argument is required.')
      sys.exit(1)

    gcloud_options = self.pipeline.get_pipeline_options().view_as(
        GoogleCloudOptions)
    self.project_id = gcloud_options.project

    self._metrics_monitor = MetricsReader(
        publish_to_bq=publish_to_bq,
        project_name=self.project_id,
        bq_table=options.metrics_table,
        bq_dataset=options.metrics_dataset,
        namespace=self.metrics_namespace,
        influxdb_options=InfluxDBMetricsPublisherOptions(
            options.influx_measurement,
            options.influx_db_name,
            options.influx_hostname,
            os.getenv('INFLUXDB_USER'),
            os.getenv('INFLUXDB_USER_PASSWORD')),
        # Apply filter to prevent system metrics from being published
        filters=MetricsFilter().with_namespace(self.metrics_namespace))

  def test(self):
    """An abstract method where the pipeline definition should be put."""
    pass

  def cleanup(self):
    """An abstract method that executes after the test method."""
    pass

  def run(self):
    """Runs the test, waits for the pipeline and publishes its metrics.

    If :meth:`test` did not set ``self.result`` itself, the pipeline is run
    here and awaited for up to ``timeout_ms`` milliseconds. :meth:`cleanup`
    is always invoked, even when an earlier step raises.

    Raises:
      AssertionError: if the awaited pipeline finished in the FAILED state.
    """
    try:
      self.test()
      if not hasattr(self, 'result'):
        self.result = self.pipeline.run()
        # Defaults to waiting forever, unless timeout_ms has been set
        state = self.result.wait_until_finish(duration=self.timeout_ms)
        # Raised explicitly rather than via ``assert`` so that the check is
        # not stripped when Python runs with optimizations (-O).
        if state == PipelineState.FAILED:
          raise AssertionError('Pipeline finished in state: %s' % state)
      self._metrics_monitor.publish_metrics(self.result, self.extra_metrics)
    finally:
      self.cleanup()

  def parse_synthetic_source_options(self, options=None):
    """Converts snake_case input options into a camelCase specification dict.

    Args:
      options: a mapping of input options. When falsy (``None`` or empty),
        ``self.input_options`` is used instead.

    Returns:
      A dict with the keys ``numRecords``, ``keySizeBytes``,
      ``valueSizeBytes``, ``hotKeyFraction``, ``numHotKeys``,
      ``bundleSizeDistribution`` and ``forceNumInitialBundles``.
    """
    if not options:
      options = self.input_options
    return {
        'numRecords': options.get('num_records'),
        'keySizeBytes': options.get('key_size'),
        'valueSizeBytes': options.get('value_size'),
        'hotKeyFraction': options.get('hot_key_fraction', 0),
        'numHotKeys': options.get('num_hot_keys', 0),
        'bundleSizeDistribution': {
            'type': options.get('bundle_size_distribution_type', 'const'),
            'param': options.get('bundle_size_distribution_param', 0)
        },
        'forceNumInitialBundles': options.get('force_initial_num_bundles', 0)
    }

  def get_option_or_default(self, opt_name, default=0):
    """Returns a testing option or a default value if it was not provided.

    The returned value is cast to the type of the default value. Any
    exception raised by that cast propagates to the caller.
    """
    option = self.pipeline.get_option(
        opt_name, bool_option=isinstance(default, bool))
    if option is None:
      return default
    return type(default)(option)