github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/load_tests/load_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # pytype: skip-file
    18  
    19  import argparse
    20  import json
    21  import logging
    22  import os
    23  import sys
    24  
    25  from apache_beam.metrics import MetricsFilter
    26  from apache_beam.options.pipeline_options import GoogleCloudOptions
    27  from apache_beam.options.pipeline_options import PipelineOptions
    28  from apache_beam.runners.runner import PipelineState
    29  from apache_beam.testing.load_tests.load_test_metrics_utils import InfluxDBMetricsPublisherOptions
    30  from apache_beam.testing.load_tests.load_test_metrics_utils import MetricsReader
    31  from apache_beam.testing.test_pipeline import TestPipeline
    32  
    33  
    34  class LoadTestOptions(PipelineOptions):
    35    @classmethod
    36    def _add_argparse_args(cls, parser):
    37      parser.add_argument(
    38          '--publish_to_big_query',
    39          type=cls._str_to_boolean,
    40          help='Publishes pipeline metrics to BigQuery table.')
    41      parser.add_argument(
    42          '--metrics_dataset',
    43          help='A BigQuery dataset where metrics should be'
    44          'written.')
    45      parser.add_argument(
    46          '--metrics_table',
    47          help='A BigQuery table where metrics should be '
    48          'written.')
    49      parser.add_argument(
    50          '--influx_measurement',
    51          help='An InfluxDB measurement where metrics should be published to. '
    52          'Measurement can be thought of as a SQL table. If empty, reporting to '
    53          'InfluxDB will be disabled.')
    54      parser.add_argument(
    55          '--influx_db_name',
    56          help='InfluxDB database name. If empty, reporting to InfluxDB will be '
    57          'disabled.')
    58      parser.add_argument(
    59          '--influx_hostname',
    60          help='Hostname to connect to InfluxDB. Defaults to '
    61          '"http://localhost:8086".',
    62          default='http://localhost:8086')
    63      parser.add_argument(
    64          '--input_options',
    65          type=json.loads,
    66          help='Input specification of SyntheticSource.')
    67      parser.add_argument(
    68          '--timeout_ms',
    69          type=int,
    70          default=0,
    71          help='Waiting time for the completion of the pipeline in milliseconds.'
    72          'Defaults to waiting forever.')
    73  
    74    @staticmethod
    75    def _str_to_boolean(value):
    76      try:
    77        return bool(['false', 'true'].index(value.lower()))
    78      except ValueError:
    79        raise argparse.ArgumentTypeError(
    80            '"true" or "false" expected, got "{}" '
    81            'instead.'.format(value))
    82  
    83  
    84  class LoadTest(object):
    85    """Base class for all integration and performance tests which export
    86    metrics to external databases: BigQuery or/and InfluxDB.
    87  
    88    Refer to :class:`~apache_beam.testing.load_tests.LoadTestOptions` for more
    89    information on the required pipeline options.
    90  
    91    If using InfluxDB with Basic HTTP authentication enabled, provide the
    92    following environment options: `INFLUXDB_USER` and `INFLUXDB_USER_PASSWORD`.
    93    """
    94    def __init__(self, metrics_namespace=None):
    95      # Be sure to set blocking to false for timeout_ms to work properly
    96      self.pipeline = TestPipeline(is_integration_test=True, blocking=False)
    97      assert not self.pipeline.blocking
    98  
    99      options = self.pipeline.get_pipeline_options().view_as(LoadTestOptions)
   100      self.timeout_ms = options.timeout_ms
   101      self.input_options = options.input_options
   102      self.extra_metrics = {}
   103  
   104      if metrics_namespace:
   105        self.metrics_namespace = metrics_namespace
   106      else:
   107        self.metrics_namespace = options.metrics_table \
   108          if options.metrics_table else 'default'
   109  
   110      publish_to_bq = options.publish_to_big_query
   111      if publish_to_bq is None:
   112        logging.info(
   113            'Missing --publish_to_big_query option. Metrics will not '
   114            'be published to BigQuery.')
   115      if options.input_options is None:
   116        logging.error('--input_options argument is required.')
   117        sys.exit(1)
   118  
   119      gcloud_options = self.pipeline.get_pipeline_options().view_as(
   120          GoogleCloudOptions)
   121      self.project_id = gcloud_options.project
   122  
   123      self._metrics_monitor = MetricsReader(
   124          publish_to_bq=publish_to_bq,
   125          project_name=self.project_id,
   126          bq_table=options.metrics_table,
   127          bq_dataset=options.metrics_dataset,
   128          namespace=self.metrics_namespace,
   129          influxdb_options=InfluxDBMetricsPublisherOptions(
   130              options.influx_measurement,
   131              options.influx_db_name,
   132              options.influx_hostname,
   133              os.getenv('INFLUXDB_USER'),
   134              os.getenv('INFLUXDB_USER_PASSWORD')),
   135          # Apply filter to prevent system metrics from being published
   136          filters=MetricsFilter().with_namespace(self.metrics_namespace))
   137  
   138    def test(self):
   139      """An abstract method where the pipeline definition should be put."""
   140      pass
   141  
   142    def cleanup(self):
   143      """An abstract method that executes after the test method."""
   144      pass
   145  
   146    def run(self):
   147      try:
   148        self.test()
   149        if not hasattr(self, 'result'):
   150          self.result = self.pipeline.run()
   151          # Defaults to waiting forever, unless timeout_ms has been set
   152          state = self.result.wait_until_finish(duration=self.timeout_ms)
   153          assert state != PipelineState.FAILED
   154        self._metrics_monitor.publish_metrics(self.result, self.extra_metrics)
   155      finally:
   156        self.cleanup()
   157  
   158    def parse_synthetic_source_options(self, options=None):
   159      if not options:
   160        options = self.input_options
   161      return {
   162          'numRecords': options.get('num_records'),
   163          'keySizeBytes': options.get('key_size'),
   164          'valueSizeBytes': options.get('value_size'),
   165          'hotKeyFraction': options.get('hot_key_fraction', 0),
   166          'numHotKeys': options.get('num_hot_keys', 0),
   167          'bundleSizeDistribution': {
   168              'type': options.get('bundle_size_distribution_type', 'const'),
   169              'param': options.get('bundle_size_distribution_param', 0)
   170          },
   171          'forceNumInitialBundles': options.get('force_initial_num_bundles', 0)
   172      }
   173  
   174    def get_option_or_default(self, opt_name, default=0):
   175      """Returns a testing option or a default value if it was not provided.
   176  
   177      The returned value is cast to the type of the default value.
   178      """
   179      option = self.pipeline.get_option(
   180          opt_name, bool_option=type(default) == bool)
   181      if option is None:
   182        return default
   183      try:
   184        return type(default)(option)
   185      except:
   186        raise