github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/external/xlang_kafkaio_perf_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  import logging
    19  import sys
    20  import typing
    21  
    22  import apache_beam as beam
    23  from apache_beam.io import iobase
    24  from apache_beam.io import kafka
    25  from apache_beam.testing.load_tests.load_test import LoadTest
    26  from apache_beam.testing.load_tests.load_test import LoadTestOptions
    27  from apache_beam.testing.load_tests.load_test_metrics_utils import CountMessages
    28  from apache_beam.testing.load_tests.load_test_metrics_utils import MeasureTime
    29  from apache_beam.testing.synthetic_pipeline import SyntheticSource
    30  from apache_beam.testing.test_pipeline import TestPipeline
    31  from apache_beam.transforms.util import Reshuffle
    32  
    33  WRITE_NAMESPACE = 'write'
    34  READ_NAMESPACE = 'read'
    35  
    36  _LOGGER = logging.getLogger(__name__)
    37  
    38  
    39  class KafkaIOTestOptions(LoadTestOptions):
    40    @classmethod
    41    def _add_argparse_args(cls, parser):
    42      parser.add_argument(
    43          '--test_class', required=True, help='Test class to run.')
    44  
    45      parser.add_argument('--kafka_topic', required=True, help='Kafka topic.')
    46  
    47      parser.add_argument(
    48          '--bootstrap_servers', help='URL TO Kafka Bootstrap service.')
    49  
    50      parser.add_argument(
    51          '--read_timeout',
    52          type=int,
    53          required=True,
    54          help='Time to wait for the events to be processed by the read pipeline'
    55          ' (in seconds)')
    56  
    57  
    58  class KafkaIOPerfTest:
    59    """Performance test for cross-language Kafka IO pipeline."""
    60    def run(self):
    61      write_test = _KafkaIOBatchWritePerfTest()
    62      read_test = _KafkaIOSDFReadPerfTest()
    63      write_test.run()
    64      read_test.run()
    65  
    66  
    67  class _KafkaIOBatchWritePerfTest(LoadTest):
    68    def __init__(self):
    69      super().__init__(WRITE_NAMESPACE)
    70      self.test_options = self.pipeline.get_pipeline_options().view_as(
    71          KafkaIOTestOptions)
    72      self.kafka_topic = self.test_options.kafka_topic
    73      # otherwise see 'ValueError: Unexpected DoFn type: beam:dofn:javasdk:0.1'
    74      self.pipeline.not_use_test_runner_api = True
    75  
    76    def test(self):
    77      _ = (
    78          self.pipeline
    79          | 'Generate records' >> iobase.Read(
    80              SyntheticSource(self.parse_synthetic_source_options())) \
    81              .with_output_types(typing.Tuple[bytes, bytes])
    82          | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace))
    83          | 'Avoid Fusion' >> Reshuffle()
    84          | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace))
    85          | 'WriteToKafka' >> kafka.WriteToKafka(
    86              producer_config={
    87                  'bootstrap.servers': self.test_options.bootstrap_servers
    88              },
    89              topic=self.kafka_topic))
    90  
    91    def cleanup(self):
    92      pass
    93  
    94  
    95  class _KafkaIOSDFReadPerfTest(LoadTest):
    96    def __init__(self):
    97      super().__init__(READ_NAMESPACE)
    98      self.test_options = self.pipeline.get_pipeline_options().view_as(
    99          KafkaIOTestOptions)
   100      self.timeout_ms = self.test_options.read_timeout * 1000
   101      self.kafka_topic = self.test_options.kafka_topic
   102      # otherwise see 'ValueError: Unexpected DoFn type: beam:dofn:javasdk:0.1'
   103      self.pipeline.not_use_test_runner_api = True
   104  
   105    def test(self):
   106      _ = (
   107          self.pipeline
   108          | 'ReadFromKafka' >> kafka.ReadFromKafka(
   109              consumer_config={
   110                  'bootstrap.servers': self.test_options.bootstrap_servers,
   111                  'auto.offset.reset': 'earliest'
   112              },
   113              topics=[self.kafka_topic])
   114          | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace))
   115          | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace)))
   116  
   117    def cleanup(self):
   118      # assert number of records after test pipeline run
   119      total_messages = self._metrics_monitor.get_counter_metric(
   120          self.result, CountMessages.LABEL)
   121      assert total_messages == self.input_options['num_records']
   122  
   123  
   124  if __name__ == '__main__':
   125    logging.basicConfig(level=logging.INFO)
   126  
   127    test_options = TestPipeline().get_pipeline_options().view_as(
   128        KafkaIOTestOptions)
   129    supported_test_classes = list(
   130        filter(
   131            lambda s: s.endswith('PerfTest') and not s.startswith('_'),
   132            dir(sys.modules[__name__])))
   133  
   134    if test_options.test_class not in supported_test_classes:
   135      raise RuntimeError(
   136          f'Test {test_options.test_class} not found. '
   137          'Supported tests are {supported_test_classes}')
   138  
   139    getattr(sys.modules[__name__], test_options.test_class)().run()