#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Performance test for the cross-language Kafka IO transforms.

The test is made of two load tests that are run one after the other: the
first writes synthetic records to a Kafka topic, the second reads the topic
back and checks the number of records that were counted.

The class to run is selected with ``--test_class``; see
``KafkaIOTestOptions`` for the remaining options defined by this module.
"""

import logging
import sys
import typing

import apache_beam as beam
from apache_beam.io import iobase
from apache_beam.io import kafka
from apache_beam.testing.load_tests.load_test import LoadTest
from apache_beam.testing.load_tests.load_test import LoadTestOptions
from apache_beam.testing.load_tests.load_test_metrics_utils import CountMessages
from apache_beam.testing.load_tests.load_test_metrics_utils import MeasureTime
from apache_beam.testing.synthetic_pipeline import SyntheticSource
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.transforms.util import Reshuffle

# Metrics namespaces handed to the LoadTest base class by the two tests.
WRITE_NAMESPACE = 'write'
READ_NAMESPACE = 'read'

_LOGGER = logging.getLogger(__name__)


class KafkaIOTestOptions(LoadTestOptions):
  """Command-line options specific to the Kafka IO performance tests."""
  @classmethod
  def _add_argparse_args(cls, parser):
    """Register the Kafka test arguments on ``parser``.

    Args:
      parser: the argument parser the options are added to.
    """
    parser.add_argument(
        '--test_class', required=True, help='Test class to run.')

    parser.add_argument('--kafka_topic', required=True, help='Kafka topic.')

    # Not marked as required here; the value is passed as 'bootstrap.servers'
    # to both the write and the read transform below.
    parser.add_argument(
        '--bootstrap_servers', help='URL TO Kafka Bootstrap service.')

    parser.add_argument(
        '--read_timeout',
        type=int,
        required=True,
        help='Time to wait for the events to be processed by the read pipeline'
        ' (in seconds)')


class KafkaIOPerfTest:
  """Performance test for cross-language Kafka IO pipeline."""
  def run(self):
    """Run the write load test, then the read load test."""
    write_test = _KafkaIOBatchWritePerfTest()
    read_test = _KafkaIOSDFReadPerfTest()
    write_test.run()
    read_test.run()


class _KafkaIOBatchWritePerfTest(LoadTest):
  """Load test that writes synthetic records to the configured Kafka topic."""
  def __init__(self):
    super().__init__(WRITE_NAMESPACE)
    self.test_options = self.pipeline.get_pipeline_options().view_as(
        KafkaIOTestOptions)
    self.kafka_topic = self.test_options.kafka_topic
    # otherwise see 'ValueError: Unexpected DoFn type: beam:dofn:javasdk:0.1'
    self.pipeline.not_use_test_runner_api = True

  def test(self):
    """Build the pipeline: synthetic source -> metrics -> WriteToKafka."""
    _ = (
        self.pipeline
        | 'Generate records' >> iobase.Read(
            SyntheticSource(self.parse_synthetic_source_options())) \
        .with_output_types(typing.Tuple[bytes, bytes])
        | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace))
        | 'Avoid Fusion' >> Reshuffle()
        | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace))
        | 'WriteToKafka' >> kafka.WriteToKafka(
            producer_config={
                'bootstrap.servers': self.test_options.bootstrap_servers
            },
            topic=self.kafka_topic))

  def cleanup(self):
    """No post-run checks are performed for the write pipeline."""
    pass


class _KafkaIOSDFReadPerfTest(LoadTest):
  """Load test that reads the configured Kafka topic and counts the records."""
  def __init__(self):
    super().__init__(READ_NAMESPACE)
    self.test_options = self.pipeline.get_pipeline_options().view_as(
        KafkaIOTestOptions)
    # --read_timeout is given in seconds; stored here in milliseconds.
    # NOTE(review): presumably consumed by LoadTest when waiting for the
    # pipeline to finish - confirm against the base class.
    self.timeout_ms = self.test_options.read_timeout * 1000
    self.kafka_topic = self.test_options.kafka_topic
    # otherwise see 'ValueError: Unexpected DoFn type: beam:dofn:javasdk:0.1'
    self.pipeline.not_use_test_runner_api = True

  def test(self):
    """Build the pipeline: ReadFromKafka -> metrics."""
    _ = (
        self.pipeline
        | 'ReadFromKafka' >> kafka.ReadFromKafka(
            consumer_config={
                'bootstrap.servers': self.test_options.bootstrap_servers,
                'auto.offset.reset': 'earliest'
            },
            topics=[self.kafka_topic])
        | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace))
        | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace)))

  def cleanup(self):
    """Check that the counted records match the configured 'num_records'.

    NOTE(review): `_metrics_monitor`, `result` and `input_options` are not
    defined in this file; presumably they are provided by LoadTest - confirm.
    """
    # assert number of records after test pipeline run
    total_messages = self._metrics_monitor.get_counter_metric(
        self.result, CountMessages.LABEL)
    assert total_messages == self.input_options['num_records']


if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)

  test_options = TestPipeline().get_pipeline_options().view_as(
      KafkaIOTestOptions)
  # Public module-level names ending in 'PerfTest' are the runnable tests;
  # the underscore-prefixed classes are building blocks and are excluded.
  supported_test_classes = [
      name for name in dir(sys.modules[__name__])
      if name.endswith('PerfTest') and not name.startswith('_')
  ]

  if test_options.test_class not in supported_test_classes:
    # Both fragments need the f prefix: implicit concatenation does not
    # carry it over from the first literal to the second.
    raise RuntimeError(
        f'Test {test_options.test_class} not found. '
        f'Supported tests are {supported_test_classes}')

  getattr(sys.modules[__name__], test_options.test_class)().run()