github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/portability/flink_runner_test.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pytype: skip-file

import argparse
import logging
import shlex
import typing
import unittest
from os import linesep
from os import path
from os.path import exists
from shutil import rmtree
from tempfile import mkdtemp

import pytest

import apache_beam as beam
from apache_beam import Impulse
from apache_beam import Map
from apache_beam.io.external.generate_sequence import GenerateSequence
from apache_beam.io.kafka import ReadFromKafka
from apache_beam.io.kafka import WriteToKafka
from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.options.pipeline_options import FlinkRunnerOptions
from apache_beam.options.pipeline_options import PortableOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.runners.portability import job_server
from apache_beam.runners.portability import portable_runner
from apache_beam.runners.portability import portable_runner_test
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.transforms.sql import SqlTransform

# Run as
#
# pytest flink_runner_test.py[::TestClass::test_case] \
#     --test-pipeline-options="--environment_type=LOOPBACK"

_LOGGER = logging.getLogger(__name__)

Row = typing.NamedTuple("Row", [("col1", int), ("col2", str)])
beam.coders.registry.register_coder(Row, beam.coders.RowCoder)


class FlinkRunnerTest(portable_runner_test.PortableRunnerTest):
  _use_grpc = True
  _use_subprocesses = True

  conf_dir = None
  expansion_port = None
  flink_job_server_jar = None

  def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.environment_type = None
    self.environment_config = None
    self.enable_commit = False

  def setUp(self):
    self.enable_commit = False

  @pytest.fixture(autouse=True)
  def parse_options(self, request):
    if not request.config.option.test_pipeline_options:
      raise unittest.SkipTest(
          'Skipping because --test-pipeline-options is not specified.')
    test_pipeline_options = request.config.option.test_pipeline_options
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        '--flink_job_server_jar',
        help='Job server jar to submit jobs.',
        action='store')
    parser.add_argument(
        '--environment_type',
        default='LOOPBACK',
        choices=['DOCKER', 'PROCESS', 'LOOPBACK'],
        help='Set the environment type for running user code. DOCKER runs '
        'user code in a container. PROCESS runs user code in '
        'automatically started processes. LOOPBACK runs user code on '
        'the same process that originally submitted the job.')
    parser.add_argument(
        '--environment_option',
        '--environment_options',
        dest='environment_options',
        action='append',
        default=None,
        help=(
            'Environment configuration for running the user code. '
            'Recognized options depend on --environment_type.\n '
            'For DOCKER: docker_container_image (optional)\n '
            'For PROCESS: process_command (required), process_variables '
            '(optional, comma-separated)\n '
            'For EXTERNAL: external_service_address (required)'))
    known_args, unknown_args = parser.parse_known_args(
        shlex.split(test_pipeline_options))
    if unknown_args:
      _LOGGER.warning('Discarding unrecognized arguments %s' % unknown_args)
    self.set_flink_job_server_jar(
        known_args.flink_job_server_jar or
        job_server.JavaJarJobServer.path_to_beam_jar((
            ':runners:flink:%s:job-server:shadowJar' %
            FlinkRunnerOptions.PUBLISHED_FLINK_VERSIONS[-1])))
    self.environment_type = known_args.environment_type
    self.environment_options = known_args.environment_options

  @classmethod
  def tearDownClass(cls):
    if cls.conf_dir and exists(cls.conf_dir):
      _LOGGER.info("removing conf dir: %s" % cls.conf_dir)
      rmtree(cls.conf_dir)
    super().tearDownClass()

  @classmethod
  def _create_conf_dir(cls):
    """Create (and save a static reference to) a "conf dir", used to provide
    metrics configs and verify metrics output.

    It gets cleaned up when the suite is done executing."""

    if hasattr(cls, 'conf_dir'):
      cls.conf_dir = mkdtemp(prefix='flinktest-conf')

      # path for a FileReporter to write metrics to
      cls.test_metrics_path = path.join(cls.conf_dir, 'test-metrics.txt')

      # path to write Flink configuration to
      conf_path = path.join(cls.conf_dir, 'flink-conf.yaml')
      file_reporter = 'org.apache.beam.runners.flink.metrics.FileReporter'
      with open(conf_path, 'w') as f:
        f.write(
            linesep.join([
                'metrics.reporters: file',
                'metrics.reporter.file.class: %s' % file_reporter,
                'metrics.reporter.file.path: %s' % cls.test_metrics_path,
                'metrics.scope.operator: <operator_name>',
            ]))

  @classmethod
  def _subprocess_command(cls, job_port, expansion_port):
    # will be cleaned up at the end of this method, and recreated and used by
    # the job server
    tmp_dir = mkdtemp(prefix='flinktest')

    cls._create_conf_dir()
    cls.expansion_port = expansion_port

    try:
      return [
          'java',
          '-Dorg.slf4j.simpleLogger.defaultLogLevel=warn',
          '-jar',
          cls.flink_job_server_jar,
          '--flink-master',
          '[local]',
          '--flink-conf-dir',
          cls.conf_dir,
          '--artifacts-dir',
          tmp_dir,
          '--job-port',
          str(job_port),
          '--artifact-port',
          '0',
          '--expansion-port',
          str(expansion_port),
      ]
    finally:
      rmtree(tmp_dir)

  @classmethod
  def get_runner(cls):
    return portable_runner.PortableRunner()

  @classmethod
  def get_expansion_service(cls):
    # TODO: Move the expansion service address into PipelineOptions
    return 'localhost:%s' % cls.expansion_port

  @classmethod
  def set_flink_job_server_jar(cls, flink_job_server_jar):
    cls.flink_job_server_jar = flink_job_server_jar

  def create_options(self):
    options = super().create_options()
    options.view_as(DebugOptions).experiments = ['beam_fn_api']
    options._all_options['parallelism'] = 2
    options.view_as(PortableOptions).environment_type = self.environment_type
    options.view_as(
        PortableOptions).environment_options = self.environment_options
    if self.enable_commit:
      options.view_as(StandardOptions).streaming = True
      options._all_options['checkpointing_interval'] = 3000
      options._all_options['shutdown_sources_after_idle_ms'] = 60000
      options._all_options['number_of_execution_retries'] = 1

    return options

  # Can't read host files from within docker; read a "local" file there.
  def test_read(self):
    print('name:', __name__)
    with self.create_pipeline() as p:
      lines = p | beam.io.ReadFromText('/etc/profile')
      assert_that(lines, lambda lines: len(lines) > 0)

  def test_no_subtransform_composite(self):
    raise unittest.SkipTest("BEAM-4781")

  def test_external_transform(self):
    with self.create_pipeline() as p:
      res = (
          p
          | GenerateSequence(
              start=1, stop=10,
              expansion_service=self.get_expansion_service()))

      assert_that(res, equal_to([i for i in range(1, 10)]))

  def test_expand_kafka_read(self):
    # We expect to fail here because we do not have a Kafka cluster handy.
    # Nevertheless, we check that the transform is expanded by the
    # ExpansionService and that the pipeline fails during execution.
    with self.assertRaises(Exception) as ctx:
      self.enable_commit = True
      with self.create_pipeline() as p:
        # pylint: disable=expression-not-assigned
        (
            p
            | ReadFromKafka(
                consumer_config={
                    'bootstrap.servers': 'notvalid1:7777, notvalid2:3531',
                    'group.id': 'any_group'
                },
                topics=['topic1', 'topic2'],
                key_deserializer='org.apache.kafka.'
                'common.serialization.'
                'ByteArrayDeserializer',
                value_deserializer='org.apache.kafka.'
                'common.serialization.'
                'LongDeserializer',
                commit_offset_in_finalize=True,
                timestamp_policy=ReadFromKafka.create_time_policy,
                expansion_service=self.get_expansion_service()))
    self.assertTrue(
        'No resolvable bootstrap urls given in bootstrap.servers' in str(
            ctx.exception),
        'Expected to fail due to invalid bootstrap.servers, but '
        'failed due to:\n%s' % str(ctx.exception))

  def test_expand_kafka_write(self):
    # We just test the expansion but do not execute.
    # pylint: disable=expression-not-assigned
    (
        self.create_pipeline()
        | Impulse()
        | Map(lambda input: (1, input))
        | WriteToKafka(
            producer_config={
                'bootstrap.servers': 'localhost:9092, notvalid2:3531'
            },
            topic='topic1',
            key_serializer='org.apache.kafka.'
            'common.serialization.'
            'LongSerializer',
            value_serializer='org.apache.kafka.'
            'common.serialization.'
            'ByteArraySerializer',
            expansion_service=self.get_expansion_service()))

  def test_sql(self):
    with self.create_pipeline() as p:
      output = (
          p
          | 'Create' >> beam.Create([Row(x, str(x)) for x in range(5)])
          | 'Sql' >> SqlTransform(
              """SELECT col1, col2 || '*' || col2 as col2,
                     power(col1, 2) as col3
                 FROM PCOLLECTION
              """,
              expansion_service=self.get_expansion_service()))
      assert_that(
          output,
          equal_to([(x, '{x}*{x}'.format(x=x), x * x) for x in range(5)]))

  def test_flattened_side_input(self):
    # Blocked on support for transcoding
    # https://jira.apache.org/jira/browse/BEAM-6523
    super().test_flattened_side_input(with_transcoding=False)

  def test_metrics(self):
    super().test_metrics(check_gauge=False)

  def test_sdf_with_watermark_tracking(self):
    raise unittest.SkipTest("BEAM-2939")

  def test_callbacks_with_exception(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19526")

  def test_register_finalizations(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19526")

  def test_custom_merging_window(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/20641")

  # Inherits all other tests.


class FlinkRunnerTestOptimized(FlinkRunnerTest):
  # TODO: Remove these tests after resolving
  # https://github.com/apache/beam/issues/19422 and enabling
  # PortableRunnerOptimized
  def create_options(self):
    options = super().create_options()
    options.view_as(DebugOptions).experiments = [
        'pre_optimize=all'
    ] + options.view_as(DebugOptions).experiments
    return options

  def test_external_transform(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19461")

  def test_expand_kafka_read(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19461")

  def test_expand_kafka_write(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19461")

  def test_sql(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19461")

  def test_pack_combiners(self):
    # Stages produced by translations.pack_combiners are fused
    # by translations.greedily_fuse, which prevents the stages
    # from being detected by the test using counters.
    self._test_pack_combiners(assert_using_counter_names=False)


class FlinkRunnerTestStreaming(FlinkRunnerTest):
  def create_options(self):
    options = super().create_options()
    options.view_as(StandardOptions).streaming = True
    return options

  def test_callbacks_with_exception(self):
    self.enable_commit = True
    super().test_callbacks_with_exception()

  def test_register_finalizations(self):
    self.enable_commit = True
    super().test_register_finalizations()


if __name__ == '__main__':
  # Run the tests.
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()
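
# An illustrative invocation beyond the LOOPBACK example at the top of the
# file (a sketch, not part of the suite): the parse_options fixture also
# accepts --flink_job_server_jar and --environment_option inside
# --test-pipeline-options, and the fixture's help text notes that PROCESS
# requires process_command. The jar and boot-binary paths below are
# hypothetical placeholders.
#
#   pytest flink_runner_test.py::FlinkRunnerTest \
#       --test-pipeline-options="--environment_type=PROCESS \
#           --environment_option=process_command=/path/to/sdk_worker_boot \
#           --flink_job_server_jar=/path/to/flink-job-server-shadow.jar"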