#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A runner for executing portable pipelines on Flink."""

# pytype: skip-file

import logging
import os
import re
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.parse` is loaded. This still binds the name `urllib`.
import urllib.parse

from apache_beam.options import pipeline_options
from apache_beam.runners.portability import flink_uber_jar_job_server
from apache_beam.runners.portability import job_server
from apache_beam.runners.portability import portable_runner

# Placeholder values of `flink_master` that are not real addresses. They are
# never prefixed with an HTTP scheme and never used as an uber jar target.
MAGIC_HOST_NAMES = ['[local]', '[auto]']

_LOGGER = logging.getLogger(__name__)


class FlinkRunner(portable_runner.PortableRunner):
  """Portable runner that picks Flink-specific defaults and job servers."""
  def run_pipeline(self, pipeline, options):
    """Runs `pipeline`, defaulting the environment type when appropriate.

    When `flink_master` is one of MAGIC_HOST_NAMES and neither
    `environment_type` nor `output_executable_path` is set, the environment
    type is set to 'LOOPBACK' before delegating to the parent runner.

    Args:
      pipeline: the pipeline to execute; passed through to the parent class.
      options: pipeline options; may be mutated as described above.

    Returns:
      Whatever the parent class's `run_pipeline` returns.
    """
    portable_options = options.view_as(pipeline_options.PortableOptions)
    flink_options = options.view_as(pipeline_options.FlinkRunnerOptions)
    if (flink_options.flink_master in MAGIC_HOST_NAMES and
        not portable_options.environment_type and
        not portable_options.output_executable_path):
      portable_options.environment_type = 'LOOPBACK'
    return super().run_pipeline(pipeline, options)

  def default_job_server(self, options):
    """Creates the job server selected by the Flink options.

    The `flink_master` option is normalized with `add_http_scheme` and
    written back to `options`. If `flink_submit_uber_jar` is set and the
    master is not one of MAGIC_HOST_NAMES, a `FlinkUberJarJobServer` for that
    master is returned; otherwise a `FlinkJarJobServer` wrapped in
    `StopOnExitJobServer` is returned.

    Args:
      options: pipeline options; its `flink_master` value is updated in place.

    Returns:
      The job server instance described above.
    """
    flink_options = options.view_as(pipeline_options.FlinkRunnerOptions)
    flink_master = self.add_http_scheme(flink_options.flink_master)
    flink_options.flink_master = flink_master
    if (flink_options.flink_submit_uber_jar and
        flink_master not in MAGIC_HOST_NAMES):
      # This has to be changed to [auto], otherwise we will attempt to submit
      # the pipeline remotely on the Flink JobMaster which will _fail_.
      # DO NOT CHANGE the following line, unless you have tested this.
      flink_options.flink_master = '[auto]'
      return flink_uber_jar_job_server.FlinkUberJarJobServer(
          flink_master, options)
    else:
      return job_server.StopOnExitJobServer(FlinkJarJobServer(options))

  def create_job_service_handle(self, job_service, options):
    """Wraps `job_service` in a `JobServiceHandle`.

    Unknown options are retained exactly when `flink_submit_uber_jar` is set.

    Args:
      job_service: the job service to wrap.
      options: pipeline options forwarded to the handle.

    Returns:
      A `portable_runner.JobServiceHandle`.
    """
    return portable_runner.JobServiceHandle(
        job_service,
        options,
        retain_unknown_options=options.view_as(
            pipeline_options.FlinkRunnerOptions).flink_submit_uber_jar)

  @staticmethod
  def add_http_scheme(flink_master):
    """Adds a http protocol scheme if none provided.

    Surrounding whitespace is stripped first. Values in MAGIC_HOST_NAMES and
    values that already start with `http://` or `https://` are returned
    without a prefix.

    Args:
      flink_master: the Flink master address as a string.

    Returns:
      The stripped address, prefixed with `http://` when it had no scheme.
    """
    flink_master = flink_master.strip()
    if (flink_master not in MAGIC_HOST_NAMES and
        not re.search(r'^http[s]?://', flink_master)):
      _LOGGER.info(
          'Adding HTTP protocol scheme to flink_master parameter: '
          'http://%s',
          flink_master)
      flink_master = 'http://' + flink_master
    return flink_master


class FlinkJarJobServer(job_server.JavaJarJobServer):
  """Java jar job server configured from `FlinkRunnerOptions`."""
  def __init__(self, options):
    """Captures the Flink jar, master URL and version from `options`.

    Args:
      options: pipeline options; read through `FlinkRunnerOptions`.
    """
    super().__init__(options)
    options = options.view_as(pipeline_options.FlinkRunnerOptions)
    self._jar = options.flink_job_server_jar
    self._master_url = options.flink_master
    self._flink_version = options.flink_version

  def path_to_jar(self):
    """Returns the location of the Flink job server jar.

    If a jar was configured, it is returned as given, provided that it is
    either an existing local path or parses as a URL with a scheme. When no
    jar was configured, the result of `path_to_beam_jar` for the configured
    Flink version's shadow jar target is returned.

    Returns:
      The configured jar path/URL, or the result of `path_to_beam_jar`.

    Raises:
      ValueError: if the configured jar does not exist locally and has no
        URL scheme.
    """
    if self._jar:
      if not os.path.exists(self._jar):
        # Not a local file: accept it only if it looks like a URL.
        url = urllib.parse.urlparse(self._jar)
        if not url.scheme:
          raise ValueError(
              'Unable to parse jar URL "%s". If using a full URL, make sure '
              'the scheme is specified. If using a local file path, make sure '
              'the file exists; you may have to first build the job server '
              'using `./gradlew runners:flink:%s:job-server:shadowJar`.' %
              (self._jar, self._flink_version))
      return self._jar
    else:
      return self.path_to_beam_jar(
          ':runners:flink:%s:job-server:shadowJar' % self._flink_version)

  def java_arguments(
      self, job_port, artifact_port, expansion_port, artifacts_dir):
    """Builds the argument list passed to the job server jar.

    Args:
      job_port: value for `--job-port`.
      artifact_port: value for `--artifact-port`.
      expansion_port: value for `--expansion-port`.
      artifacts_dir: value for `--artifacts-dir`.

    Returns:
      A list alternating flag names and their values; values are passed
      through unchanged.
    """
    return [
        '--flink-master',
        self._master_url,
        '--artifacts-dir',
        artifacts_dir,
        '--job-port',
        job_port,
        '--artifact-port',
        artifact_port,
        '--expansion-port',
        expansion_port
    ]