github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/portability/spark_runner_test.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pytype: skip-file

import argparse
import logging
import shlex
import unittest
from shutil import rmtree
from tempfile import mkdtemp

import pytest

from apache_beam.options.pipeline_options import PortableOptions
from apache_beam.runners.portability import job_server
from apache_beam.runners.portability import portable_runner
from apache_beam.runners.portability import portable_runner_test

# Run as
#
# pytest spark_runner_test.py[::TestClass::test_case] \
#     --test-pipeline-options="--environment_type=LOOPBACK"

_LOGGER = logging.getLogger(__name__)


class SparkRunnerTest(portable_runner_test.PortableRunnerTest):
  _use_grpc = True
  _use_subprocesses = True

  expansion_port = None
  spark_job_server_jar = None

  @pytest.fixture(autouse=True)
  def parse_options(self, request):
    if not request.config.option.test_pipeline_options:
      raise unittest.SkipTest(
          'Skipping because --test-pipeline-options is not specified.')
    test_pipeline_options = request.config.option.test_pipeline_options
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        '--spark_job_server_jar',
        help='Job server jar to submit jobs.',
        action='store')
    parser.add_argument(
        '--environment_type',
        default='LOOPBACK',
        choices=['DOCKER', 'PROCESS', 'LOOPBACK'],
        help='Set the environment type for running user code. DOCKER runs '
        'user code in a container. PROCESS runs user code in '
        'automatically started processes. LOOPBACK runs user code on '
        'the same process that originally submitted the job.')
    parser.add_argument(
        '--environment_option',
        '--environment_options',
        dest='environment_options',
        action='append',
        default=None,
        help=(
            'Environment configuration for running the user code. '
            'Recognized options depend on --environment_type.\n '
            'For DOCKER: docker_container_image (optional)\n '
            'For PROCESS: process_command (required), process_variables '
            '(optional, comma-separated)\n '
            'For EXTERNAL: external_service_address (required)'))
    known_args, unknown_args = parser.parse_known_args(
        shlex.split(test_pipeline_options))
    if unknown_args:
      _LOGGER.warning('Discarding unrecognized arguments %s' % unknown_args)
    self.set_spark_job_server_jar(
        known_args.spark_job_server_jar or
        job_server.JavaJarJobServer.path_to_beam_jar(
            ':runners:spark:3:job-server:shadowJar'))
    self.environment_type = known_args.environment_type
    self.environment_options = known_args.environment_options

  @classmethod
  def _subprocess_command(cls, job_port, expansion_port):
    # Will be cleaned up at the end of this method, then recreated and used
    # by the job server.
    tmp_dir = mkdtemp(prefix='sparktest')

    cls.expansion_port = expansion_port

    try:
      return [
          'java',
          '-Dbeam.spark.test.reuseSparkContext=true',
          '-jar',
          cls.spark_job_server_jar,
          '--spark-master-url',
          'local',
          '--artifacts-dir',
          tmp_dir,
          '--job-port',
          str(job_port),
          '--artifact-port',
          '0',
          '--expansion-port',
          str(expansion_port),
      ]
    finally:
      rmtree(tmp_dir)

  @classmethod
  def get_runner(cls):
    return portable_runner.PortableRunner()

  @classmethod
  def get_expansion_service(cls):
    # TODO: Move the expansion service address into PipelineOptions.
    return 'localhost:%s' % cls.expansion_port

  @classmethod
  def set_spark_job_server_jar(cls, spark_job_server_jar):
    cls.spark_job_server_jar = spark_job_server_jar

  def create_options(self):
    options = super().create_options()
    options.view_as(PortableOptions).environment_type = self.environment_type
    options.view_as(
        PortableOptions).environment_options = self.environment_options

    return options

  def test_metrics(self):
    # Skip until Spark runner supports metrics.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19496")

  def test_sdf(self):
    # Skip until Spark runner supports SDF.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19468")

  def test_sdf_with_watermark_tracking(self):
    # Skip until Spark runner supports SDF.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19468")

  def test_sdf_with_sdf_initiated_checkpointing(self):
    # Skip until Spark runner supports SDF.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19468")

  def test_sdf_synthetic_source(self):
    # Skip until Spark runner supports SDF.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19468")

  def test_callbacks_with_exception(self):
    # Skip until Spark runner supports bundle finalization.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19517")

  def test_register_finalizations(self):
    # Skip until Spark runner supports bundle finalization.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19517")

  def test_sdf_with_dofn_as_watermark_estimator(self):
    # Skip until Spark runner supports SDF and self-checkpoint.
    raise unittest.SkipTest("https://github.com/apache/beam/issues/19468")

  def test_pardo_dynamic_timer(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/20179")

  def test_flattened_side_input(self):
    # Blocked on support for transcoding:
    # https://jira.apache.org/jira/browse/BEAM-7236
    super().test_flattened_side_input(with_transcoding=False)

  def test_custom_merging_window(self):
    raise unittest.SkipTest("https://github.com/apache/beam/issues/20641")

  # Inherits all other tests from PortableRunnerTest.


if __name__ == '__main__':
  # Run the tests.
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()
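
# A fuller invocation sketch than the "Run as" note at the top of this file,
# exercising all three flags the parse_options fixture recognizes
# (--spark_job_server_jar, --environment_type, --environment_option).
# The jar and boot-binary paths below are hypothetical placeholders;
# substitute the artifacts of your own Beam build (the jar defaults to the
# output of `./gradlew :runners:spark:3:job-server:shadowJar` when omitted),
# and test_pardo is one of the tests inherited from PortableRunnerTest:
#
# pytest spark_runner_test.py::SparkRunnerTest::test_pardo \
#     --test-pipeline-options=" \
#         --spark_job_server_jar=/path/to/beam-runners-spark-3-job-server.jar \
#         --environment_type=PROCESS \
#         --environment_option=process_command=/path/to/boot"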