github.com/apache/beam/sdks/v2@v2.48.2/python/test-suites/portable/common.gradle

import org.apache.tools.ant.taskdefs.condition.Os

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

def pythonRootDir = "${rootDir}/sdks/python"
def pythonVersionSuffix = project.ext.pythonVersion.replace('.', '')
def latestFlinkVersion = project.ext.latestFlinkVersion

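// pythonContainerTask points at the SDK container image build for the configured Python
// version; e.g. (illustrative) a pythonVersion of '3.8' yields the suffix '38' and the task
// path :sdks:python:container:py38:docker.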
ext {
  pythonContainerTask = ":sdks:python:container:py${pythonVersionSuffix}:docker"
}

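// Registers a tox-based Flink compatibility matrix test task for the given SDK worker
// environment_type (DOCKER, PROCESS or LOOPBACK), wired to the latest Flink job server jar.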
def createFlinkRunnerTestTask(String workerType) {
  def taskName = "flinkCompatibilityMatrix${workerType}"
  // project(":runners:flink:${latestFlinkVersion}:job-server").shadowJar.archivePath is not resolvable until runtime, so hard-code it here.
  def jobServerJar = "${rootDir}/runners/flink/${latestFlinkVersion}/job-server/build/libs/beam-runners-flink-${latestFlinkVersion}-job-server-${version}.jar"
  def options = "--flink_job_server_jar=${jobServerJar} --environment_type=${workerType}"
  if (workerType == 'PROCESS') {
    options += " --environment_options=process_command=${buildDir.absolutePath}/sdk_worker.sh"
  }
  def task = toxTask(taskName, 'flink-runner-test', options)
  // Through the Flink job server, we transitively add dependencies on the expansion services needed in tests.
  task.configure {
    dependsOn ":runners:flink:${latestFlinkVersion}:job-server:shadowJar"
    // The Java SDK worker is required to execute external transforms.
    dependsOn ':sdks:java:container:java8:docker'
    if (workerType == 'DOCKER') {
      dependsOn pythonContainerTask
    } else if (workerType == 'PROCESS') {
      dependsOn createProcessWorker
    }
  }
  return task
}

createFlinkRunnerTestTask('DOCKER')
createFlinkRunnerTestTask('PROCESS')
createFlinkRunnerTestTask('LOOPBACK')
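// These calls register flinkCompatibilityMatrixDOCKER, flinkCompatibilityMatrixPROCESS and
// flinkCompatibilityMatrixLOOPBACK; with a py38 suite they would be invoked roughly as
// (illustrative path) ./gradlew :sdks:python:test-suites:portable:py38:flinkCompatibilityMatrixLOOPBACK.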

task flinkValidatesRunner() {
  dependsOn 'flinkCompatibilityMatrixLOOPBACK'
}

// TODO(https://github.com/apache/beam/issues/19962): Enable on pre-commit.
tasks.register("flinkTriggerTranscript") {
  dependsOn 'setupVirtualenv'
  dependsOn ":runners:flink:${latestFlinkVersion}:job-server:shadowJar"
  doLast {
    exec {
      executable 'sh'
      args '-c', """
          . ${envdir}/bin/activate \\
          && cd ${pythonRootDir} \\
          && pip install -e .[test] \\
          && pytest \\
              apache_beam/transforms/trigger_test.py::WeakTestStreamTranscriptTest \\
              --test-pipeline-options='--runner=FlinkRunner --environment_type=LOOPBACK --flink_job_server_jar=${project(":runners:flink:${latestFlinkVersion}:job-server:").shadowJar.archivePath}'
          """
    }
  }
}

// Verifies BEAM-10702.
tasks.register("portableLocalRunnerJuliaSetWithSetupPy") {
  dependsOn 'setupVirtualenv'
  dependsOn ":sdks:python:container:py${pythonVersionSuffix}:docker"

  doLast {
    exec {
      executable 'sh'
      args '-c', """
          . ${envdir}/bin/activate \\
          && cd ${pythonRootDir} \\
          && pip install -e . \\
          && cd apache_beam/examples/complete/juliaset \\
          && python juliaset_main.py \\
              --runner=PortableRunner \\
              --job_endpoint=embed \\
              --setup_file=./setup.py \\
              --coordinate_output=/tmp/juliaset \\
              --grid_size=1
          """
    }
  }
}

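// Writes a ${buildDir}/sdk_worker.sh wrapper that activates the test virtualenv and then runs
// the SDK harness boot launcher built by :sdks:python:container:build; PROCESS-environment
// suites point --environment_options=process_command at this script.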
def createProcessWorker = tasks.register("createProcessWorker") {
  dependsOn ':sdks:python:container:build'
  dependsOn 'setupVirtualenv'
  def sdkWorkerFile = file("${buildDir}/sdk_worker.sh")
  def osType = 'linux'
  if (Os.isFamily(Os.FAMILY_MAC))
    osType = 'darwin'
  def workerScript = "${project(":sdks:python:container:").buildDir.absolutePath}/target/launcher/${osType}_amd64/boot"
  def sdkWorkerFileCode = "sh -c \"pip=`which pip` . ${envdir}/bin/activate && ${workerScript} \$* \""
  outputs.file sdkWorkerFile
  doLast {
    sdkWorkerFile.write sdkWorkerFileCode
    exec {
      commandLine('sh', '-c', ". ${envdir}/bin/activate && cd ${pythonRootDir} && pip install -e .[test]")
    }
    exec {
      commandLine('chmod', '+x', sdkWorkerFile)
    }
  }
}

// The requirements file is created at runtime.
tasks.register("portableLocalRunnerTestWithRequirementsFile") {
  dependsOn 'setupVirtualenv'
  dependsOn ":sdks:python:container:py${pythonVersionSuffix}:docker"

  doLast {
    exec {
      executable 'sh'
      args '-c', """
          . ${envdir}/bin/activate \\
          && cd ${pythonRootDir} \\
          && pip install -e . \\
          && cd apache_beam/runners/portability \\
          && python requirements_cache_it_test.py \\
              --runner=PortableRunner \\
              --job_endpoint=embed \\
              --environment_type="DOCKER"
          """
    }
  }
}

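// Same compatibility-matrix pattern as for Flink above, but run against the Samza job server.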
def createSamzaRunnerTestTask(String workerType) {
  def taskName = "samzaCompatibilityMatrix${workerType}"
  def jobServerJar = "${rootDir}/runners/samza/job-server/build/libs/beam-runners-samza-job-server-${version}.jar"
  def options = "--samza_job_server_jar=${jobServerJar} --environment_type=${workerType}"
  if (workerType == 'PROCESS') {
    options += " --environment_options=process_command=${buildDir.absolutePath}/sdk_worker.sh"
  }
  def task = toxTask(taskName, 'samza-runner-test', options)
  task.configure {
    dependsOn ":runners:samza:job-server:shadowJar"
    if (workerType == 'DOCKER') {
      dependsOn pythonContainerTask
    } else if (workerType == 'PROCESS') {
      dependsOn createProcessWorker
    }
  }
  return task
}

createSamzaRunnerTestTask('DOCKER')
createSamzaRunnerTestTask('PROCESS')
createSamzaRunnerTestTask('LOOPBACK')

task samzaValidatesRunner() {
  dependsOn 'samzaCompatibilityMatrixLOOPBACK'
}

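// Spark counterpart of the Flink and Samza compatibility-matrix helpers, using the Spark 3 job server.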
def createSparkRunnerTestTask(String workerType) {
  def taskName = "sparkCompatibilityMatrix${workerType}"
  // `project(':runners:spark:3:job-server').shadowJar.archivePath` is not resolvable until runtime, so hard-code it here.
  def jobServerJar = "${rootDir}/runners/spark/3/job-server/build/libs/beam-runners-spark-3-job-server-${version}.jar"
  def options = "--spark_job_server_jar=${jobServerJar} --environment_type=${workerType}"
  if (workerType == 'PROCESS') {
    options += " --environment_options=process_command=${buildDir.absolutePath}/sdk_worker.sh"
  }
  def task = toxTask(taskName, 'spark-runner-test', options)
  task.configure {
    dependsOn ':runners:spark:3:job-server:shadowJar'
    if (workerType == 'DOCKER') {
      dependsOn pythonContainerTask
    } else if (workerType == 'PROCESS') {
      dependsOn createProcessWorker
    }
  }
  return task
}

createSparkRunnerTestTask('DOCKER')
createSparkRunnerTestTask('PROCESS')
createSparkRunnerTestTask('LOOPBACK')

tasks.register("sparkValidatesRunner") {
  dependsOn 'sparkCompatibilityMatrixLOOPBACK'
}

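// Aggregate pre/post-commit entry points for this Python version. The portableWordCount* tasks
// they depend on are not defined in this file; they are expected to be provided by the suite
// build script that applies this common.gradle.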
project.tasks.register("preCommitPy${pythonVersionSuffix}") {
  dependsOn = [":sdks:python:container:py${pythonVersionSuffix}:docker",
               ":runners:flink:${latestFlinkVersion}:job-server:shadowJar",
               'portableWordCountFlinkRunnerBatch',
               'portableWordCountFlinkRunnerStreaming']
}

project.tasks.register("postCommitPy${pythonVersionSuffix}") {
  dependsOn = ['setupVirtualenv',
               "postCommitPy${pythonVersionSuffix}IT",
               ':runners:spark:3:job-server:shadowJar',
               'portableLocalRunnerJuliaSetWithSetupPy',
               'portableWordCountSparkRunnerBatch',
               'portableLocalRunnerTestWithRequirementsFile']
}

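// Runs the Python examples post-commit suite on the Flink runner via
// scripts/run_integration_test.sh, collecting tests marked examples_postcommit and skipping
// those sickbayed for Flink.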
project.tasks.register("flinkExamples") {
  dependsOn = [
          'setupVirtualenv',
          'installGcpTest',
          ":runners:flink:${latestFlinkVersion}:job-server:shadowJar"
  ]
  doLast {
    def testOpts = [
            "--log-cli-level=INFO",
    ]
    def pipelineOpts = [
            "--runner=FlinkRunner",
            "--project=apache-beam-testing",
            "--environment_type=LOOPBACK",
            "--temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
            "--flink_job_server_jar=${project(":runners:flink:${latestFlinkVersion}:job-server").shadowJar.archivePath}",
            '--sdk_harness_log_level_overrides=' +
                // suppress info level flink.runtime log flood
                '{\\"org.apache.flink.runtime\\":\\"WARN\\",' +
                // suppress full __metricscontainers log printed in FlinkPipelineRunner.createPortablePipelineResult
                '\\"org.apache.beam.runners.flink.FlinkPipelineRunner\\":\\"WARN\\"}'
    ]
    def cmdArgs = mapToArgString([
            "test_opts": testOpts,
            "suite": "postCommitExamples-flink-py${pythonVersionSuffix}",
            "pipeline_opts": pipelineOpts.join(" "),
            "collect": "examples_postcommit and not sickbay_flink"
    ])
    exec {
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && ${pythonRootDir}/scripts/run_integration_test.sh $cmdArgs"
    }
  }
}

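// Spark counterpart of flinkExamples: same example collection, with sickbay_spark exclusions
// and the Spark 3 job server jar.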
project.tasks.register("sparkExamples") {
  dependsOn = [
          'setupVirtualenv',
          'installGcpTest',
          ':runners:spark:3:job-server:shadowJar'
  ]
  doLast {
    def testOpts = [
            "--log-cli-level=INFO",
    ]
    def jobServerJar = "${rootDir}/runners/spark/3/job-server/build/libs/beam-runners-spark-3-job-server-${version}.jar"
    def pipelineOpts = [
            "--runner=SparkRunner",
            "--project=apache-beam-testing",
            "--environment_type=LOOPBACK",
            "--temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
            "--spark_job_server_jar=${jobServerJar}",
    ]
    def cmdArgs = mapToArgString([
            "test_opts": testOpts,
            "suite": "postCommitExamples-spark-py${pythonVersionSuffix}",
            "pipeline_opts": pipelineOpts.join(" "),
            "collect": "examples_postcommit and not sickbay_spark"
    ])
    exec {
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && ${pythonRootDir}/scripts/run_integration_test.sh $cmdArgs"
    }
  }
}

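// Cross-language post-commit integration tests (BigQuery, JDBC, Kafka, Kinesis, Debezium) run
// on the Flink runner; the Java expansion services and a local Kafka service jar are built as
// dependencies.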
project.tasks.register("postCommitPy${pythonVersionSuffix}IT") {
  dependsOn = [
          'setupVirtualenv',
          'installGcpTest',
          ":runners:flink:${latestFlinkVersion}:job-server:shadowJar",
          ':sdks:java:container:java8:docker',
          ':sdks:java:testing:kafka-service:buildTestKafkaServiceJar',
          ':sdks:java:io:expansion-service:shadowJar',
          ':sdks:java:io:google-cloud-platform:expansion-service:shadowJar',
          ':sdks:java:io:kinesis:expansion-service:shadowJar',
          ':sdks:java:extensions:schemaio-expansion-service:shadowJar',
          ':sdks:java:io:debezium:expansion-service:shadowJar'
  ]

  doLast {
    def tests = [
            "apache_beam/io/gcp/bigquery_read_it_test.py",
            "apache_beam/io/external/xlang_jdbcio_it_test.py",
            "apache_beam/io/external/xlang_kafkaio_it_test.py",
            "apache_beam/io/external/xlang_kinesisio_it_test.py",
            "apache_beam/io/external/xlang_debeziumio_it_test.py",
    ]
    def testOpts = ["${tests.join(' ')}"] + ["--log-cli-level=INFO"]
    def pipelineOpts = [
        "--runner=FlinkRunner",
        "--project=apache-beam-testing",
        "--environment_type=LOOPBACK",
        "--temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
        "--flink_job_server_jar=${project(":runners:flink:${latestFlinkVersion}:job-server").shadowJar.archivePath}",
        '--sdk_harness_log_level_overrides=' +
            // suppress info level flink.runtime log flood
            '{\\"org.apache.flink.runtime\\":\\"WARN\\",' +
            // suppress full __metricscontainers log printed in FlinkPipelineRunner.createPortablePipelineResult
            '\\"org.apache.beam.runners.flink.FlinkPipelineRunner\\":\\"WARN\\",' +
            // suppress metric name collision warning logs
            '\\"org.apache.flink.runtime.metrics.groups\\":\\"ERROR\\"}'
    ]
    def cmdArgs = mapToArgString([
            "test_opts": testOpts,
            "suite": "postCommitIT-flink-py${pythonVersionSuffix}",
            "pipeline_opts": pipelineOpts.join(" "),
    ])
    def kafkaJar = project(":sdks:java:testing:kafka-service:").buildTestKafkaServiceJar.archivePath
    exec {
      environment "LOCAL_KAFKA_JAR", kafkaJar
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && ${pythonRootDir}/scripts/run_integration_test.sh $cmdArgs"
    }
  }
}

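// Cross-language Spanner IO integration test, run on Flink with the same pipeline options as
// the post-commit ITs above.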
project.tasks.register("xlangSpannerIOIT") {
  dependsOn = [
          'setupVirtualenv',
          'installGcpTest',
          ":runners:flink:${latestFlinkVersion}:job-server:shadowJar",
          ':sdks:java:container:java8:docker',
          ':sdks:java:io:expansion-service:shadowJar',
          ':sdks:java:io:google-cloud-platform:expansion-service:shadowJar',
          ':sdks:java:io:kinesis:expansion-service:shadowJar',
          ':sdks:java:extensions:schemaio-expansion-service:shadowJar',
          ':sdks:java:io:debezium:expansion-service:shadowJar'
  ]

  doLast {
    def tests = [
            "apache_beam/io/gcp/tests/xlang_spannerio_it_test.py",
    ]
    def testOpts = ["${tests.join(' ')}"] + ["--log-cli-level=INFO"]
    def pipelineOpts = [
        "--runner=FlinkRunner",
        "--project=apache-beam-testing",
        "--environment_type=LOOPBACK",
        "--temp_location=gs://temp-storage-for-end-to-end-tests/temp-it",
        "--flink_job_server_jar=${project(":runners:flink:${latestFlinkVersion}:job-server").shadowJar.archivePath}",
        '--sdk_harness_log_level_overrides=' +
            // suppress info level flink.runtime log flood
            '{\\"org.apache.flink.runtime\\":\\"WARN\\",' +
            // suppress full __metricscontainers log printed in FlinkPipelineRunner.createPortablePipelineResult
            '\\"org.apache.beam.runners.flink.FlinkPipelineRunner\\":\\"WARN\\",' +
            // suppress metric name collision warning logs
            '\\"org.apache.flink.runtime.metrics.groups\\":\\"ERROR\\"}'
    ]
    def cmdArgs = mapToArgString([
            "test_opts": testOpts,
            "suite": "postCommitIT-flink-py${pythonVersionSuffix}",
            "pipeline_opts": pipelineOpts.join(" "),
    ])
    exec {
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && ${pythonRootDir}/scripts/run_integration_test.sh $cmdArgs"
    }
  }
}

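// Registers a testJavaJarCreator<Runner> task that exercises runners/portability/test_pipeline_jar.sh
// against the given runner's job server jar and the Python SDK container image.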
def addTestJavaJarCreator(String runner, Task jobServerJarTask) {
  project.tasks.register("testJavaJarCreator${runner}") {
    dependsOn jobServerJarTask
    dependsOn pythonContainerTask
    doLast{
      exec {
        executable "sh"
        def options = [
            "--runner ${runner}",
            "--job_server_jar ${jobServerJarTask.archivePath}",
            "--env_dir ${project.rootProject.buildDir}/gradleenv/${project.path.hashCode()}",
            "--python_root_dir ${project.rootDir}/sdks/python",
            "--python_version ${project.ext.pythonVersion}",
            "--python_container_image ${project.docker_image_default_repo_root}/"
                + "${project.docker_image_default_repo_prefix}"
                + "python${project.ext.pythonVersion}_sdk:${project.sdk_version}",
        ]
        args "-c", "${project.rootDir}/runners/portability/test_pipeline_jar.sh ${options.join(' ')}"
      }
    }
  }
}

// TODO(BEAM-11333) Update and test multiple Flink versions.
addTestJavaJarCreator("FlinkRunner", tasks.getByPath(":runners:flink:${latestFlinkVersion}:job-server:shadowJar"))
addTestJavaJarCreator("SparkRunner", tasks.getByPath(":runners:spark:3:job-server:shadowJar"))

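// Registers testUberJarFlinkRunner and testUberJarFlinkRunnerSaveMainSession, which run
// runners/portability/test_flink_uber_jar.sh against the Flink job server and mini cluster jars,
// optionally passing --save_main_session.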
def addTestFlinkUberJar(boolean saveMainSession) {
  project.tasks.register("testUberJarFlinkRunner${saveMainSession ? 'SaveMainSession' : ''}") {
    dependsOn ":runners:flink:${latestFlinkVersion}:job-server:shadowJar"
    dependsOn ":runners:flink:${latestFlinkVersion}:job-server:miniCluster"
    dependsOn pythonContainerTask
    doLast{
      exec {
        executable "sh"
        def options = [
            "--flink_job_server_jar ${tasks.getByPath(":runners:flink:${latestFlinkVersion}:job-server:shadowJar").archivePath}",
            "--flink_mini_cluster_jar ${tasks.getByPath(":runners:flink:${latestFlinkVersion}:job-server:miniCluster").archivePath}",
            "--env_dir ${project.rootProject.buildDir}/gradleenv/${project.path.hashCode()}",
            "--python_root_dir ${project.rootDir}/sdks/python",
            "--python_version ${project.ext.pythonVersion}",
            "--python_container_image ${project.docker_image_default_repo_root}/"
                + "${project.docker_image_default_repo_prefix}"
                + "python${project.ext.pythonVersion}_sdk:${project.sdk_version}",
        ]
        if (saveMainSession) {
          options.add('--save_main_session')
        }
        args "-c", "${project.rootDir}/runners/portability/test_flink_uber_jar.sh ${options.join(' ')}"
      }
    }
  }
}

addTestFlinkUberJar(false)
addTestFlinkUberJar(true)

tasks.register("testPipelineJarSparkRunner") {
  dependsOn testJavaJarCreatorSparkRunner
}

tasks.register("testPipelineJarFlinkRunner") {
  dependsOn testJavaJarCreatorFlinkRunner
  dependsOn testUberJarFlinkRunner
  dependsOn testUberJarFlinkRunnerSaveMainSession
}