github.com/apache/beam/sdks/v2@v2.48.2/python/test-suites/dataflow/common.gradle (about)

     1  /*
     2   * Licensed to the Apache Software Foundation (ASF) under one
     3   * or more contributor license agreements.  See the NOTICE file
     4   * distributed with this work for additional information
     5   * regarding copyright ownership.  The ASF licenses this file
     6   * to you under the Apache License, Version 2.0 (the
     7   * License); you may not use this file except in compliance
     8   * with the License.  You may obtain a copy of the License at
     9   *
    10   *     http://www.apache.org/licenses/LICENSE-2.0
    11   *
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an AS IS BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  evaluationDependsOn(':runners:google-cloud-dataflow-java:worker')
    20  evaluationDependsOn(':sdks:python:test-suites:xlang')
    21  enablePythonPerformanceTest()
    22  
    23  String pythonVersionNumber = project.ext.pythonVersion.replace('.', '')
    24  String pythonVersionSuffix = project.ext.pythonVersion
    25          ? "-py${pythonVersionNumber}"
    26          : ''
    27  
    28  dependencies {
    29    distTarBall project(path: ":sdks:python", configuration: "distTarBall")
    30  }
    31  
    32  task initializeForDataflowJob{
    33    def wheelCompatible = "amd64".equalsIgnoreCase(System.getProperty("os.arch"))
    34    if (!wheelCompatible && project.hasProperty('useWheelDistribution')) {
    35        throw new GradleException('-PuseWheelDistribution is set for the task but the ' +
    36        'host system platform is not compatible with Dataflow worker container image.')
    37    }
    38    dependsOn 'installGcpTest'
    39  
    40    if (project.hasProperty('useWheelDistribution')) {
    41      dependsOn ":sdks:python:bdistPy${pythonVersionNumber}linux"
    42  
    43      doLast {
    44        def collection = project.fileTree(project.project(':sdks:python').buildDir){
    45            include "**/apache_beam-*cp${pythonVersionNumber}*manylinux*.whl"
    46        }
    47        // sdkLocation ext is set at execution time
    48        String packageFilename = collection.singleFile.toString()
    49        project.ext.sdkLocation = packageFilename
    50        logger.info('Use wheel {} for sdk_location.', packageFilename)
    51      }
    52    } else {
    53      dependsOn ':sdks:python:sdist'
    54  
    55      // sdkLocation ext is available at config time
    56      String packageFilename = files(configurations.distTarBall.files).singleFile
    57      project.ext.sdkLocation = packageFilename
    58      logger.info('Use tarball {} for sdk_location.', packageFilename)
    59    }
    60  }
    61  
    62  def runScriptsDir = "${rootDir}/sdks/python/scripts"
    63  
    64  // Basic test options for ITs running on Jenkins.
    65  def basicPytestOpts = [
    66      "--capture=no",  // print stdout instantly
    67      "--timeout=4500", // timeout of whole command execution
    68      "--color=yes", // console color
    69      "--log-cli-level=INFO", //log level
    70  ]
    71  
    72  def preCommitIT(String runScriptsDir, String envdir, Boolean streaming, Boolean runnerV2, String pythonSuffix) {
    73    def suffix = runnerV2 ? '_V2' : ''
    74    suffix = streaming ? "_streaming$suffix" : "_batch$suffix"
    75    task "preCommitIT${suffix}" {
    76      dependsOn 'initializeForDataflowJob'
    77  
    78      doLast {
    79        // Basic integration tests to run in PreCommit
    80        def precommitTests = streaming ? [
    81                "apache_beam/examples/streaming_wordcount_it_test.py::StreamingWordCountIT::test_streaming_wordcount_it",
    82        ] : [
    83                "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it",
    84        ]
    85        def testOpts = [
    86                "${precommitTests.join(' ')}",
    87                "--capture=no",    // Print stdout instantly
    88                "--numprocesses=2",    // Number of tests running in parallel
    89                "--timeout=1800",   // Timeout of whole command execution
    90        ]
    91  
    92        def argMap = [
    93                "test_opts"   : testOpts,
    94                "sdk_location": project.ext.sdkLocation,
    95                "suite"       : "preCommitIT-df${pythonSuffix}",
    96        ]
    97  
    98        if (streaming){
    99          argMap.put("streaming", "true")
   100          argMap.put("runner_v2", "true")
   101        } else if (runnerV2) {
   102          argMap.put("runner_v2", "true")
   103        }
   104  
   105        def cmdArgs = mapToArgString(argMap)
   106        exec {
   107          executable 'sh'
   108          args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   109        }
   110      }
   111    }
   112  }
   113  
   114  preCommitIT(runScriptsDir, envdir, false, false, pythonVersionSuffix)
   115  preCommitIT(runScriptsDir, envdir, true, false, pythonVersionSuffix)
   116  preCommitIT(runScriptsDir, envdir, false, true, pythonVersionSuffix)
   117  preCommitIT(runScriptsDir, envdir, true, true, pythonVersionSuffix)
   118  
   119  task preCommitIT{
   120    dependsOn preCommitIT_batch
   121    dependsOn preCommitIT_streaming
   122  }
   123  
   124  task preCommitIT_V2{
   125    dependsOn preCommitIT_batch_V2
   126    dependsOn preCommitIT_streaming_V2
   127  }
   128  
   129  task postCommitIT {
   130    dependsOn 'initializeForDataflowJob'
   131  
   132    doLast {
   133      def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
   134      def argMap = [
   135          "test_opts": testOpts,
   136          "sdk_location": project.ext.sdkLocation,
   137          "suite": "postCommitIT-df${pythonVersionSuffix}",
   138          "collect": "it_postcommit"
   139      ]
   140      def cmdArgs = mapToArgString(argMap)
   141      exec {
   142        executable 'sh'
   143        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   144      }
   145    }
   146  }
   147  
   148  task postCommitSickbay {
   149    dependsOn 'initializeForDataflowJob'
   150  
   151    doLast {
   152      def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
   153      def argMap = [
   154          "test_opts": testOpts,
   155          "sdk_location": project.ext.sdkLocation,
   156          "suite": "postCommitIT-df${pythonVersionSuffix}",
   157          "collect": "it_postcommit_sickbay"
   158      ]
   159      def cmdArgs = mapToArgString(argMap)
   160      exec {
   161        executable 'sh'
   162        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   163      }
   164    }
   165  }
   166  
   167  task spannerioIT {
   168    dependsOn 'initializeForDataflowJob'
   169  
   170    doLast {
   171      def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
   172      def argMap = [
   173          "test_opts": testOpts,
   174          "sdk_location": project.ext.sdkLocation,
   175          "suite": "postCommitIT-df${pythonVersionSuffix}",
   176          "collect": "spannerio_it"
   177      ]
   178      def cmdArgs = mapToArgString(argMap)
   179      exec {
   180        executable 'sh'
   181        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   182      }
   183    }
   184  }
   185  
   186  task examples {
   187    dependsOn 'initializeForDataflowJob'
   188    def testOpts = basicPytestOpts
   189  
   190    // Execute tests with xdists
   191    doFirst {
   192      def argMap = [
   193              "test_opts": testOpts + ["--numprocesses=8", "--dist=loadfile"],
   194              "sdk_location": project.ext.sdkLocation,
   195              "runner_v2": "true",
   196              "suite": "postCommitIT-df${pythonVersionSuffix}-xdist",
   197              "collect": "examples_postcommit and not no_xdist and not sickbay_dataflow"
   198      ]
   199      def cmdArgs = mapToArgString(argMap)
   200      exec {
   201        executable 'sh'
   202        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   203      }
   204    }
   205  
   206    // Execute tests that fail with xdists
   207    doLast {
   208      def argMap = [
   209              "test_opts": testOpts,
   210              "sdk_location": project.ext.sdkLocation,
   211              "runner_v2": "true",
   212              "suite": "postCommitIT-df${pythonVersionSuffix}-no-xdist",
   213              "collect": "examples_postcommit and no_xdist and not sickbay_dataflow"
   214      ]
   215      def cmdArgs = mapToArgString(argMap)
   216      exec {
   217        executable 'sh'
   218        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   219      }
   220    }
   221  }
   222  
   223  task validatesRunnerBatchTests {
   224    dependsOn 'initializeForDataflowJob'
   225  
   226    doLast {
   227      def argMap = [
   228          "test_opts"   : basicPytestOpts + ["--numprocesses=8"],
   229          "sdk_location": project.ext.sdkLocation,
   230          "suite"       : "validatesRunnerBatchTests-df${pythonVersionSuffix}",
   231          "collect": "it_validatesrunner and not no_sickbay_batch"
   232      ]
   233  
   234      if (project.hasProperty('useRunnerV2')) {
   235        argMap.put("runner_v2", "true")
   236      }
   237  
   238      if (project.hasProperty('disableRunnerV2')) {
   239        argMap.put("disable_runner_v2", "true")
   240      }
   241      def cmdArgs = mapToArgString(argMap)
   242      exec {
   243        executable 'sh'
   244        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   245      }
   246    }
   247  }
   248  
   249  task validatesRunnerStreamingTests {
   250    dependsOn 'initializeForDataflowJob'
   251  
   252    // TODO(BEAM-3544,https://github.com/apache/beam/issues/19012): Disable tests with 'sickbay-streaming' tag.
   253    // Execute tests with xdists
   254    doFirst {
   255      def argMap = [
   256                  "test_opts": basicPytestOpts + ["--numprocesses=8"],
   257                  "streaming": "true",
   258                  "sdk_location": project.ext.sdkLocation,
   259                  "suite": "validatesRunnerStreamingTests-df${pythonVersionSuffix}-xdist",
   260                  "collect": "it_validatesrunner and not no_sickbay_streaming and not no_xdist",
   261                  "runner_v2": "true",
   262                  ]
   263  
   264      def cmdArgs = mapToArgString(argMap)
   265      exec {
   266        executable 'sh'
   267        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   268      }
   269    }
   270  
   271    // Execute tests that fail with xdists
   272    doLast {
   273      def argMap = [
   274                  "test_opts": basicPytestOpts,
   275                  "streaming": "true",
   276                  "sdk_location": project.ext.sdkLocation,
   277                  "suite": "validatesRunnerStreamingTests-df${pythonVersionSuffix}-noxdist",
   278                  "collect": "it_validatesrunner and not no_sickbay_streaming and no_xdist",
   279                  "runner_v2": "true",
   280                  ]
   281  
   282      def cmdArgs = mapToArgString(argMap)
   283      exec {
   284        executable 'sh'
   285        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
   286      }
   287    }
   288  }
   289  
   290  task runPerformanceTest {
   291    dependsOn 'initializeForDataflowJob'
   292  
   293    def test = project.findProperty('test')
   294    def suite = "runPerformanceTest-df${pythonVersionSuffix}"
   295    def xUnitFile ="pytest-${suite}.xml"
   296  
   297    doLast {
   298      def testOpts = project.findProperty('test-pipeline-options')
   299      testOpts += " --sdk_location=${project.ext.sdkLocation}"
   300  
   301      exec {
   302        workingDir "${project.rootDir}/sdks/python"
   303        executable 'sh'
   304        args '-c', ". ${envdir}/bin/activate && pytest -o junit_suite_name=${suite}" +
   305                " ${test} --test-pipeline-options=\"${testOpts}\" --junitxml=${xUnitFile} --timeout=1800"
   306      }
   307    }
   308  }
   309  
   310  task mongodbioIT {
   311    dependsOn 'initializeForDataflowJob'
   312  
   313    doLast {
   314      def opts = findProperty('opts')
   315      opts = String.format("%s %s", opts, "--sdk_location=${project.ext.sdkLocation}")
   316  
   317      exec {
   318        executable 'sh'
   319        args '-c', ". ${envdir}/bin/activate && python -m apache_beam.io.mongodbio_it_test ${opts}"
   320      }
   321    }
   322  }
   323  
   324  task installChicagoTaxiExampleRequirements {
   325    dependsOn 'initializeForDataflowJob'
   326  
   327    doLast {
   328      exec {
   329        workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/"
   330        executable 'sh'
   331        args '-c', ". ${envdir}/bin/activate && pip install -r requirements.txt"
   332      }
   333    }
   334  }
   335  
   336  task chicagoTaxiExample {
   337    dependsOn 'installChicagoTaxiExampleRequirements'
   338  
   339    doLast {
   340      def gcsRoot = findProperty('gcsRoot')
   341      def pipelineOptions = findProperty('pipelineOptions') ?: ""
   342      pipelineOptions += " --sdk_location=\"${project.ext.sdkLocation}\""
   343  
   344      exec {
   345        workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/"
   346        executable 'sh'
   347        args '-c', ". ${envdir}/bin/activate && ./run_chicago.sh ${gcsRoot} TestDataflowRunner ${pipelineOptions}"
   348      }
   349    }
   350  }
   351  
   352  task validatesContainer() {
   353    def pyversion = "${project.ext.pythonVersion.replace('.', '')}"
   354    if (project.hasProperty("testRCDependencies")) {
   355      // Generate a requirements file with pre-release versions for the docker task
   356      // if testing with pre-release dependencies.
   357      dependsOn ":sdks:python:container:py${pyversion}:generatePythonRequirements"
   358      mustRunAfter ":sdks:python:container:py${pyversion}:generatePythonRequirements"
   359    }
   360    dependsOn 'initializeForDataflowJob'
   361    dependsOn ":sdks:python:container:py${pyversion}:docker"
   362    def runScriptsPath = "${rootDir}/sdks/python/container/run_validatescontainer.sh"
   363    doLast {
   364      exec {
   365        executable 'sh'
   366        args '-c', ". ${envdir}/bin/activate && cd ${rootDir} && ${runScriptsPath} " +
   367                "${project.ext.pythonVersion} " +
   368                "${project.ext.sdkLocation}"
   369      }
   370    }
   371  }
   372  
   373  def tensorRTTests = tasks.create("tensorRTtests") {
   374    dependsOn 'installGcpTest'
   375    dependsOn ':sdks:python:sdist'
   376   doLast {
   377    def testOpts = basicPytestOpts
   378    def argMap = [
   379      "runner": "DataflowRunner",
   380      "machine_type":"n1-standard-4",
   381      // TODO(https://github.com/apache/beam/issues/22651): Build docker image for tensor RT tests during Run time.
   382      // This would also enable to use wheel "--sdk_location" as other tasks, and eliminate distTarBall dependency
   383      // declaration for this project.
   384      "sdk_container_image": "us.gcr.io/apache-beam-testing/python-postcommit-it/tensor_rt:latest",
   385      "sdk_location": files(configurations.distTarBall.files).singleFile,
   386      "project": "apache-beam-testing",
   387      "region": "us-central1",
   388      "input": "gs://apache-beam-ml/testing/inputs/tensorrt_image_file_names.txt",
   389      "output": "gs://apache-beam-ml/outputs/tensorrt_predictions.txt",
   390      "engine_path":  "gs://apache-beam-ml/models/ssd_mobilenet_v2_320x320_coco17_tpu-8.trt",
   391      "disk_size_gb": 75
   392    ]
   393    def cmdArgs = mapToArgString(argMap)
   394    exec {
   395      executable 'sh'
   396      args '-c', ". ${envdir}/bin/activate && pip install pillow && python -m apache_beam.examples.inference.tensorrt_object_detection $cmdArgs --experiment='worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver' --experiment=no_use_multiple_sdk_containers"
   397    }
   398   }
   399  }
   400  
   401  task installTFTRequirements {
   402    dependsOn 'initializeForDataflowJob'
   403    doLast {
   404      exec {
   405        workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/cloudml/"
   406        executable 'sh'
   407        args '-c', ". ${envdir}/bin/activate && pip install -r requirements.txt"
   408      }
   409    }
   410  }
   411  
   412  // Tensorflow transform integration and benchmarking tests on Apache Beam.
   413  task tftTests {
   414    dependsOn "installTFTRequirements"
   415  
   416    doLast {
   417      def opts = project.findProperty('opts')
   418      opts += " --sdk_location=${project.ext.sdkLocation}"
   419      def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
   420      def argMap = [
   421          "test_opts": testOpts,
   422          "suite": "TFTransformTests-df${pythonVersionSuffix}",
   423          "collect": "uses_tft",
   424          "requirements_file": "apache_beam/testing/benchmarks/cloudml/requirements.txt",
   425          "pipeline_opts": opts,
   426      ]
   427      def cmdArgs = mapToArgString(argMap)
   428      exec {
   429        executable 'sh'
   430        args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs "
   431      }
   432    }
   433  }
   434  
   435  // add all RunInference E2E tests that run on DataflowRunner
   436  // As of now, this test suite is enable in py38 suite as the base NVIDIA image used for Tensor RT
   437  // contains Python 3.8.
   438  // TODO: https://github.com/apache/beam/issues/22651
   439  project.tasks.register("inferencePostCommitIT") {
   440    dependsOn = [
   441    'tensorRTtests',
   442    ]
   443  }
   444  
   445  
   446  // Create cross-language tasks for running tests against Java expansion service(s)
   447  def dataflowProject = project.findProperty('dataflowProject') ?: 'apache-beam-testing'
   448  def dataflowRegion = project.findProperty('dataflowRegion') ?: 'us-central1'
   449  
   450  project(":sdks:python:test-suites:xlang").ext.xlangTasks.each { taskMetadata ->
   451      createCrossLanguageUsingJavaExpansionTask(
   452        name: taskMetadata.name,
   453        expansionProjectPath: taskMetadata.expansionProjectPath,
   454        collectMarker: taskMetadata.collectMarker,
   455        startJobServer: taskMetadata.startJobServer,
   456        cleanupJobServer: taskMetadata.cleanupJobServer,
   457        pythonPipelineOptions: [
   458          "--runner=TestDataflowRunner",
   459          "--project=${dataflowProject}",
   460          "--region=${dataflowRegion}",
   461          "--sdk_harness_container_image_overrides=.*java.*,gcr.io/apache-beam-testing/beam-sdk/beam_java8_sdk:latest"
   462        ],
   463        pytestOptions: basicPytestOpts
   464      )
   465  }