/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * License); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Shared task definitions for the Python-on-Dataflow test suites. The script
// reads `project.ext.pythonVersion` and `envdir` from the project it is
// evaluated in, so both must be defined before this script runs.

evaluationDependsOn(':runners:google-cloud-dataflow-java:worker')
evaluationDependsOn(':sdks:python:test-suites:xlang')
enablePythonPerformanceTest()

// "3.8" -> "38"; used in task paths and wheel file patterns.
String pythonVersionNumber = project.ext.pythonVersion.replace('.', '')
// "-py38", or empty when pythonVersion is an empty string.
String pythonVersionSuffix = project.ext.pythonVersion
    ? "-py${pythonVersionNumber}"
    : ''

dependencies {
  distTarBall project(path: ":sdks:python", configuration: "distTarBall")
}

// Builds the SDK artifact (wheel or source tarball) and publishes its path as
// `project.ext.sdkLocation` for the test tasks below.
task initializeForDataflowJob{
  def wheelCompatible = "amd64".equalsIgnoreCase(System.getProperty("os.arch"))
  if (!wheelCompatible && project.hasProperty('useWheelDistribution')) {
    throw new GradleException('-PuseWheelDistribution is set for the task but the ' +
      'host system platform is not compatible with Dataflow worker container image.')
  }
  dependsOn 'installGcpTest'

  if (project.hasProperty('useWheelDistribution')) {
    dependsOn ":sdks:python:bdistPy${pythonVersionNumber}linux"

    doLast {
      def collection = project.fileTree(project.project(':sdks:python').buildDir){
        include "**/apache_beam-*cp${pythonVersionNumber}*manylinux*.whl"
      }
      // sdkLocation ext is set at execution time
      String packageFilename = collection.singleFile.toString()
      project.ext.sdkLocation = packageFilename
      logger.info('Use wheel {} for sdk_location.', packageFilename)
    }
  } else {
    dependsOn ':sdks:python:sdist'

    // sdkLocation ext is available at config time
    String packageFilename = files(configurations.distTarBall.files).singleFile
    project.ext.sdkLocation = packageFilename
    logger.info('Use tarball {} for sdk_location.', packageFilename)
  }
}

def runScriptsDir = "${rootDir}/sdks/python/scripts"

// Basic test options for ITs running on Jenkins.
def basicPytestOpts = [
    "--capture=no",  // print stdout instantly
    "--timeout=4500", // timeout of whole command execution
    "--color=yes", // console color
    "--log-cli-level=INFO", //log level
]

/**
 * Activates the virtualenv at `envdir` and runs run_integration_test.sh with
 * the given arguments.
 *
 * Declared as a script method (not a local closure) so that it can also be
 * called from `preCommitIT(...)`, which cannot see script-local variables;
 * that is why `runScriptsDir` and `envdir` are passed in explicitly.
 *
 * @param runScriptsDir directory that contains run_integration_test.sh
 * @param envdir root directory of the Python virtualenv to activate
 * @param argMap arguments, rendered to a command-line string by mapToArgString
 */
def runIntegrationTest(String runScriptsDir, String envdir, Map argMap) {
  def cmdArgs = mapToArgString(argMap)
  exec {
    executable 'sh'
    args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
  }
}

/**
 * Registers one pre-commit integration-test task named
 * "preCommitIT_{batch|streaming}[_V2]".
 *
 * Note that the streaming variants always pass runner_v2=true, whether or not
 * `runnerV2` is set.
 */
def preCommitIT(String runScriptsDir, String envdir, Boolean streaming, Boolean runnerV2, String pythonSuffix) {
  def suffix = runnerV2 ? '_V2' : ''
  suffix = streaming ? "_streaming$suffix" : "_batch$suffix"
  task "preCommitIT${suffix}" {
    dependsOn 'initializeForDataflowJob'

    doLast {
      // Basic integration tests to run in PreCommit
      def precommitTests = streaming ? [
              "apache_beam/examples/streaming_wordcount_it_test.py::StreamingWordCountIT::test_streaming_wordcount_it",
      ] : [
              "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it",
      ]
      def testOpts = [
              "${precommitTests.join(' ')}",
              "--capture=no",    // Print stdout instantly
              "--numprocesses=2",    // Number of tests running in parallel
              "--timeout=1800",   // Timeout of whole command execution
      ]

      def argMap = [
              "test_opts"   : testOpts,
              "sdk_location": project.ext.sdkLocation,
              "suite"       : "preCommitIT-df${pythonSuffix}",
      ]

      if (streaming){
        argMap.put("streaming", "true")
        argMap.put("runner_v2", "true")
      } else if (runnerV2) {
        argMap.put("runner_v2", "true")
      }

      runIntegrationTest(runScriptsDir, envdir, argMap)
    }
  }
}

preCommitIT(runScriptsDir, envdir, false, false, pythonVersionSuffix)
preCommitIT(runScriptsDir, envdir, true, false, pythonVersionSuffix)
preCommitIT(runScriptsDir, envdir, false, true, pythonVersionSuffix)
preCommitIT(runScriptsDir, envdir, true, true, pythonVersionSuffix)

// Aggregate: batch + streaming pre-commit ITs.
task preCommitIT{
  dependsOn preCommitIT_batch
  dependsOn preCommitIT_streaming
}

// Aggregate: batch + streaming pre-commit ITs, "_V2" variants.
task preCommitIT_V2{
  dependsOn preCommitIT_batch_V2
  dependsOn preCommitIT_streaming_V2
}

// Runs the tests collected by "it_postcommit".
task postCommitIT {
  dependsOn 'initializeForDataflowJob'

  doLast {
    def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
    def argMap = [
        "test_opts": testOpts,
        "sdk_location": project.ext.sdkLocation,
        "suite": "postCommitIT-df${pythonVersionSuffix}",
        "collect": "it_postcommit"
    ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Runs the tests collected by "it_postcommit_sickbay".
// NOTE(review): reuses the "postCommitIT-df..." suite name of postCommitIT;
// confirm whether the shared name is intentional.
task postCommitSickbay {
  dependsOn 'initializeForDataflowJob'

  doLast {
    def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
    def argMap = [
        "test_opts": testOpts,
        "sdk_location": project.ext.sdkLocation,
        "suite": "postCommitIT-df${pythonVersionSuffix}",
        "collect": "it_postcommit_sickbay"
    ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Runs the tests collected by "spannerio_it".
// NOTE(review): also reuses the "postCommitIT-df..." suite name; confirm.
task spannerioIT {
  dependsOn 'initializeForDataflowJob'

  doLast {
    def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
    def argMap = [
        "test_opts": testOpts,
        "sdk_location": project.ext.sdkLocation,
        "suite": "postCommitIT-df${pythonVersionSuffix}",
        "collect": "spannerio_it"
    ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Runs the "examples_postcommit" tests in two passes: first the ones that can
// run under xdist, then the ones marked no_xdist.
task examples {
  dependsOn 'initializeForDataflowJob'
  def testOpts = basicPytestOpts

  // Execute tests with xdists
  doFirst {
    def argMap = [
            "test_opts": testOpts + ["--numprocesses=8", "--dist=loadfile"],
            "sdk_location": project.ext.sdkLocation,
            "runner_v2": "true",
            "suite": "postCommitIT-df${pythonVersionSuffix}-xdist",
            "collect": "examples_postcommit and not no_xdist and not sickbay_dataflow"
    ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }

  // Execute tests that fail with xdists
  doLast {
    def argMap = [
            "test_opts": testOpts,
            "sdk_location": project.ext.sdkLocation,
            "runner_v2": "true",
            "suite": "postCommitIT-df${pythonVersionSuffix}-no-xdist",
            "collect": "examples_postcommit and no_xdist and not sickbay_dataflow"
    ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Batch ValidatesRunner tests. -PuseRunnerV2 / -PdisableRunnerV2 add the
// corresponding runner_v2 / disable_runner_v2 arguments.
task validatesRunnerBatchTests {
  dependsOn 'initializeForDataflowJob'

  doLast {
    def argMap = [
        "test_opts"   : basicPytestOpts + ["--numprocesses=8"],
        "sdk_location": project.ext.sdkLocation,
        "suite"       : "validatesRunnerBatchTests-df${pythonVersionSuffix}",
        "collect": "it_validatesrunner and not no_sickbay_batch"
    ]

    if (project.hasProperty('useRunnerV2')) {
      argMap.put("runner_v2", "true")
    }

    if (project.hasProperty('disableRunnerV2')) {
      argMap.put("disable_runner_v2", "true")
    }
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Streaming ValidatesRunner tests, in an xdist pass followed by a no_xdist pass.
task validatesRunnerStreamingTests {
  dependsOn 'initializeForDataflowJob'

  // TODO(BEAM-3544,https://github.com/apache/beam/issues/19012): Disable tests with 'sickbay-streaming' tag.
  // Execute tests with xdists
  doFirst {
    def argMap = [
                "test_opts": basicPytestOpts + ["--numprocesses=8"],
                "streaming": "true",
                "sdk_location": project.ext.sdkLocation,
                "suite": "validatesRunnerStreamingTests-df${pythonVersionSuffix}-xdist",
                "collect": "it_validatesrunner and not no_sickbay_streaming and not no_xdist",
                "runner_v2": "true",
                ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }

  // Execute tests that fail with xdists
  doLast {
    def argMap = [
                "test_opts": basicPytestOpts,
                "streaming": "true",
                "sdk_location": project.ext.sdkLocation,
                "suite": "validatesRunnerStreamingTests-df${pythonVersionSuffix}-noxdist",
                "collect": "it_validatesrunner and not no_sickbay_streaming and no_xdist",
                "runner_v2": "true",
                ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Runs a single pytest target given by -Ptest=..., with the pipeline options
// given by -Ptest-pipeline-options=... plus --sdk_location.
task runPerformanceTest {
  dependsOn 'initializeForDataflowJob'

  def test = project.findProperty('test')
  def suite = "runPerformanceTest-df${pythonVersionSuffix}"
  def xUnitFile ="pytest-${suite}.xml"

  doLast {
    // Without this check a missing property is interpolated as the literal
    // string "null" and handed to pytest as the test path.
    if (!test) {
      throw new GradleException('runPerformanceTest requires the test to run to be set with -Ptest=<pytest target>.')
    }
    // `null + String` in Groovy yields "null...", so default to an empty string.
    def userOpts = project.findProperty('test-pipeline-options') ?: ''
    def testOpts = "${userOpts} --sdk_location=${project.ext.sdkLocation}".toString().trim()

    exec {
      workingDir "${project.rootDir}/sdks/python"
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && pytest -o junit_suite_name=${suite}" +
          " ${test} --test-pipeline-options=\"${testOpts}\" --junitxml=${xUnitFile} --timeout=1800"
    }
  }
}

// Runs apache_beam.io.mongodbio_it_test with the options from -Popts=... plus
// --sdk_location.
task mongodbioIT {
  dependsOn 'initializeForDataflowJob'

  doLast {
    // Default to '' so that an unset -Popts does not become the argument "null".
    def userOpts = findProperty('opts') ?: ''
    def opts = "${userOpts} --sdk_location=${project.ext.sdkLocation}".toString().trim()

    exec {
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && python -m apache_beam.io.mongodbio_it_test ${opts}"
    }
  }
}

// pip-installs the requirements of the Chicago Taxi benchmark into the virtualenv.
task installChicagoTaxiExampleRequirements {
  dependsOn 'initializeForDataflowJob'

  doLast {
    exec {
      workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/"
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && pip install -r requirements.txt"
    }
  }
}

// Runs run_chicago.sh with -PgcsRoot=... and -PpipelineOptions=... plus
// --sdk_location.
task chicagoTaxiExample {
  dependsOn 'installChicagoTaxiExampleRequirements'

  doLast {
    // NOTE(review): gcsRoot is not checked; when -PgcsRoot is missing the
    // script receives the literal string "null" as its first argument.
    def gcsRoot = findProperty('gcsRoot')
    def pipelineOptions = findProperty('pipelineOptions') ?: ""
    pipelineOptions += " --sdk_location=\"${project.ext.sdkLocation}\""

    exec {
      workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/chicago_taxi/"
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && ./run_chicago.sh ${gcsRoot} TestDataflowRunner ${pipelineOptions}"
    }
  }
}

// Builds the SDK container for this Python version and runs
// run_validatescontainer.sh against it.
task validatesContainer() {
  if (project.hasProperty("testRCDependencies")) {
    // Generate a requirements file with pre-release versions for the docker task
    // if testing with pre-release dependencies.
    dependsOn ":sdks:python:container:py${pythonVersionNumber}:generatePythonRequirements"
    mustRunAfter ":sdks:python:container:py${pythonVersionNumber}:generatePythonRequirements"
  }
  dependsOn 'initializeForDataflowJob'
  dependsOn ":sdks:python:container:py${pythonVersionNumber}:docker"
  def runScriptsPath = "${rootDir}/sdks/python/container/run_validatescontainer.sh"
  doLast {
    exec {
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && cd ${rootDir} && ${runScriptsPath} " +
              "${project.ext.pythonVersion} " +
              "${project.ext.sdkLocation}"
    }
  }
}

// Runs the TensorRT object-detection example on Dataflow with a T4 accelerator.
def tensorRTTests = tasks.create("tensorRTtests") {
  dependsOn 'installGcpTest'
  dependsOn ':sdks:python:sdist'
  doLast {
    def argMap = [
      "runner": "DataflowRunner",
      "machine_type":"n1-standard-4",
      // TODO(https://github.com/apache/beam/issues/22651): Build docker image for tensor RT tests during Run time.
      // This would also enable to use wheel "--sdk_location" as other tasks, and eliminate distTarBall dependency
      // declaration for this project.
      "sdk_container_image": "us.gcr.io/apache-beam-testing/python-postcommit-it/tensor_rt:latest",
      "sdk_location": files(configurations.distTarBall.files).singleFile,
      "project": "apache-beam-testing",
      "region": "us-central1",
      "input": "gs://apache-beam-ml/testing/inputs/tensorrt_image_file_names.txt",
      "output": "gs://apache-beam-ml/outputs/tensorrt_predictions.txt",
      "engine_path": "gs://apache-beam-ml/models/ssd_mobilenet_v2_320x320_coco17_tpu-8.trt",
      "disk_size_gb": 75
    ]
    def cmdArgs = mapToArgString(argMap)
    exec {
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && pip install pillow && python -m apache_beam.examples.inference.tensorrt_object_detection $cmdArgs --experiment='worker_accelerator=type:nvidia-tesla-t4;count:1;install-nvidia-driver' --experiment=no_use_multiple_sdk_containers"
    }
  }
}

// pip-installs the requirements of the cloudml benchmarks into the virtualenv.
task installTFTRequirements {
  dependsOn 'initializeForDataflowJob'
  doLast {
    exec {
      workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/cloudml/"
      executable 'sh'
      args '-c', ". ${envdir}/bin/activate && pip install -r requirements.txt"
    }
  }
}

// Tensorflow transform integration and benchmarking tests on Apache Beam.
task tftTests {
  dependsOn "installTFTRequirements"

  doLast {
    // Default to '' so that an unset -Popts does not become the text "null".
    def userOpts = project.findProperty('opts') ?: ''
    def opts = "${userOpts} --sdk_location=${project.ext.sdkLocation}".toString().trim()
    def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"]
    def argMap = [
      "test_opts": testOpts,
      "suite": "TFTransformTests-df${pythonVersionSuffix}",
      "collect": "uses_tft",
      "requirements_file": "apache_beam/testing/benchmarks/cloudml/requirements.txt",
      "pipeline_opts": opts,
    ]
    runIntegrationTest(runScriptsDir, envdir, argMap)
  }
}

// Add all RunInference E2E tests that run on DataflowRunner.
// As of now, this test suite is enabled in the py38 suite, as the base NVIDIA image used for Tensor RT
// contains Python 3.8.
// TODO: https://github.com/apache/beam/issues/22651
project.tasks.register("inferencePostCommitIT") {
  dependsOn = [
  'tensorRTtests',
  ]
}


// Create cross-language tasks for running tests against Java expansion service(s)
def dataflowProject = project.findProperty('dataflowProject') ?: 'apache-beam-testing'
def dataflowRegion = project.findProperty('dataflowRegion') ?: 'us-central1'

project(":sdks:python:test-suites:xlang").ext.xlangTasks.each { taskMetadata ->
    createCrossLanguageUsingJavaExpansionTask(
      name: taskMetadata.name,
      expansionProjectPath: taskMetadata.expansionProjectPath,
      collectMarker: taskMetadata.collectMarker,
      startJobServer: taskMetadata.startJobServer,
      cleanupJobServer: taskMetadata.cleanupJobServer,
      pythonPipelineOptions: [
        "--runner=TestDataflowRunner",
        "--project=${dataflowProject}",
        "--region=${dataflowRegion}",
        "--sdk_harness_container_image_overrides=.*java.*,gcr.io/apache-beam-testing/beam-sdk/beam_java8_sdk:latest"
      ],
      pytestOptions: basicPytestOpts
    )
}