#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

###########################################################################
#
# This script is used in Gradle to run a single or a set of Python integration
# tests locally or on Jenkins. Note that this script does not set up the Python
# environment which is required for integration tests. In order to do so, run
# the Gradle tasks defined in :sdks:python:test-suites instead.
#
# In order to run tests with custom options, use the following commandline flags:
#
# Pipeline related flags:
#     runner            -> Runner that executes the pipeline job.
#                          e.g. TestDataflowRunner, TestDirectRunner
#     project           -> Project name of the cloud service.
#     region            -> Compute Engine region to create the Dataflow job.
#     gcs_location      -> Base location on GCS. Some pipeline options are
#                          derived from it including output, staging_location
#                          and temp_location.
#     sdk_location      -> Python tar ball location. Glob is accepted.
#     num_workers       -> Number of workers.
#     sleep_secs        -> Number of seconds to wait before verification.
#     streaming         -> True if a streaming job.
#     kms_key_name      -> Name of Cloud KMS encryption key to use in some tests.
#     requirements_file -> Requirements file to stage with the pipeline. It is
#                          copied to postcommit_requirements.txt. When omitted,
#                          a default set of test dependencies is generated.
#     runner_v2         -> "true" adds --experiments=use_runner_v2 (plus
#                          --enable_streaming_engine for streaming jobs, or
#                          --experiments=beam_fn_api otherwise).
#     disable_runner_v2 -> "true" adds --experiments=disable_runner_v2.
#     dataflow_endpoint -> Value passed through as --dataflow_endpoint.
#     pipeline_opts     -> List of space separated pipeline options. If this
#                          flag is specified, all above flags will be ignored.
#                          Please include all required pipeline options when
#                          using this flag.
#
# Test related flags:
#     test_opts         -> List of space separated options to configure Pytest test
#                          during execution. Commonly used options like `--capture=no`
#                          `--collect-only`. More can be found in
#                          https://docs.pytest.org/en/latest/reference.html#command-line-flags
#     suite             -> Namespace for this run of tests. Required if running
#                          under Jenkins. Used to differentiate runs of the same
#                          tests with different interpreters/dependencies/etc.
#     collect           -> Marker expression; passed to pytest as `-m=<value>`.
#
# Every flag takes exactly one value (`--flag value`).
#
# Example usages:
#     - Run the default test (WordCountIT.test_wordcount_it) with default
#       pipeline options:
#     `$ ./run_integration_test.sh`
#
#     - Run single integration test with default pipeline options:
#     `$ ./run_integration_test.sh --test_opts apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it`
#
#     - Run the default test with customized pipeline options:
#     `$ ./run_integration_test.sh --project my-project --gcs_location gs://my-location`

###########################################################################
# Get pipeline options specified from commandline arguments.

# Default pipeline options
PROJECT=apache-beam-testing
RUNNER=TestDataflowRunner
REGION=us-central1
GCS_LOCATION=gs://temp-storage-for-end-to-end-tests
SDK_LOCATION=build/apache-beam.tar.gz
NUM_WORKERS=1
SLEEP_SECS=20
STREAMING=false
KMS_KEY_NAME="projects/apache-beam-testing/locations/global/keyRings/beam-it/cryptoKeys/test"
SUITE=""
COLLECT_MARKERS=
REQUIREMENTS_FILE=""

# NOTE: RUNNER_V2, DISABLE_RUNNER_V2, DATAFLOW_ENDPOINT and PIPELINE_OPTS are
# not given defaults here, so values already present in the environment are
# used unless the corresponding flag overrides them.

# Default test (pytest) options.
# Run WordCountIT.test_wordcount_it by default if no test options are
# provided.
TEST_OPTS="apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it"

# Every recognised flag consumes exactly one value, so the two shifts are
# shared at the bottom of the loop. errexit is intentionally not enabled yet:
# a trailing flag without a value leaves the variable empty and the second
# shift fails harmlessly instead of aborting the script.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --runner)            RUNNER="$2" ;;
    --project)           PROJECT="$2" ;;
    --region)            REGION="$2" ;;
    --gcs_location)      GCS_LOCATION="$2" ;;
    --sdk_location)      SDK_LOCATION="$2" ;;
    --requirements_file) REQUIREMENTS_FILE="$2" ;;
    --num_workers)       NUM_WORKERS="$2" ;;
    --sleep_secs)        SLEEP_SECS="$2" ;;
    --streaming)         STREAMING="$2" ;;
    --runner_v2)         RUNNER_V2="$2" ;;
    --disable_runner_v2) DISABLE_RUNNER_V2="$2" ;;
    --kms_key_name)      KMS_KEY_NAME="$2" ;;
    --dataflow_endpoint) DATAFLOW_ENDPOINT="$2" ;;
    --pipeline_opts)     PIPELINE_OPTS="$2" ;;
    --test_opts)         TEST_OPTS="$2" ;;
    --suite)             SUITE="$2" ;;
    --collect)           COLLECT_MARKERS="-m=$2" ;;
    *) # unknown option
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
  shift # past argument
  shift # past value
done

if [[ -n "$JENKINS_HOME" && -z "$SUITE" ]]; then
  echo "Argument --suite is required in a Jenkins environment." >&2
  exit 1
fi

set -o errexit


###########################################################################

# Check that the script is running in a known directory.
if [[ $PWD != *sdks/python* ]]; then
  echo 'Unable to locate Apache Beam Python SDK root directory' >&2
  exit 1
fi

# Go to the Apache Beam Python SDK root: keep everything up to and including
# the first "sdks/python" in the current path. Quoted so that checkout paths
# containing whitespace still work.
if [[ $PWD != *sdks/python ]]; then
  cd "${PWD%%sdks/python*}sdks/python"
fi


###########################################################################
# Build pipeline options if not provided in --pipeline_opts from commandline

if [[ -z $PIPELINE_OPTS ]]; then
  # Get tar ball path. SDK_LOCATION is deliberately left unquoted because a
  # glob is accepted; the last match reported by find wins.
  # shellcheck disable=SC2086
  sdk_tarball=$(find ${SDK_LOCATION} 2> /dev/null | tail -n1)
  if [[ -n "$sdk_tarball" ]]; then
    SDK_LOCATION=$sdk_tarball
  else
    echo "[WARNING] Could not find SDK tarball in SDK_LOCATION: $SDK_LOCATION." >&2
  fi

  # Install test dependencies for ValidatesRunner tests.
  # pyhamcrest==1.10.0 doesn't work on Py2.
  # See: https://github.com/hamcrest/PyHamcrest/issues/131.
  if [[ -z $REQUIREMENTS_FILE ]]; then
    {
      echo "pyhamcrest!=1.10.0,<2.0.0"
      echo "mock<3.0.0"
      echo "parameterized>=0.7.1,<0.8.0"
    } > postcommit_requirements.txt
  else
    cp -- "$REQUIREMENTS_FILE" postcommit_requirements.txt
  fi

  # Options used to run testing pipeline on Cloud Dataflow Service. Also used for
  # running on DirectRunner (some options ignored).
  opts=(
    "--runner=$RUNNER"
    "--project=$PROJECT"
    "--region=$REGION"
    "--staging_location=$GCS_LOCATION/staging-it"
    "--temp_location=$GCS_LOCATION/temp-it"
    "--output=$GCS_LOCATION/py-it-cloud/output"
    "--sdk_location=$SDK_LOCATION"
    "--requirements_file=postcommit_requirements.txt"
    "--num_workers=$NUM_WORKERS"
    "--sleep_secs=$SLEEP_SECS"
  )

  # Add --streaming if provided
  if [[ "$STREAMING" = true ]]; then
    opts+=("--streaming")
  fi

  # Add --runner_v2 if provided
  if [[ "$RUNNER_V2" = true ]]; then
    opts+=("--experiments=use_runner_v2")
    if [[ "$STREAMING" = true ]]; then
      # Dataflow Runner V2 only supports streaming engine.
      opts+=("--enable_streaming_engine")
    else
      opts+=("--experiments=beam_fn_api")
    fi
  fi

  # Add --disable_runner_v2 if provided
  if [[ "$DISABLE_RUNNER_V2" = true ]]; then
    opts+=("--experiments=disable_runner_v2")
  fi

  if [[ -n "$KMS_KEY_NAME" ]]; then
    opts+=(
      "--kms_key_name=$KMS_KEY_NAME"
      "--dataflow_kms_key=$KMS_KEY_NAME"
    )
  fi

  if [[ -n "$DATAFLOW_ENDPOINT" ]]; then
    opts+=("--dataflow_endpoint=$DATAFLOW_ENDPOINT")
  fi

  # Join all options into one space separated string.
  PIPELINE_OPTS=$(IFS=" "; printf '%s' "${opts[*]}")

fi

# Handle double quotes in PIPELINE_OPTS
# add a backslash before `"` to keep it in command line options
PIPELINE_OPTS=${PIPELINE_OPTS//\"/\\\"}

###########################################################################
# Run tests and validate that jobs finish successfully.

echo ">>> RUNNING integration tests with pipeline options: $PIPELINE_OPTS"
echo ">>> pytest options: $TEST_OPTS"
echo ">>> collect markers: $COLLECT_MARKERS"

# Assemble the pytest command line as an array so that values such as the
# suite name reach pytest as single arguments.
pytest_args=(
  -o "junit_suite_name=$SUITE"
  -o log_cli=true
  -o log_level=INFO
  "--junitxml=pytest_$SUITE.xml"
)
# TEST_OPTS is a space separated list of flags, so it is word-split on purpose.
# shellcheck disable=SC2206
pytest_args+=($TEST_OPTS)
pytest_args+=("--test-pipeline-options=$PIPELINE_OPTS")
# Handle markers as an independent argument from $TEST_OPTS to prevent errors in space separated flags
if [[ -n "$COLLECT_MARKERS" ]]; then
  pytest_args+=("$COLLECT_MARKERS")
fi

pytest "${pytest_args[@]}"