#!/bin/bash
#
#    Licensed to the Apache Software Foundation (ASF) under one or more
#    contributor license agreements.  See the NOTICE file distributed with
#    this work for additional information regarding copyright ownership.
#    The ASF licenses this file to You under the Apache License, Version 2.0
#    (the "License"); you may not use this file except in compliance with
#    the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.

# This script executes ValidatesRunner tests including launching any additional
# services needed, such as job services or expansion services.
#
# The following runners are supported, and selected via a flag:
# --runner {portable|direct|flink|spark|samza|dataflow} (default: portable)
#   Select which runner to execute tests on. This flag also determines which
#   services to start up and which tests may be skipped.
#     direct   - Go SDK Direct Runner
#     portable - (default) Python Portable Runner (aka. Reference Runner or FnAPI Runner)
#     flink    - Java Flink Runner (local mode)
#     spark    - Java Spark Runner (local mode)
#     samza    - Java Samza Runner
#     dataflow - Dataflow Runner
#
# General flags:
#     --tests -> A space-separated list of targets for "go test", written with
#         beam/sdks/go as the working directory. Defaults to all packages in the
#         integration and regression directories.
#     --timeout -> Timeout for the go test command, on a per-package level.
#     --simultaneous -> Number of simultaneous packages to test.
#         Controls the -p flag for the go test command.
#         Not used for Flink, Spark, or Samza runners. Defaults to 3 otherwise.
#     --endpoint -> An endpoint for an existing job server outside the script.
#         If present, job server jar flags are ignored.
#     --test_expansion_jar -> Filepath to jar for an expansion service, for
#         runners that support cross-language. The test expansion service is one
#         that can expand test-only cross-language transforms.
#     --test_expansion_addr -> An endpoint for an existing test expansion service
#         outside the script. If present, --test_expansion_jar is ignored.
#     --io_expansion_jar -> Filepath to jar for an expansion service, for
#         runners that support cross-language. The IO expansion service is one
#         that can expand cross-language transforms for Beam IOs.
#     --io_expansion_addr -> An endpoint for an existing expansion service
#         outside the script. If present, --io_expansion_jar is ignored.
#     --schemaio_expansion_jar -> Filepath to jar for a SchemaIO expansion
#         service, started by this script on a free local port.
#     --schemaio_expansion_addr -> An endpoint for an existing SchemaIO expansion
#         service. If present, --schemaio_expansion_jar is ignored.
#     --debeziumio_expansion_jar -> Filepath to jar for a DebeziumIO expansion
#         service, started by this script on a free local port.
#     --debeziumio_expansion_addr -> An endpoint for an existing DebeziumIO
#         expansion service. If present, --debeziumio_expansion_jar is ignored.
#     --sdk_overrides -> Only needed if performing cross-language tests with
#         a staged SDK harness container. Note for Dataflow: Using this flag
#         prevents the script from creating and staging a container.
#     --pipeline_opts -> Appends additional pipeline options to the test command,
#         in addition to those already added by this script.
#
# Runner-specific flags:
#   Flink
#     --flink_job_server_jar -> Filepath to jar, used if runner is Flink.
#   Spark
#     --spark_job_server_jar -> Filepath to jar, used if runner is Spark.
#   Samza
#     --samza_job_server_jar -> Filepath to jar, used if runner is Samza.
#   Dataflow
#     --dataflow_project -> GCP project to run Dataflow jobs on.
#     --project -> Same project as --dataflow_project, but in URL format, for
#         example in the format "us.gcr.io/<project>".
#     --region -> GCP region to run Dataflow jobs on.
#     --gcs_location -> GCS URL for storing temporary files for Dataflow jobs.
#     --java11_home -> Passed to Gradle as -Pjava11Home when the Java 11 SDK
#         container is built for cross-language tests.

set -e
# Print each command, prefixed with "$ ", before it runs. Commands whose text
# starts with one of the words in the pattern below are not printed.
trap '! [[ "$BASH_COMMAND" =~ ^(echo|read|if|ARGS|shift|SOCKET_SCRIPT|\[\[) ]] && \
cmd=`eval echo "$BASH_COMMAND" 2>/dev/null` && echo "\$ $cmd"' DEBUG

# Resolve current directory
CURRENT_DIRECTORY=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Default test targets.
TESTS="./test/integration/... ./test/regression"

# Default runner.
RUNNER=portable

# Default timeout. This timeout is applied per-package, as tests in different
# packages are executed in parallel.
TIMEOUT=3h

# Default limit on simultaneous test binaries/packages being executed.
SIMULTANEOUS=3

# Where to store integration test outputs.
GCS_LOCATION=gs://temp-storage-for-end-to-end-tests
GCS_SUBFOLDER="test$RANDOM"

# Project for the container and integration test
PROJECT=apache-beam-testing
DATAFLOW_PROJECT=apache-beam-testing
REGION=us-central1

# Kills every background service this script started (job server and expansion
# services), identified by the *_PID variables; unset or empty PIDs are skipped.
# The trap below registers this for SIGINT, SIGTERM and EXIT, so it can run
# more than once. A failed kill (for example, the process is already gone)
# must therefore not abort the remaining cleanup under `set -e`.
exit_background_processes () {
  local pid
  for pid in "$JOBSERVER_PID" "$TEST_EXPANSION_PID" "$IO_EXPANSION_PID" \
      "$SCHEMAIO_EXPANSION_PID" "$DEBEZIUMIO_EXPANSION_PID"; do
    if [[ -n "$pid" ]]; then
      kill -9 "$pid" || true
    fi
  done
}
trap exit_background_processes SIGINT SIGTERM EXIT

# Parse "--flag value" pairs. Each recognised flag selects the variable that
# receives its value; anything else is rejected.
while [[ $# -gt 0 ]]
do
  key="$1"
  case $key in
    --tests) opt_var=TESTS ;;
    --runner) opt_var=RUNNER ;;
    --timeout) opt_var=TIMEOUT ;;
    --simultaneous) opt_var=SIMULTANEOUS ;;
    --project) opt_var=PROJECT ;;
    --region) opt_var=REGION ;;
    --dataflow_project) opt_var=DATAFLOW_PROJECT ;;
    --gcs_location) opt_var=GCS_LOCATION ;;
    --flink_job_server_jar) opt_var=FLINK_JOB_SERVER_JAR ;;
    --samza_job_server_jar) opt_var=SAMZA_JOB_SERVER_JAR ;;
    --spark_job_server_jar) opt_var=SPARK_JOB_SERVER_JAR ;;
    --endpoint) opt_var=ENDPOINT ;;
    --test_expansion_jar) opt_var=TEST_EXPANSION_JAR ;;
    --test_expansion_addr) opt_var=TEST_EXPANSION_ADDR ;;
    --io_expansion_jar) opt_var=IO_EXPANSION_JAR ;;
    --io_expansion_addr) opt_var=IO_EXPANSION_ADDR ;;
    --schemaio_expansion_jar) opt_var=SCHEMAIO_EXPANSION_JAR ;;
    --schemaio_expansion_addr) opt_var=SCHEMAIO_EXPANSION_ADDR ;;
    --debeziumio_expansion_jar) opt_var=DEBEZIUMIO_EXPANSION_JAR ;;
    --debeziumio_expansion_addr) opt_var=DEBEZIUMIO_EXPANSION_ADDR ;;
    --sdk_overrides) opt_var=SDK_OVERRIDES ;;
    --pipeline_opts) opt_var=PIPELINE_OPTS ;;
    --java11_home) opt_var=JAVA11_HOME ;;
    *)    # unknown option
      echo "Unknown option: $1"
      exit 1
      ;;
  esac
  # Without this check a trailing flag with no value would make `shift` fail
  # and `set -e` would end the script without any explanation.
  if [[ $# -lt 2 ]]; then
    echo "Missing value for option: $key" >&2
    exit 1
  fi
  printf -v "$opt_var" '%s' "$2"
  shift 2 # past argument and value
done

# Go to the root of the repository
cd "$(git rev-parse --show-toplevel)"

# Verify in the root of the repository
test -d sdks/go/test

# Hacky python script to find a free port. Note there is a small chance the chosen port could
# get taken before being claimed by the job server.
SOCKET_SCRIPT="
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(('localhost', 0))
print(s.getsockname()[1])
s.close()
"

# Set up environment based on runner.
# Start a job server for runners that are driven through a job endpoint,
# unless an existing endpoint was supplied with --endpoint.
if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" ]]; then
  if [[ -z "$ENDPOINT" ]]; then
    JOB_PORT=$(python3 -c "$SOCKET_SCRIPT")
    ENDPOINT="localhost:$JOB_PORT"
    echo "No endpoint specified; starting a new $RUNNER job server on $ENDPOINT"
    if [[ "$RUNNER" == "flink" ]]; then
      # "[local]" is quoted so the shell cannot treat it as a glob pattern.
      java \
          -jar "$FLINK_JOB_SERVER_JAR" \
          --flink-master "[local]" \
          --flink-conf-dir "$CURRENT_DIRECTORY/../../../runners/flink/src/test/resources" \
          --job-port "$JOB_PORT" \
          --expansion-port 0 \
          --artifact-port 0 &
    elif [[ "$RUNNER" == "samza" ]]; then
      java \
          -jar "$SAMZA_JOB_SERVER_JAR" \
          --job-port "$JOB_PORT" \
          --expansion-port 0 \
          --artifact-port 0 &
    elif [[ "$RUNNER" == "spark" ]]; then
      java \
          -jar "$SPARK_JOB_SERVER_JAR" \
          --spark-master-url local \
          --job-port "$JOB_PORT" \
          --expansion-port 0 \
          --artifact-port 0 &
    elif [[ "$RUNNER" == "portable" ]]; then
      python3 \
          -m apache_beam.runners.portability.local_job_service_main \
          --port "$JOB_PORT" &
    else
      echo "Unknown runner: $RUNNER"
      exit 1;
    fi
    # PID of the job server just launched in the background.
    JOBSERVER_PID=$!
  fi
fi

# Start each expansion service for which a jar was given but no address.
# Every service gets its own free port and its PID is recorded.
if [[ "$RUNNER" != "direct" ]]; then
  if [[ -z "$TEST_EXPANSION_ADDR" && -n "$TEST_EXPANSION_JAR" ]]; then
    EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
    TEST_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
    echo "No test expansion address specified; starting a new test expansion server on $TEST_EXPANSION_ADDR"
    java -jar "$TEST_EXPANSION_JAR" "$EXPANSION_PORT" &
    TEST_EXPANSION_PID=$!
  fi
  if [[ -z "$IO_EXPANSION_ADDR" && -n "$IO_EXPANSION_JAR" ]]; then
    EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
    IO_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
    echo "No IO expansion address specified; starting a new IO expansion server on $IO_EXPANSION_ADDR"
    java -jar "$IO_EXPANSION_JAR" "$EXPANSION_PORT" &
    IO_EXPANSION_PID=$!
  fi
  if [[ -z "$SCHEMAIO_EXPANSION_ADDR" && -n "$SCHEMAIO_EXPANSION_JAR" ]]; then
    EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
    SCHEMAIO_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
    echo "No SchemaIO expansion address specified; starting a new SchemaIO expansion server on $SCHEMAIO_EXPANSION_ADDR"
    java -jar "$SCHEMAIO_EXPANSION_JAR" "$EXPANSION_PORT" &
    SCHEMAIO_EXPANSION_PID=$!
  fi
  if [[ -z "$DEBEZIUMIO_EXPANSION_ADDR" && -n "$DEBEZIUMIO_EXPANSION_JAR" ]]; then
    EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
    DEBEZIUMIO_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
    echo "No DebeziumIO expansion address specified; starting a new DebeziumIO expansion server on $DEBEZIUMIO_EXPANSION_ADDR"
    java -jar "$DEBEZIUMIO_EXPANSION_JAR" "$EXPANSION_PORT" &
    DEBEZIUMIO_EXPANSION_PID=$!
  fi
fi

# Disable parallelism on runners that don't support it.
if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" ]]; then
  SIMULTANEOUS=1
fi

# Succeeds when the major component of a dotted version is numerically below
# a threshold.
# Arguments: $1 - version string such as "400.0.0"
#            $2 - minimum acceptable major version (integer)
# Returns:   0 if the major version is below $2, or if $1 has no numeric major
#            component (such a version is treated as too old); 1 otherwise.
gcloud_major_below() {
  local major="${1%%.*}"
  [[ "$major" =~ ^[0-9]+$ ]] || return 0
  # 10# forces base 10 so a leading zero is not read as octal.
  (( 10#$major < $2 ))
}

if [[ "$RUNNER" == "dataflow" ]]; then
  # Verify docker and gcloud commands exist
  command -v docker
  docker -v
  command -v gcloud
  gcloud --version

  # ensure gcloud is version 186 or above
  TMPDIR=$(mktemp -d)
  gcloud_ver=$(gcloud -v | head -1 | awk '{print $4}')
  # Compare numerically: a string comparison would rank "99" above "186" and
  # "1000" below it.
  if gcloud_major_below "$gcloud_ver" 186; then
    pushd "$TMPDIR"
    curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-186.0.0-linux-x86_64.tar.gz --output gcloud.tar.gz
    tar xf gcloud.tar.gz
    ./google-cloud-sdk/install.sh --quiet
    . ./google-cloud-sdk/path.bash.inc
    popd
    gcloud components update --quiet || echo 'gcloud components update failed'
    gcloud -v
  fi

  # Build the container
  TAG=$(date +%Y%m%d-%H%M%S)
  CONTAINER=us.gcr.io/$PROJECT/$USER/beam_go_sdk
  echo "Using container $CONTAINER"
  ./gradlew :sdks:go:container:docker -Pdocker-repository-root="us.gcr.io/$PROJECT/$USER" -Pdocker-tag="$TAG"

  # Verify it exists
  docker images | grep "$TAG"

  # Push the container
  gcloud docker -- push "$CONTAINER:$TAG"

  if [[ -n "$TEST_EXPANSION_ADDR" || -n "$IO_EXPANSION_ADDR" || -n "$SCHEMAIO_EXPANSION_ADDR" || -n "$DEBEZIUMIO_EXPANSION_ADDR" ]]; then
    ARGS="$ARGS --experiments=use_portable_job_submission"

    if [[ -z "$SDK_OVERRIDES" ]]; then
      # Build the java container for cross-language
      JAVA_TAG=$(date +%Y%m%d-%H%M%S)
      JAVA_CONTAINER=us.gcr.io/$PROJECT/$USER/beam_java11_sdk
      echo "Using container $JAVA_CONTAINER for cross-language java transforms"
      ./gradlew :sdks:java:container:java11:docker -Pdocker-repository-root="us.gcr.io/$PROJECT/$USER" -Pdocker-tag="$JAVA_TAG" -Pjava11Home="$JAVA11_HOME"

      # Verify it exists
      docker images | grep "$JAVA_TAG"

      # Push the container
      gcloud docker -- push "$JAVA_CONTAINER:$JAVA_TAG"

      SDK_OVERRIDES=".*java.*,$JAVA_CONTAINER:$JAVA_TAG"
    fi
  fi
else
  TAG=dev
  ./gradlew :sdks:go:container:docker -Pdocker-tag="$TAG"
  CONTAINER=apache/beam_go_sdk
fi

# The go test flag -p dictates the number of simultaneous test binaries running tests.
# Note that --parallel indicates within a test binary level of parallelism.
ARGS="$ARGS -p $SIMULTANEOUS"

# Assemble test arguments and pipeline options.
# Flags passed to every run: the go test timeout followed by the pipeline
# options derived from the settings above.
ARGS="$ARGS -timeout $TIMEOUT"
ARGS="$ARGS --runner=$RUNNER"
ARGS="$ARGS --project=$DATAFLOW_PROJECT"
ARGS="$ARGS --region=$REGION"
ARGS="$ARGS --environment_type=DOCKER"
ARGS="$ARGS --environment_config=$CONTAINER:$TAG"
ARGS="$ARGS --staging_location=$GCS_LOCATION/staging-validatesrunner-test/$GCS_SUBFOLDER"
ARGS="$ARGS --temp_location=$GCS_LOCATION/temp-validatesrunner-test/$GCS_SUBFOLDER"
ARGS="$ARGS --endpoint=$ENDPOINT"
# Expansion service addresses are only passed when one is known.
if [[ -n "$TEST_EXPANSION_ADDR" ]]; then
  ARGS="$ARGS --test_expansion_addr=$TEST_EXPANSION_ADDR"
fi
if [[ -n "$IO_EXPANSION_ADDR" ]]; then
  ARGS="$ARGS --io_expansion_addr=$IO_EXPANSION_ADDR"
fi
if [[ -n "$SCHEMAIO_EXPANSION_ADDR" ]]; then
  ARGS="$ARGS --schemaio_expansion_addr=$SCHEMAIO_EXPANSION_ADDR"
fi
if [[ -n "$DEBEZIUMIO_EXPANSION_ADDR" ]]; then
  ARGS="$ARGS --debeziumio_expansion_addr=$DEBEZIUMIO_EXPANSION_ADDR"
fi
if [[ -n "$SDK_OVERRIDES" ]]; then
  OVERRIDE=--sdk_harness_container_image_override="$SDK_OVERRIDES"
  ARGS="$ARGS $OVERRIDE"
fi
ARGS="$ARGS $PIPELINE_OPTS"

# Start from a known status so a TEST_EXIT_CODE inherited from the environment
# cannot become this script's exit status when the tests pass.
TEST_EXIT_CODE=0

cd sdks/go
echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS"
# $TESTS and $ARGS are deliberately unquoted: each holds a space-separated
# list that must be split into separate arguments.
./run_with_go_version.sh test -v $TESTS $ARGS 1>&2 \
    || TEST_EXIT_CODE=$? # don't fail fast here; clean up environment before exiting
cd ../..
# Remove the artifacts created for a Dataflow run, then exit with the status
# of the test command.
if [[ "$RUNNER" == "dataflow" ]]; then
  # Delete the container locally and remotely
  docker rmi "$CONTAINER:$TAG" || echo "Failed to remove container"
  gcloud --quiet container images delete "$CONTAINER:$TAG" || echo "Failed to delete container"

  # JAVA_CONTAINER and JAVA_TAG are only set when this script built the Java
  # cross-language image. When --sdk_overrides was supplied no image was built,
  # so there is nothing to delete.
  if [[ -n "$JAVA_CONTAINER" && -n "$JAVA_TAG" ]]; then
    # Delete the java cross-language container locally and remotely
    docker rmi "$JAVA_CONTAINER:$JAVA_TAG" || echo "Failed to remove container"
    gcloud --quiet container images delete "$JAVA_CONTAINER:$JAVA_TAG" || echo "Failed to delete container"
  fi

  # Clean up tempdir
  if [[ -n "$TMPDIR" ]]; then
    rm -rf -- "$TMPDIR"
  fi
fi

# TEST_EXIT_CODE is only assigned when the test command failed; default to 0.
exit "${TEST_EXIT_CODE:-0}"