github.com/apache/beam/sdks/v2@v2.48.2/go/test/run_validatesrunner_tests.sh (about)

     1  #!/bin/bash
     2  #
     3  #    Licensed to the Apache Software Foundation (ASF) under one or more
     4  #    contributor license agreements.  See the NOTICE file distributed with
     5  #    this work for additional information regarding copyright ownership.
     6  #    The ASF licenses this file to You under the Apache License, Version 2.0
     7  #    (the "License"); you may not use this file except in compliance with
     8  #    the License.  You may obtain a copy of the License at
     9  #
    10  #       http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  #    Unless required by applicable law or agreed to in writing, software
    13  #    distributed under the License is distributed on an "AS IS" BASIS,
    14  #    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  #    See the License for the specific language governing permissions and
    16  #    limitations under the License.
    17  
    18  # This script executes ValidatesRunner tests including launching any additional
    19  # services needed, such as job services or expansion services.
    20  #
    21  # The following runners are supported, and selected via a flag:
    22  # --runner {direct|portable|flink|samza|spark|dataflow} (default: portable)
    23  #  Select which runner to execute tests on. This flag also determines which
    24  #  services to start up and which tests may be skipped.
    25  #    direct   - Go SDK Direct Runner
    26  #    portable - (default) Python Portable Runner (aka. Reference Runner or FnAPI Runner)
    27  #    flink    - Java Flink Runner (local mode)
    28  #    spark    - Java Spark Runner (local mode)
    29  #    dataflow - Dataflow Runner
    30  #
    31  # General flags:
    32  #    --tests -> A space-separated list of targets for "go test", written with
    33  #        beam/sdks/go as the working directory. Defaults to all packages in the
    34  #        integration and regression directories.
    35  #    --timeout -> Timeout for the go test command, on a per-package level.
    36  #    --simultaneous -> Number of simultaneous packages to test.
    37  #        Controls the -p flag for the go test command.
    38  #        Not used for Flink, Spark, or Samza runners.  Defaults to 3 otherwise.
    39  #    --endpoint -> An endpoint for an existing job server outside the script.
    40  #        If present, job server jar flags are ignored.
    41  #    --test_expansion_jar -> Filepath to jar for an expansion service, for
    42  #        runners that support cross-language. The test expansion service is one
    43  #        that can expand test-only cross-language transforms.
    44  #    --test_expansion_addr -> An endpoint for an existing test expansion service
    45  #        outside the script. If present, --test_expansion_jar is ignored.
    46  #    --io_expansion_jar -> Filepath to jar for an expansion service, for
    47  #        runners that support cross-language. The IO expansion service is one
    48  #        that can expand cross-language transforms for Beam IOs.
    49  #    --io_expansion_addr -> An endpoint for an existing expansion service
    50  #        outside the script. If present, --io_expansion_jar is ignored.
    51  #    --sdk_overrides -> Only needed if performing cross-language tests with
    52  #        a staged SDK harness container. Note for Dataflow: Using this flag
    53  #        prevents the script from creating and staging a container.
    54  #    --pipeline_opts -> Appends additional pipeline options to the test command,
    55  #        in addition to those already added by this script.
    56  #
    57  # Runner-specific flags:
    58  #  Flink
    59  #    --flink_job_server_jar -> Filepath to jar, used if runner is Flink.
    60  #  Spark
    61  #    --spark_job_server_jar -> Filepath to jar, used if runner is Spark.
    62  #  Dataflow
    63  #    --dataflow_project -> GCP project to run Dataflow jobs on.
    64  #    --project -> Same project as --dataflow_project, but in URL format, for
    65  #        example in the format "us.gcr.io/<project>".
    66  #    --region -> GCP region to run Dataflow jobs on.
    67  #    --gcs_location -> GCS URL for storing temporary files for Dataflow jobs.
    68  
    69  set -e
    70  trap '! [[ "$BASH_COMMAND" =~ ^(echo|read|if|ARGS|shift|SOCKET_SCRIPT|\[\[) ]] && \
    71  cmd=`eval echo "$BASH_COMMAND" 2>/dev/null` && echo "\$ $cmd"' DEBUG
    72  
    73  # Resolve current directory
    74  CURRENT_DIRECTORY=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
    75  
    76  # Default test targets.
    77  TESTS="./test/integration/... ./test/regression"
    78  
    79  # Default runner.
    80  RUNNER=portable
    81  
    82  # Default timeout. This timeout is applied per-package, as tests in different
    83  # packages are executed in parallel.
    84  TIMEOUT=3h
    85  
    86  # Default limit on simultaneous test binaries/packages being executed.
    87  SIMULTANEOUS=3
    88  
    89  # Where to store integration test outputs.
    90  GCS_LOCATION=gs://temp-storage-for-end-to-end-tests
    91  GCS_SUBFOLDER="test$RANDOM"
    92  
    93  # Project for the container and integration test
    94  PROJECT=apache-beam-testing
    95  DATAFLOW_PROJECT=apache-beam-testing
    96  REGION=us-central1
    97  
    98  # Set up trap to close any running background processes when script ends.
    99  exit_background_processes () {
   100    if [[ ! -z "$JOBSERVER_PID" ]]; then
   101      kill -9 $JOBSERVER_PID || true
   102    fi
   103    if [[ ! -z "$TEST_EXPANSION_PID" ]]; then
   104      kill -9 $TEST_EXPANSION_PID
   105    fi
   106    if [[ ! -z "$IO_EXPANSION_PID" ]]; then
   107      kill -9 $IO_EXPANSION_PID
   108    fi
   109    if [[ ! -z "$SCHEMAIO_EXPANSION_PID" ]]; then
   110      kill -9 $SCHEMAIO_EXPANSION_PID
   111    fi
   112    if [[ ! -z "$DEBEZIUMIO_EXPANSION_PID" ]]; then
   113      kill -9 $DEBEZIUMIO_EXPANSION_PID
   114    fi
   115  }
   116  trap exit_background_processes SIGINT SIGTERM EXIT
   117  
   118  while [[ $# -gt 0 ]]
   119  do
   120  key="$1"
   121  case $key in
   122      --tests)
   123          TESTS="$2"
   124          shift # past argument
   125          shift # past value
   126          ;;
   127      --runner)
   128          RUNNER="$2"
   129          shift # past argument
   130          shift # past value
   131          ;;
   132      --timeout)
   133          TIMEOUT="$2"
   134          shift # past argument
   135          shift # past value
   136          ;;
   137      --simultaneous)
   138          SIMULTANEOUS="$2"
   139          shift # past argument
   140          shift # past value
   141          ;;
   142      --project)
   143          PROJECT="$2"
   144          shift # past argument
   145          shift # past value
   146          ;;
   147      --region)
   148          REGION="$2"
   149          shift # past argument
   150          shift # past value
   151          ;;
   152      --dataflow_project)
   153          DATAFLOW_PROJECT="$2"
   154          shift # past argument
   155          shift # past value
   156          ;;
   157      --gcs_location)
   158          GCS_LOCATION="$2"
   159          shift # past argument
   160          shift # past value
   161          ;;
   162      --flink_job_server_jar)
   163          FLINK_JOB_SERVER_JAR="$2"
   164          shift # past argument
   165          shift # past value
   166          ;;
   167      --samza_job_server_jar)
   168          SAMZA_JOB_SERVER_JAR="$2"
   169          shift # past argument
   170          shift # past value
   171          ;;
   172      --spark_job_server_jar)
   173          SPARK_JOB_SERVER_JAR="$2"
   174          shift # past argument
   175          shift # past value
   176          ;;
   177      --endpoint)
   178          ENDPOINT="$2"
   179          shift # past argument
   180          shift # past value
   181          ;;
   182      --test_expansion_jar)
   183          TEST_EXPANSION_JAR="$2"
   184          shift # past argument
   185          shift # past value
   186          ;;
   187      --test_expansion_addr)
   188          TEST_EXPANSION_ADDR="$2"
   189          shift # past argument
   190          shift # past value
   191          ;;
   192      --io_expansion_jar)
   193          IO_EXPANSION_JAR="$2"
   194          shift # past argument
   195          shift # past value
   196          ;;
   197      --io_expansion_addr)
   198          IO_EXPANSION_ADDR="$2"
   199          shift # past argument
   200          shift # past value
   201          ;;
   202      --schemaio_expansion_jar)
   203          SCHEMAIO_EXPANSION_JAR="$2"
   204          shift # past argument
   205          shift # past value
   206          ;;
   207      --schemaio_expansion_addr)
   208          SCHEMAIO_EXPANSION_ADDR="$2"
   209          shift # past argument
   210          shift # past value
   211          ;;
   212      --debeziumio_expansion_jar)
   213          DEBEZIUMIO_EXPANSION_JAR="$2"
   214          shift # past argument
   215          shift # past value
   216          ;;
   217      --debeziumio_expansion_addr)
   218          DEBEZIUMIO_EXPANSION_ADDR="$2"
   219          shift # past argument
   220          shift # past value
   221          ;;
   222      --sdk_overrides)
   223          SDK_OVERRIDES="$2"
   224          shift # past argument
   225          shift # past value
   226          ;;
   227      --pipeline_opts)
   228          PIPELINE_OPTS="$2"
   229          shift # past argument
   230          shift # past value
   231          ;;
   232      --java11_home)
   233          JAVA11_HOME="$2"
   234          shift # past argument
   235          shift # past value
   236          ;;
   237      *)    # unknown option
   238          echo "Unknown option: $1"
   239          exit 1
   240          ;;
   241  esac
   242  done
   243  
   244  # Go to the root of the repository
   245  cd $(git rev-parse --show-toplevel)
   246  
   247  # Verify in the root of the repository
   248  test -d sdks/go/test
   249  
   250  # Hacky python script to find a free port. Note there is a small chance the chosen port could
   251  # get taken before being claimed by the job server.
   252  SOCKET_SCRIPT="
   253  import socket
   254  s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   255  s.bind(('localhost', 0))
   256  print(s.getsockname()[1])
   257  s.close()
   258  "
   259  
   260  # Set up environment based on runner.
   261  if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" ]]; then
   262    if [[ -z "$ENDPOINT" ]]; then
   263      JOB_PORT=$(python3 -c "$SOCKET_SCRIPT")
   264      ENDPOINT="localhost:$JOB_PORT"
   265      echo "No endpoint specified; starting a new $RUNNER job server on $ENDPOINT"
   266      if [[ "$RUNNER" == "flink" ]]; then
   267        java \
   268            -jar $FLINK_JOB_SERVER_JAR \
   269            --flink-master [local] \
   270            --flink-conf-dir $CURRENT_DIRECTORY/../../../runners/flink/src/test/resources \
   271            --job-port $JOB_PORT \
   272            --expansion-port 0 \
   273            --artifact-port 0 &
   274      elif [[ "$RUNNER" == "samza" ]]; then
   275        java \
   276            -jar $SAMZA_JOB_SERVER_JAR \
   277            --job-port $JOB_PORT \
   278            --expansion-port 0 \
   279            --artifact-port 0 &
   280      elif [[ "$RUNNER" == "spark" ]]; then
   281        java \
   282            -jar $SPARK_JOB_SERVER_JAR \
   283            --spark-master-url local \
   284            --job-port $JOB_PORT \
   285            --expansion-port 0 \
   286            --artifact-port 0 &
   287      elif [[ "$RUNNER" == "portable" ]]; then
   288        python3 \
   289            -m apache_beam.runners.portability.local_job_service_main \
   290            --port $JOB_PORT &
   291      else
   292        echo "Unknown runner: $RUNNER"
   293        exit 1;
   294      fi
   295      JOBSERVER_PID=$!
   296    fi
   297  fi
   298  
   299  if [[ "$RUNNER" != "direct" ]]; then
   300    if [[ -z "$TEST_EXPANSION_ADDR" && -n "$TEST_EXPANSION_JAR" ]]; then
   301      EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
   302      TEST_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
   303      echo "No test expansion address specified; starting a new test expansion server on $TEST_EXPANSION_ADDR"
   304      java -jar $TEST_EXPANSION_JAR $EXPANSION_PORT &
   305      TEST_EXPANSION_PID=$!
   306    fi
   307    if [[ -z "$IO_EXPANSION_ADDR" && -n "$IO_EXPANSION_JAR" ]]; then
   308      EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
   309      IO_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
   310      echo "No IO expansion address specified; starting a new IO expansion server on $IO_EXPANSION_ADDR"
   311      java -jar $IO_EXPANSION_JAR $EXPANSION_PORT &
   312      IO_EXPANSION_PID=$!
   313    fi
   314    if [[ -z "$SCHEMAIO_EXPANSION_ADDR" && -n "$SCHEMAIO_EXPANSION_JAR" ]]; then
   315        EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
   316        SCHEMAIO_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
   317        echo "No SchemaIO expansion address specified; starting a new SchemaIO expansion server on $SCHEMAIO_EXPANSION_ADDR"
   318        java -jar $SCHEMAIO_EXPANSION_JAR $EXPANSION_PORT &
   319        SCHEMAIO_EXPANSION_PID=$!
   320    fi
   321    if [[ -z "$DEBEZIUMIO_EXPANSION_ADDR" && -n "$DEBEZIUMIO_EXPANSION_JAR" ]]; then
   322        EXPANSION_PORT=$(python3 -c "$SOCKET_SCRIPT")
   323        DEBEZIUMIO_EXPANSION_ADDR="localhost:$EXPANSION_PORT"
   324        echo "No DebeziumIO expansion address specified; starting a new DebeziumIO expansion server on $DEBEZIUMIO_EXPANSION_ADDR"
   325        java -jar $DEBEZIUMIO_EXPANSION_JAR $EXPANSION_PORT &
   326        DEBEZIUMIO_EXPANSION_PID=$!
   327    fi
   328  fi
   329  
   330  # Disable parallelism on runners that don't support it.
   331  if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" ]]; then
   332    SIMULTANEOUS=1
   333  fi
   334  
   335  if [[ "$RUNNER" == "dataflow" ]]; then
   336    # Verify docker and gcloud commands exist
   337    command -v docker
   338    docker -v
   339    command -v gcloud
   340    gcloud --version
   341  
   342    # ensure gcloud is version 186 or above
   343    TMPDIR=$(mktemp -d)
   344    gcloud_ver=$(gcloud -v | head -1 | awk '{print $4}')
   345    if [[ "$gcloud_ver" < "186" ]]
   346    then
   347      pushd $TMPDIR
   348      curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-186.0.0-linux-x86_64.tar.gz --output gcloud.tar.gz
   349      tar xf gcloud.tar.gz
   350      ./google-cloud-sdk/install.sh --quiet
   351      . ./google-cloud-sdk/path.bash.inc
   352      popd
   353      gcloud components update --quiet || echo 'gcloud components update failed'
   354      gcloud -v
   355    fi
   356  
   357    # Build the container
   358    TAG=$(date +%Y%m%d-%H%M%S)
   359    CONTAINER=us.gcr.io/$PROJECT/$USER/beam_go_sdk
   360    echo "Using container $CONTAINER"
   361    ./gradlew :sdks:go:container:docker -Pdocker-repository-root=us.gcr.io/$PROJECT/$USER -Pdocker-tag=$TAG
   362  
   363    # Verify it exists
   364    docker images | grep $TAG
   365  
   366    # Push the container
   367    gcloud docker -- push $CONTAINER:$TAG
   368  
   369    if [[ -n "$TEST_EXPANSION_ADDR" || -n "$IO_EXPANSION_ADDR" || -n "$SCHEMAIO_EXPANSION_ADDR" || -n "$DEBEZIUMIO_EXPANSION_ADDR" ]]; then
   370      ARGS="$ARGS --experiments=use_portable_job_submission"
   371  
   372      if [[ -z "$SDK_OVERRIDES" ]]; then
   373        # Build the java container for cross-language
   374        JAVA_TAG=$(date +%Y%m%d-%H%M%S)
   375        JAVA_CONTAINER=us.gcr.io/$PROJECT/$USER/beam_java11_sdk
   376        echo "Using container $JAVA_CONTAINER for cross-language java transforms"
   377        ./gradlew :sdks:java:container:java11:docker -Pdocker-repository-root=us.gcr.io/$PROJECT/$USER -Pdocker-tag=$JAVA_TAG -Pjava11Home=$JAVA11_HOME
   378  
   379        # Verify it exists
   380        docker images | grep $JAVA_TAG
   381  
   382        # Push the container
   383        gcloud docker -- push $JAVA_CONTAINER:$JAVA_TAG
   384  
   385        SDK_OVERRIDES=".*java.*,$JAVA_CONTAINER:$JAVA_TAG"
   386      fi
   387    fi
   388  else
   389    TAG=dev
   390    ./gradlew :sdks:go:container:docker -Pdocker-tag=$TAG
   391    CONTAINER=apache/beam_go_sdk
   392  fi
   393  
   394  # The go test flag -p dictates the number of simultaneous test binaries running tests.
   395  # Note that --parallel indicates within a test binary level of parallism.
   396  ARGS="$ARGS -p $SIMULTANEOUS"
   397  
   398  # Assemble test arguments and pipeline options.
   399  ARGS="$ARGS -timeout $TIMEOUT"
   400  ARGS="$ARGS --runner=$RUNNER"
   401  ARGS="$ARGS --project=$DATAFLOW_PROJECT"
   402  ARGS="$ARGS --region=$REGION"
   403  ARGS="$ARGS --environment_type=DOCKER"
   404  ARGS="$ARGS --environment_config=$CONTAINER:$TAG"
   405  ARGS="$ARGS --staging_location=$GCS_LOCATION/staging-validatesrunner-test/$GCS_SUBFOLDER"
   406  ARGS="$ARGS --temp_location=$GCS_LOCATION/temp-validatesrunner-test/$GCS_SUBFOLDER"
   407  ARGS="$ARGS --endpoint=$ENDPOINT"
   408  if [[ -n "$TEST_EXPANSION_ADDR" ]]; then
   409    ARGS="$ARGS --test_expansion_addr=$TEST_EXPANSION_ADDR"
   410  fi
   411  if [[ -n "$IO_EXPANSION_ADDR" ]]; then
   412    ARGS="$ARGS --io_expansion_addr=$IO_EXPANSION_ADDR"
   413  fi
   414  if [[ -n "$SCHEMAIO_EXPANSION_ADDR" ]]; then
   415    ARGS="$ARGS --schemaio_expansion_addr=$SCHEMAIO_EXPANSION_ADDR"
   416  fi
   417  if [[ -n "$DEBEZIUMIO_EXPANSION_ADDR" ]]; then
   418    ARGS="$ARGS --debeziumio_expansion_addr=$DEBEZIUMIO_EXPANSION_ADDR"
   419  fi
   420  if [[ -n "$SDK_OVERRIDES" ]]; then
   421    OVERRIDE=--sdk_harness_container_image_override="$SDK_OVERRIDES"
   422    ARGS="$ARGS $OVERRIDE"
   423  fi
   424  ARGS="$ARGS $PIPELINE_OPTS"
   425  
   426  cd sdks/go
   427  echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS"
   428  ./run_with_go_version.sh test -v $TESTS $ARGS 1>&2 \
   429      || TEST_EXIT_CODE=$? # don't fail fast here; clean up environment before exiting
   430  cd ../..
   431  
   432  if [[ "$RUNNER" == "dataflow" ]]; then
   433    # Delete the container locally and remotely
   434    docker rmi $CONTAINER:$TAG || echo "Failed to remove container"
   435    gcloud --quiet container images delete $CONTAINER:$TAG || echo "Failed to delete container"
   436  
   437    if [[ -n "$TEST_EXPANSION_ADDR" || -n "$IO_EXPANSION_ADDR" || -n "$SCHEMAIO_EXPANSION_ADDR" || -n "$DEBEZIUMIO_EXPANSION_ADDR" ]]; then
   438      # Delete the java cross-language container locally and remotely
   439      docker rmi $JAVA_CONTAINER:$JAVA_TAG || echo "Failed to remove container"
   440      gcloud --quiet container images delete $JAVA_CONTAINER:$JAVA_TAG || echo "Failed to delete container"
   441    fi
   442  
   443    # Clean up tempdir
   444    rm -rf $TMPDIR
   445  fi
   446  
   447  exit $TEST_EXIT_CODE