github.com/apache/beam/sdks/v2@v2.48.2/python/scripts/run_integration_test.sh

#!/bin/bash
#
#    Licensed to the Apache Software Foundation (ASF) under one or more
#    contributor license agreements.  See the NOTICE file distributed with
#    this work for additional information regarding copyright ownership.
#    The ASF licenses this file to You under the Apache License, Version 2.0
#    (the "License"); you may not use this file except in compliance with
#    the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.
#

###########################################################################
#
# This script is used by Gradle to run a single Python integration test or a
# set of them, locally or on Jenkins. Note that this script does not set up
# the Python environment required for integration tests; to do that, run the
# Gradle tasks defined in :sdks:python:test-suites instead.
#
# To run tests with custom options, use the following command-line flags:
#
# Pipeline related flags:
#     runner        -> Runner that executes the pipeline job.
#                      e.g. TestDataflowRunner, TestDirectRunner
#     project       -> Project name of the cloud service.
#     region        -> Compute Engine region in which to create the Dataflow job.
#     gcs_location  -> Base location on GCS. Some pipeline options are
#                      derived from it, including output, staging_location
#                      and temp_location.
#     sdk_location  -> Python tarball location. Globs are accepted.
#     num_workers   -> Number of workers.
#     sleep_secs    -> Number of seconds to wait before verification.
#     streaming     -> True if running a streaming job.
#     kms_key_name  -> Name of the Cloud KMS encryption key to use in some tests.
#     pipeline_opts -> List of space-separated pipeline options. If this
#                      flag is specified, all of the above flags are ignored.
#                      Please include all required pipeline options when
#                      using this flag.
#
# Test related flags:
#     test_opts     -> List of space-separated options that configure Pytest
#                      during execution, e.g. commonly used options like
#                      `--capture=no` or `--collect-only`. More can be found at
#                      https://docs.pytest.org/en/latest/reference.html#command-line-flags
#     suite         -> Namespace for this run of tests. Required if running
#                      under Jenkins. Used to differentiate runs of the same
#                      tests with different interpreters/dependencies/etc.
#
# Example usages:
#     - Run the full set of PostCommit tests with default pipeline options:
#     `$ ./run_integration_test.sh`
#
#     - Run a single integration test with default pipeline options:
#     `$ ./run_integration_test.sh --test_opts apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it`
#
#     - Run the full set of PostCommit tests with customized pipeline options:
#     `$ ./run_integration_test.sh --project my-project --gcs_location gs://my-location`
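#
#     - Run with fully custom pipeline options (illustrative values only; when
#       --pipeline_opts is given, all other pipeline flags above are ignored,
#       so every required option must be listed explicitly):
#     `$ ./run_integration_test.sh --pipeline_opts "--runner=TestDirectRunner --project=my-project --temp_location=gs://my-location/temp"`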

###########################################################################
# Get pipeline options specified from command-line arguments.

# Default pipeline options
PROJECT=apache-beam-testing
RUNNER=TestDataflowRunner
REGION=us-central1
GCS_LOCATION=gs://temp-storage-for-end-to-end-tests
SDK_LOCATION=build/apache-beam.tar.gz
NUM_WORKERS=1
SLEEP_SECS=20
STREAMING=false
KMS_KEY_NAME="projects/apache-beam-testing/locations/global/keyRings/beam-it/cryptoKeys/test"
SUITE=""
COLLECT_MARKERS=
REQUIREMENTS_FILE=""

# Default test (pytest) options.
# Run WordCountIT.test_wordcount_it by default if no test options are
# provided.
TEST_OPTS="apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it"

while [[ $# -gt 0 ]]
do
key="$1"
case $key in
    --runner)
        RUNNER="$2"
        shift # past argument
        shift # past value
        ;;
    --project)
        PROJECT="$2"
        shift # past argument
        shift # past value
        ;;
    --region)
        REGION="$2"
        shift # past argument
        shift # past value
        ;;
    --gcs_location)
        GCS_LOCATION="$2"
        shift # past argument
        shift # past value
        ;;
    --sdk_location)
        SDK_LOCATION="$2"
        shift # past argument
        shift # past value
        ;;
    --requirements_file)
        REQUIREMENTS_FILE="$2"
        shift # past argument
        shift # past value
        ;;
    --num_workers)
        NUM_WORKERS="$2"
        shift # past argument
        shift # past value
        ;;
    --sleep_secs)
        SLEEP_SECS="$2"
        shift # past argument
        shift # past value
        ;;
    --streaming)
        STREAMING="$2"
        shift # past argument
        shift # past value
        ;;
    --runner_v2)
        RUNNER_V2="$2"
        shift # past argument
        shift # past value
        ;;
    --disable_runner_v2)
        DISABLE_RUNNER_V2="$2"
        shift # past argument
        shift # past value
        ;;
    --kms_key_name)
        KMS_KEY_NAME="$2"
        shift # past argument
        shift # past value
        ;;
    --dataflow_endpoint)
        DATAFLOW_ENDPOINT="$2"
        shift # past argument
        shift # past value
        ;;
    --pipeline_opts)
        PIPELINE_OPTS="$2"
        shift # past argument
        shift # past value
        ;;
    --test_opts)
        TEST_OPTS="$2"
        shift # past argument
        shift # past value
        ;;
    --suite)
        SUITE="$2"
        shift # past argument
        shift # past value
        ;;
    --collect)
        COLLECT_MARKERS="-m=$2"
        shift # past argument
        shift # past value
        ;;
    *)    # unknown option
        echo "Unknown option: $1"
        exit 1
        ;;
esac
done
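# Illustrative invocation of the parser above:
#   ./run_integration_test.sh --runner TestDirectRunner --num_workers 2
# overrides only RUNNER and NUM_WORKERS; every other default is kept. Each
# case arm shifts twice, consuming the flag and then its value.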

if [[ "$JENKINS_HOME" != "" && "$SUITE" == "" ]]; then
    echo "Argument --suite is required in a Jenkins environment."
    exit 1
fi

set -o errexit


###########################################################################

# Check that the script is running in a known directory.
if [[ $PWD != *sdks/python* ]]; then
  echo 'Unable to locate Apache Beam Python SDK root directory'
  exit 1
fi

# Go to the Apache Beam Python SDK root.
if [[ $PWD != *sdks/python ]]; then
  cd "$(pwd | sed 's/sdks\/python.*/sdks\/python/')"
fi
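# Illustrative path rewrite (hypothetical directory): if the script is invoked
# from /home/user/beam/sdks/python/scripts, the sed expression above trims
# everything after "sdks/python", so the cd lands in /home/user/beam/sdks/python.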


###########################################################################
# Build pipeline options if they were not provided via --pipeline_opts on
# the command line.

if [[ -z $PIPELINE_OPTS ]]; then
  # Resolve the tarball path. $SDK_LOCATION may be a glob, so it is left
  # unquoted to let the shell expand it; `tail -n1` keeps the last match.
  if [[ $(find ${SDK_LOCATION} 2> /dev/null) ]]; then
    SDK_LOCATION=$(find ${SDK_LOCATION} | tail -n1)
  else
    echo "[WARNING] Could not find SDK tarball in SDK_LOCATION: $SDK_LOCATION."
  fi
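  # Illustrative example (hypothetical artifact name): with
  # SDK_LOCATION='build/apache-beam*.tar.gz' matching several files, the find
  # above lists them all and `tail -n1` keeps the last path listed,
  # e.g. build/apache-beam-2.48.2.tar.gz.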

  # Install test dependencies for ValidatesRunner tests.
  # pyhamcrest==1.10.0 doesn't work on Py2.
  # See: https://github.com/hamcrest/PyHamcrest/issues/131.
  if [[ -z $REQUIREMENTS_FILE ]]; then
    echo "pyhamcrest!=1.10.0,<2.0.0" > postcommit_requirements.txt
    echo "mock<3.0.0" >> postcommit_requirements.txt
    echo "parameterized>=0.7.1,<0.8.0" >> postcommit_requirements.txt
  else
    cp "$REQUIREMENTS_FILE" postcommit_requirements.txt
  fi

  # Options used to run the test pipeline on the Cloud Dataflow service. Also
  # used when running on the DirectRunner (where some options are ignored).
  opts=(
    "--runner=$RUNNER"
    "--project=$PROJECT"
    "--region=$REGION"
    "--staging_location=$GCS_LOCATION/staging-it"
    "--temp_location=$GCS_LOCATION/temp-it"
    "--output=$GCS_LOCATION/py-it-cloud/output"
    "--sdk_location=$SDK_LOCATION"
    "--requirements_file=postcommit_requirements.txt"
    "--num_workers=$NUM_WORKERS"
    "--sleep_secs=$SLEEP_SECS"
  )

  # Add --streaming if provided.
  if [[ "$STREAMING" = true ]]; then
    opts+=("--streaming")
  fi

  # Add Runner V2 experiments if requested via --runner_v2.
  if [[ "$RUNNER_V2" = true ]]; then
    opts+=("--experiments=use_runner_v2")
    if [[ "$STREAMING" = true ]]; then
      # Dataflow Runner V2 only supports Streaming Engine.
      opts+=("--enable_streaming_engine")
    else
      opts+=("--experiments=beam_fn_api")
    fi
  fi

  # Add --experiments=disable_runner_v2 if requested.
  if [[ "$DISABLE_RUNNER_V2" = true ]]; then
    opts+=("--experiments=disable_runner_v2")
  fi

  if [[ -n "$KMS_KEY_NAME" ]]; then
    opts+=(
      "--kms_key_name=$KMS_KEY_NAME"
      "--dataflow_kms_key=$KMS_KEY_NAME"
    )
  fi

  if [[ -n "$DATAFLOW_ENDPOINT" ]]; then
    opts+=("--dataflow_endpoint=$DATAFLOW_ENDPOINT")
  fi

  # Join the options array into a single space-separated string.
  PIPELINE_OPTS=$(IFS=" " ; echo "${opts[*]}")
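  # Illustrative result: with the defaults above, the join yields one long
  # string along the lines of
  #   --runner=TestDataflowRunner --project=apache-beam-testing --region=us-central1 ...
  # rather than an array.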

fi

# Handle double quotes in PIPELINE_OPTS: add a backslash before each `"` so
# the quotes survive in the command-line options.
PIPELINE_OPTS=${PIPELINE_OPTS//\"/\\\"}
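# Illustrative example (hypothetical option value): a pipeline option such as
#   --output_table_spec="project:dataset.table"
# becomes --output_table_spec=\"project:dataset.table\" after the substitution.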

###########################################################################
# Run tests and validate that jobs finish successfully.

echo ">>> RUNNING integration tests with pipeline options: $PIPELINE_OPTS"
echo ">>>   pytest options: $TEST_OPTS"
echo ">>>   collect markers: $COLLECT_MARKERS"
ARGS="-o junit_suite_name=$SUITE -o log_cli=true -o log_level=INFO --junitxml=pytest_$SUITE.xml $TEST_OPTS"
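# Illustrative expansion (hypothetical suite name): with SUITE=postCommitIT and
# the default TEST_OPTS, ARGS becomes:
#   -o junit_suite_name=postCommitIT -o log_cli=true -o log_level=INFO
#   --junitxml=pytest_postCommitIT.xml apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it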
# Pass markers as an argument separate from $TEST_OPTS to avoid errors with
# space-separated flags. $ARGS is intentionally left unquoted so that pytest
# receives each option as a separate argument.
if [ -z "$COLLECT_MARKERS" ]; then
  pytest $ARGS --test-pipeline-options="$PIPELINE_OPTS"
else
  pytest $ARGS --test-pipeline-options="$PIPELINE_OPTS" "$COLLECT_MARKERS"
fi