github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/test_pipeline.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Test Pipeline, a wrapper of Pipeline for test purpose"""
    19  
    20  # pytype: skip-file
    21  
    22  import argparse
    23  import shlex
    24  from unittest import SkipTest
    25  
    26  from apache_beam.internal import pickler
    27  from apache_beam.options.pipeline_options import PipelineOptions
    28  from apache_beam.pipeline import Pipeline
    29  from apache_beam.runners.runner import PipelineState
    30  
    31  __all__ = [
    32      'TestPipeline',
    33  ]
    34  
    35  
    36  class TestPipeline(Pipeline):
    37    """:class:`TestPipeline` class is used inside of Beam tests that can be
    38    configured to run against pipeline runner.
    39  
    40    It has a functionality to parse arguments from command line and build pipeline
    41    options for tests who runs against a pipeline runner and utilizes resources
    42    of the pipeline runner. Those test functions are recommended to be tagged by
    43    ``@pytest.mark.it_validatesrunner`` annotation.
    44  
    45    In order to configure the test with customized pipeline options from command
    46    line, system argument ``--test-pipeline-options`` can be used to obtains a
    47    list of pipeline options. If no options specified, default value will be used.
    48  
    49    For example, use following command line to execute all ValidatesRunner tests::
    50  
    51      pytest -m it_validatesrunner \\
    52          --test-pipeline-options="--runner=DirectRunner \\
    53                                   --job_name=myJobName \\
    54                                   --num_workers=1"
    55  
    56    For example, use assert_that for test validation::
    57  
    58      with TestPipeline() as pipeline:
    59        pcoll = ...
    60        assert_that(pcoll, equal_to(...))
    61    """
    62    # Command line options read in by pytest.
    63    # If this is not None, will use as default value for --test-pipeline-options.
    64    pytest_test_pipeline_options = None
    65  
    66    def __init__(
    67        self,
    68        runner=None,
    69        options=None,
    70        argv=None,
    71        is_integration_test=False,
    72        blocking=True,
    73        additional_pipeline_args=None):
    74      """Initialize a pipeline object for test.
    75  
    76      Args:
    77        runner (~apache_beam.runners.runner.PipelineRunner): An object of type
    78          :class:`~apache_beam.runners.runner.PipelineRunner` that will be used
    79          to execute the pipeline. For registered runners, the runner name can be
    80          specified, otherwise a runner object must be supplied.
    81        options (~apache_beam.options.pipeline_options.PipelineOptions):
    82          A configured
    83          :class:`~apache_beam.options.pipeline_options.PipelineOptions`
    84          object containing arguments that should be used for running the
    85          pipeline job.
    86        argv (List[str]): A list of arguments (such as :data:`sys.argv`) to be
    87          used for building a
    88          :class:`~apache_beam.options.pipeline_options.PipelineOptions` object.
    89          This will only be used if argument **options** is :data:`None`.
    90        is_integration_test (bool): :data:`True` if the test is an integration
    91          test, :data:`False` otherwise.
    92        blocking (bool): Run method will wait until pipeline execution is
    93          completed.
    94        additional_pipeline_args (List[str]): additional pipeline arguments to be
    95          included when construction the pipeline options object.
    96  
    97      Raises:
    98        ValueError: if either the runner or options argument is not
    99          of the expected type.
   100      """
   101      self.is_integration_test = is_integration_test
   102      self.not_use_test_runner_api = False
   103      additional_pipeline_args = additional_pipeline_args or []
   104      self.options_list = (
   105          self._parse_test_option_args(argv) + additional_pipeline_args)
   106      self.blocking = blocking
   107      if options is None:
   108        options = PipelineOptions(self.options_list)
   109      super().__init__(runner, options)
   110  
   111    def run(self, test_runner_api=True):
   112      result = super().run(
   113          test_runner_api=(
   114              False if self.not_use_test_runner_api else test_runner_api))
   115      if self.blocking:
   116        state = result.wait_until_finish()
   117        assert state in (PipelineState.DONE, PipelineState.CANCELLED), \
   118            "Pipeline execution failed."
   119  
   120      return result
   121  
   122    def get_pipeline_options(self):
   123      return self._options
   124  
   125    def _parse_test_option_args(self, argv):
   126      """Parse value of command line argument: --test-pipeline-options to get
   127      pipeline options.
   128  
   129      Args:
   130        argv: An iterable of command line arguments to be used. If not specified
   131          then sys.argv will be used as input for parsing arguments.
   132  
   133      Returns:
   134        An argument list of options that can be parsed by argparser or directly
   135        build a pipeline option.
   136      """
   137      parser = argparse.ArgumentParser()
   138      parser.add_argument(
   139          '--test-pipeline-options',
   140          type=str,
   141          action='store',
   142          help='only run tests providing service options')
   143      parser.add_argument(
   144          '--not-use-test-runner-api',
   145          action='store_true',
   146          default=False,
   147          help='whether not to use test-runner-api')
   148      known, unused_argv = parser.parse_known_args(argv)
   149      test_pipeline_options = known.test_pipeline_options or \
   150                              TestPipeline.pytest_test_pipeline_options
   151      if self.is_integration_test and not test_pipeline_options:
   152        # Skip integration test when argument '--test-pipeline-options' is not
   153        # specified since nose calls integration tests when runs unit test by
   154        # 'setup.py test'.
   155        raise SkipTest(
   156            'IT is skipped because --test-pipeline-options '
   157            'is not specified')
   158  
   159      self.not_use_test_runner_api = known.not_use_test_runner_api
   160      return shlex.split(test_pipeline_options) \
   161        if test_pipeline_options else []
   162  
   163    def get_full_options_as_args(self, **extra_opts):
   164      """Get full pipeline options as an argument list.
   165  
   166      Append extra pipeline options to existing option list if provided.
   167      Test verifier (if contains in extra options) should be pickled before
   168      appending, and will be unpickled later in the TestRunner.
   169      """
   170      options = list(self.options_list)
   171      for k, v in extra_opts.items():
   172        if not v:
   173          continue
   174        elif isinstance(v, bool) and v:
   175          options.append('--%s' % k)
   176        elif 'matcher' in k:
   177          options.append('--%s=%s' % (k, pickler.dumps(v).decode()))
   178        else:
   179          options.append('--%s=%s' % (k, v))
   180      return options
   181  
   182    def get_option(self, opt_name, bool_option=False):
   183      """Get a pipeline option value by name
   184  
   185      Args:
   186        opt_name: The name of the pipeline option.
   187  
   188      Returns:
   189        None if option is not found in existing option list which is generated
   190        by parsing value of argument `test-pipeline-options`.
   191      """
   192      parser = argparse.ArgumentParser()
   193      opt_name = opt_name[:2] if opt_name[:2] == '--' else opt_name
   194      # Option name should start with '--' when it's used for parsing.
   195      if bool_option:
   196        parser.add_argument('--' + opt_name, action='store_true')
   197      else:
   198        parser.add_argument('--' + opt_name, type=str, action='store')
   199      known, _ = parser.parse_known_args(self.options_list)
   200      return getattr(known, opt_name) if hasattr(known, opt_name) else None