github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/options/pipeline_options_validator_test.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Unit tests for the pipeline options validator module."""

# pytype: skip-file

import logging
import unittest

from hamcrest import assert_that
from hamcrest import contains_string
from hamcrest import only_contains
from hamcrest.core.base_matcher import BaseMatcher

from apache_beam.internal import pickler
from apache_beam.options.pipeline_options import DebugOptions
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import WorkerOptions
from apache_beam.options.pipeline_options_validator import PipelineOptionsValidator


# Mock runners to use for validations.
class MockRunners(object):
  class DataflowRunner(object):
    def get_default_gcp_region(self):
      # Return a default so we don't have to specify --region in every test
      # (unless specifically testing it).
      return 'us-central1'

  class TestDataflowRunner(DataflowRunner):
    pass

  class OtherRunner(object):
    pass


# Matcher that always passes, used to test the on_success_matcher option.
class AlwaysPassMatcher(BaseMatcher):
  def _matches(self, item):
    return True


class SetupTest(unittest.TestCase):
  def check_errors_for_arguments(self, errors, args):
    """Checks that there is exactly one error for each given argument.

    Returns the list of problems found: a message for each argument that has
    no matching error, plus any errors that were not matched to an argument.
    The list is empty when every argument is matched by exactly one error.
    """
    missing = []
    remaining = list(errors)

    for arg in args:
      found = False
      for error in remaining:
        if arg in error:
          remaining.remove(error)
          found = True
          break
      if not found:
        missing.append('Missing error for: %s.' % arg)

    # Return missing and remaining (not matched) errors.
    return missing + remaining

  def test_local_runner(self):
    runner = MockRunners.OtherRunner()
    options = PipelineOptions([])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)

  def test_missing_required_options(self):
    options = PipelineOptions([''])
    runner = MockRunners.DataflowRunner()
    # Remove default region for this test.
    runner.get_default_gcp_region = lambda: None
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()

    self.assertEqual(
        self.check_errors_for_arguments(
            errors, ['project', 'staging_location', 'temp_location', 'region']),
        [])

  def test_gcs_path(self):
    def get_validator(temp_location, staging_location):
      options = ['--project=example:example', '--job_name=job']

      if temp_location is not None:
        options.append('--temp_location=' + temp_location)

      if staging_location is not None:
        options.append('--staging_location=' + staging_location)

      pipeline_options = PipelineOptions(options)
      runner = MockRunners.DataflowRunner()
      validator = PipelineOptionsValidator(pipeline_options, runner)
      return validator

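    # These cases exercise GCS path validation: locations must use the gs://
    # scheme with a lowercase bucket name and an object path (or at least a
    # trailing slash), temp_location is required, and staging_location falls
    # back to temp_location when omitted.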
    test_cases = [
        {
            'temp_location': None,
            'staging_location': 'gs://foo/bar',
            'errors': ['temp_location']
        },
        {
            'temp_location': None,
            'staging_location': None,
            'errors': ['staging_location', 'temp_location']
        },
        {
            'temp_location': 'gs://foo/bar',
            'staging_location': None,
            'errors': []
        },
        {
            'temp_location': 'gs://foo/bar',
            'staging_location': 'gs://ABC/bar',
            'errors': ['staging_location']
        },
        {
            'temp_location': 'gcs:/foo/bar',
            'staging_location': 'gs://foo/bar',
            'errors': ['temp_location']
        },
        {
            'temp_location': 'gs:/foo/bar',
            'staging_location': 'gs://foo/bar',
            'errors': ['temp_location']
        },
        {
            'temp_location': 'gs://ABC/bar',
            'staging_location': 'gs://foo/bar',
            'errors': ['temp_location']
        },
        {
            'temp_location': 'gs://ABC/bar',
            'staging_location': 'gs://foo/bar',
            'errors': ['temp_location']
        },
        {
            'temp_location': 'gs://foo',
            'staging_location': 'gs://foo/bar',
            'errors': ['temp_location']
        },
        {
            'temp_location': 'gs://foo/',
            'staging_location': 'gs://foo/bar',
            'errors': []
        },
        {
            'temp_location': 'gs://foo/bar',
            'staging_location': 'gs://foo/bar',
            'errors': []
        },
    ]

    for case in test_cases:
      errors = get_validator(case['temp_location'],
                             case['staging_location']).validate()
      self.assertEqual(
          self.check_errors_for_arguments(errors, case['errors']), [])

  def test_project(self):
    def get_validator(project):
      options = [
          '--job_name=job',
          '--staging_location=gs://foo/bar',
          '--temp_location=gs://foo/bar'
      ]

      if project is not None:
        options.append('--project=' + project)

      pipeline_options = PipelineOptions(options)
      runner = MockRunners.DataflowRunner()
      validator = PipelineOptionsValidator(pipeline_options, runner)
      return validator

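    # Project IDs must be lowercase, may not be a bare number, and need at
    # least three characters; domain-scoped IDs such as 'foo:bar' are valid.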
    test_cases = [
        {
            'project': None, 'errors': ['project']
        },
        {
            'project': '12345', 'errors': ['project']
        },
        {
            'project': 'FOO', 'errors': ['project']
        },
        {
            'project': 'foo:BAR', 'errors': ['project']
        },
        {
            'project': 'fo', 'errors': ['project']
        },
        {
            'project': 'foo', 'errors': []
        },
        {
            'project': 'foo:bar', 'errors': []
        },
    ]

    for case in test_cases:
      errors = get_validator(case['project']).validate()
      self.assertEqual(
          self.check_errors_for_arguments(errors, case['errors']), [])

  def test_job_name(self):
    def get_validator(job_name):
      options = [
          '--project=example:example',
          '--staging_location=gs://foo/bar',
          '--temp_location=gs://foo/bar'
      ]

      if job_name is not None:
        options.append('--job_name=' + job_name)

      pipeline_options = PipelineOptions(options)
      runner = MockRunners.DataflowRunner()
      validator = PipelineOptionsValidator(pipeline_options, runner)
      return validator

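    # Job names, when provided, must start with a lowercase letter and may not
    # contain uppercase letters or characters such as ':'.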
    test_cases = [
        {
            'job_name': None, 'errors': []
        },
        {
            'job_name': '12345', 'errors': ['job_name']
        },
        {
            'job_name': 'FOO', 'errors': ['job_name']
        },
        {
            'job_name': 'foo:bar', 'errors': ['job_name']
        },
        {
            'job_name': 'fo', 'errors': []
        },
        {
            'job_name': 'foo', 'errors': []
        },
    ]

    for case in test_cases:
      errors = get_validator(case['job_name']).validate()
      self.assertEqual(
          self.check_errors_for_arguments(errors, case['errors']), [])

  def test_num_workers(self):
    def get_validator(num_workers):
      options = [
          '--project=example:example',
          '--job_name=job',
          '--staging_location=gs://foo/bar',
          '--temp_location=gs://foo/bar'
      ]

      if num_workers is not None:
        options.append('--num_workers=' + num_workers)

      pipeline_options = PipelineOptions(options)
      runner = MockRunners.DataflowRunner()
      validator = PipelineOptionsValidator(pipeline_options, runner)
      return validator

    test_cases = [
        {
            'num_workers': None, 'errors': []
        },
        {
            'num_workers': '1', 'errors': []
        },
        {
            'num_workers': '0', 'errors': ['num_workers']
        },
        {
            'num_workers': '-1', 'errors': ['num_workers']
        },
    ]

    for case in test_cases:
      errors = get_validator(case['num_workers']).validate()
      self.assertEqual(
          self.check_errors_for_arguments(errors, case['errors']), [])

  def test_is_service_runner(self):
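    # Only Dataflow runners pointed at the Google-hosted Dataflow endpoint
    # count as service runners; other runners and custom endpoints do not.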
    test_cases = [
        {
            'runner': MockRunners.OtherRunner(),
            'options': [],
            'expected': False,
        },
        {
            'runner': MockRunners.OtherRunner(),
            'options': ['--dataflow_endpoint=https://dataflow.googleapis.com'],
            'expected': False,
        },
        {
            'runner': MockRunners.OtherRunner(),
            'options': ['--dataflow_endpoint=https://dataflow.googleapis.com/'],
            'expected': False,
        },
        {
            'runner': MockRunners.DataflowRunner(),
            'options': ['--dataflow_endpoint=https://another.service.com'],
            'expected': False,
        },
        {
            'runner': MockRunners.DataflowRunner(),
            'options': ['--dataflow_endpoint=https://another.service.com/'],
            'expected': False,
        },
        {
            'runner': MockRunners.DataflowRunner(),
            'options': ['--dataflow_endpoint=https://dataflow.googleapis.com'],
            'expected': True,
        },
        {
            'runner': MockRunners.DataflowRunner(),
            'options': ['--dataflow_endpoint=https://dataflow.googleapis.com/'],
            'expected': True,
        },
        {
            'runner': MockRunners.DataflowRunner(),
            'options': [],
            'expected': True,
        },
    ]

    for case in test_cases:
      validator = PipelineOptionsValidator(
          PipelineOptions(case['options']), case['runner'])
      self.assertEqual(validator.is_service_runner(), case['expected'])

  def test_dataflow_job_file_and_template_location_mutually_exclusive(self):
    runner = MockRunners.OtherRunner()
    options = PipelineOptions(
        ['--template_location', 'abc', '--dataflow_job_file', 'def'])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertTrue(errors)

  def test_validate_template_location(self):
    runner = MockRunners.OtherRunner()
    options = PipelineOptions([
        '--template_location',
        'abc',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertFalse(errors)

  def test_validate_dataflow_job_file(self):
    runner = MockRunners.OtherRunner()
    options = PipelineOptions(['--dataflow_job_file', 'abc'])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertFalse(errors)

  def test_num_workers_is_positive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--num_workers=-1',
        '--worker_region=us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('num_workers', errors[0])
    self.assertIn('-1', errors[0])

  def test_max_num_workers_is_positive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--max_num_workers=-1',
        '--worker_region=us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('max_num_workers', errors[0])
    self.assertIn('-1', errors[0])

  def test_num_workers_cannot_exceed_max_num_workers(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--num_workers=43',
        '--max_num_workers=42',
        '--worker_region=us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('num_workers', errors[0])
    self.assertIn('43', errors[0])
    self.assertIn('max_num_workers', errors[0])
    self.assertIn('42', errors[0])

  def test_num_workers_can_equal_max_num_workers(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--num_workers=42',
        '--max_num_workers=42',
        '--worker_region=us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)

  def test_zone_and_worker_region_mutually_exclusive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--zone',
        'us-east1-b',
        '--worker_region',
        'us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('zone', errors[0])
    self.assertIn('worker_region', errors[0])

  def test_zone_and_worker_zone_mutually_exclusive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--zone',
        'us-east1-b',
        '--worker_zone',
        'us-east1-c',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('zone', errors[0])
    self.assertIn('worker_zone', errors[0])

  def test_experiment_region_and_worker_region_mutually_exclusive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--experiments',
        'worker_region=us-west1',
        '--worker_region',
        'us-east1',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('experiment', errors[0])
    self.assertIn('worker_region', errors[0])

  def test_experiment_region_and_worker_zone_mutually_exclusive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--experiments',
        'worker_region=us-west1',
        '--worker_zone',
        'us-east1-b',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('experiment', errors[0])
    self.assertIn('worker_region', errors[0])
    self.assertIn('worker_zone', errors[0])

  def test_programmatically_set_experiment_passed_as_string(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions(
        project='example.com:example',
        temp_location='gs://foo/bar/',
        experiments='enable_prime',
        dataflow_service_options='use_runner_v2',
    )
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 2)
    self.assertIn('experiments', errors[0])
    self.assertIn('dataflow_service_options', errors[1])

  def test_programmatically_set_experiment_passed_as_list(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions(
        project='example.com:example',
        temp_location='gs://foo/bar/',
        experiments=['enable_prime'],
        dataflow_service_options=['use_runner_v2'],
    )
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)
    self.assertEqual(
        options.view_as(DebugOptions).experiments, ['enable_prime'])
    self.assertEqual(
        options.view_as(GoogleCloudOptions).dataflow_service_options,
        ['use_runner_v2'])

  def test_worker_region_and_worker_zone_mutually_exclusive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--worker_region',
        'us-east1',
        '--worker_zone',
        'us-east1-b',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('worker_region', errors[0])
    self.assertIn('worker_zone', errors[0])

  def test_zone_alias_worker_zone(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--zone=us-east1-b',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)
    self.assertIsNone(options.view_as(WorkerOptions).zone)
    self.assertEqual(options.view_as(WorkerOptions).worker_zone, 'us-east1-b')

  def test_region_optional_for_non_service_runner(self):
    runner = MockRunners.DataflowRunner()
    # Remove default region for this test.
    runner.get_default_gcp_region = lambda: None
    options = PipelineOptions([
        '--project=example:example',
        '--temp_location=gs://foo/bar',
        '--dataflow_endpoint=http://localhost:20281',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)

  def test_alias_sdk_container_to_worker_harness(self):
    runner = MockRunners.DataflowRunner()
    test_image = "SDK_IMAGE"
    options = PipelineOptions([
        '--sdk_container_image=%s' % test_image,
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)
    self.assertEqual(
        options.view_as(WorkerOptions).worker_harness_container_image,
        test_image)
    self.assertEqual(
        options.view_as(WorkerOptions).sdk_container_image, test_image)

  def test_alias_worker_harness_sdk_container_image(self):
    runner = MockRunners.DataflowRunner()
    test_image = "WORKER_HARNESS"
    options = PipelineOptions([
        '--worker_harness_container_image=%s' % test_image,
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)
    self.assertEqual(
        options.view_as(WorkerOptions).worker_harness_container_image,
        test_image)
    self.assertEqual(
        options.view_as(WorkerOptions).sdk_container_image, test_image)

  def test_worker_harness_sdk_container_image_mutually_exclusive(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--worker_harness_container_image=WORKER',
        '--sdk_container_image=SDK_ONLY',
        '--project=example:example',
        '--temp_location=gs://foo/bar',
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('sdk_container_image', errors[0])
    self.assertIn('worker_harness_container_image', errors[0])

  def test_prebuild_sdk_container_base_image_disallowed(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--project=example:example',
        '--temp_location=gs://foo/bar',
        '--prebuild_sdk_container_base_image=gcr.io/foo:bar'
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 1)
    self.assertIn('prebuild_sdk_container_base_image', errors[0])
    self.assertIn('sdk_container_image', errors[0])

  def test_prebuild_sdk_container_base_allowed_if_matches_custom_image(self):
    runner = MockRunners.DataflowRunner()
    options = PipelineOptions([
        '--project=example:example',
        '--temp_location=gs://foo/bar',
        '--sdk_container_image=gcr.io/foo:bar',
        '--prebuild_sdk_container_base_image=gcr.io/foo:bar'
    ])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertEqual(len(errors), 0)

  def test_test_matcher(self):
    def get_validator(matcher):
      options = [
          '--project=example:example',
          '--job_name=job',
          '--staging_location=gs://foo/bar',
          '--temp_location=gs://foo/bar',
      ]
      if matcher:
        options.append('%s=%s' % ('--on_success_matcher', matcher.decode()))

      pipeline_options = PipelineOptions(options)
      runner = MockRunners.TestDataflowRunner()
      return PipelineOptionsValidator(pipeline_options, runner)

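    # The serialized on_success_matcher must deserialize to a matcher object;
    # raw bytes and a pickled non-matcher are rejected.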
    test_case = [
        {
            'on_success_matcher': None, 'errors': []
        },
        {
            'on_success_matcher': pickler.dumps(AlwaysPassMatcher()),
            'errors': []
        },
        {
            'on_success_matcher': b'abc', 'errors': ['on_success_matcher']
        },
        {
            'on_success_matcher': pickler.dumps(object),
            'errors': ['on_success_matcher']
        },
    ]

    for case in test_case:
      errors = get_validator(case['on_success_matcher']).validate()
      self.assertEqual(
          self.check_errors_for_arguments(errors, case['errors']), [])

  def test_transform_name_mapping_without_update(self):
    options = [
        '--project=example:example',
        '--staging_location=gs://foo/bar',
        '--temp_location=gs://foo/bar',
        '--transform_name_mapping={\"fromPardo\":\"toPardo\"}'
    ]

    pipeline_options = PipelineOptions(options)
    runner = MockRunners.DataflowRunner()
    validator = PipelineOptionsValidator(pipeline_options, runner)
    errors = validator.validate()
    assert_that(
        errors,
        only_contains(
            contains_string(
                'Transform name mapping option is only useful when '
                '--update and --streaming is specified')))

  def test_transform_name_mapping_invalid_format(self):
    options = [
        '--project=example:example',
        '--staging_location=gs://foo/bar',
        '--temp_location=gs://foo/bar',
        '--update',
        '--job_name=test',
        '--streaming',
        '--transform_name_mapping={\"fromPardo\":123}'
    ]

    pipeline_options = PipelineOptions(options)
    runner = MockRunners.DataflowRunner()
    validator = PipelineOptionsValidator(pipeline_options, runner)
    errors = validator.validate()
    assert_that(
        errors,
        only_contains(
            contains_string('Invalid transform name mapping format.')))

  def test_type_check_additional(self):
    runner = MockRunners.OtherRunner()
    options = PipelineOptions(['--type_check_additional=all'])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertFalse(errors)

    options = PipelineOptions(['--type_check_additional='])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertFalse(errors)

  def test_type_check_additional_unrecognized_feature(self):
    runner = MockRunners.OtherRunner()
    options = PipelineOptions(['--type_check_additional=all,dfgdf'])
    validator = PipelineOptionsValidator(options, runner)
    errors = validator.validate()
    self.assertTrue(errors)

  def test_environment_options(self):
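    # Each environment type accepts only its own environment_options keys, and
    # --environment_config cannot be combined with --environment_options.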
    test_cases = [
        {
            'options': ['--environment_type=dOcKeR'], 'errors': []
        },
        {
            'options': [
                '--environment_type=dOcKeR',
                '--environment_options=docker_container_image=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=dOcKeR', '--environment_config=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=dOcKeR',
                '--environment_options=docker_container_image=foo',
                '--environment_config=foo'
            ],
            'errors': ['environment_config']
        },
        {
            'options': [
                '--environment_type=dOcKeR',
                '--environment_options=process_command=foo',
                '--environment_options=process_variables=foo=bar',
                '--environment_options=external_service_address=foo'
            ],
            'errors': [
                'process_command',
                'process_variables',
                'external_service_address'
            ]
        },
        {
            'options': ['--environment_type=pRoCeSs'],
            'errors': ['process_command']
        },
        {
            'options': [
                '--environment_type=pRoCeSs',
                '--environment_options=process_command=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=pRoCeSs', '--environment_config=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=pRoCeSs',
                '--environment_options=process_command=foo',
                '--environment_config=foo'
            ],
            'errors': ['environment_config']
        },
        {
            'options': [
                '--environment_type=pRoCeSs',
                '--environment_options=process_command=foo',
                '--environment_options=process_variables=foo=bar',
                '--environment_options=docker_container_image=foo',
                '--environment_options=external_service_address=foo'
            ],
            'errors': ['docker_container_image', 'external_service_address']
        },
        {
            'options': ['--environment_type=eXtErNaL'],
            'errors': ['external_service_address']
        },
        {
            'options': [
                '--environment_type=eXtErNaL',
                '--environment_options=external_service_address=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=eXtErNaL', '--environment_config=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=eXtErNaL',
                '--environment_options=external_service_address=foo',
                '--environment_config=foo'
            ],
            'errors': ['environment_config']
        },
        {
            'options': [
                '--environment_type=eXtErNaL',
                '--environment_options=external_service_address=foo',
                '--environment_options=process_command=foo',
                '--environment_options=process_variables=foo=bar',
                '--environment_options=docker_container_image=foo',
            ],
            'errors': [
                'process_command',
                'process_variables',
                'docker_container_image'
            ]
        },
        {
            'options': ['--environment_type=lOoPbACk'], 'errors': []
        },
        {
            'options': [
                '--environment_type=lOoPbACk', '--environment_config=foo'
            ],
            'errors': ['environment_config']
        },
        {
            'options': [
                '--environment_type=lOoPbACk',
                '--environment_options=docker_container_image=foo',
                '--environment_options=process_command=foo',
                '--environment_options=process_variables=foo=bar',
                '--environment_options=external_service_address=foo',
            ],
            'errors': [
                'docker_container_image',
                'process_command',
                'process_variables',
                'external_service_address'
            ]
        },
        {
            'options': ['--environment_type=beam:env:foo:v1'], 'errors': []
        },
        {
            'options': [
                '--environment_type=beam:env:foo:v1',
                '--environment_config=foo'
            ],
            'errors': []
        },
        {
            'options': [
                '--environment_type=beam:env:foo:v1',
                '--environment_options=docker_container_image=foo',
                '--environment_options=process_command=foo',
                '--environment_options=process_variables=foo=bar',
                '--environment_options=external_service_address=foo',
            ],
            'errors': [
                'docker_container_image',
                'process_command',
                'process_variables',
                'external_service_address'
            ]
        },
        {
            'options': [
                '--environment_options=docker_container_image=foo',
                '--environment_options=process_command=foo',
                '--environment_options=process_variables=foo=bar',
                '--environment_options=external_service_address=foo',
            ],
            'errors': [
                'docker_container_image',
                'process_command',
                'process_variables',
                'external_service_address'
            ]
        },
    ]
    errors = []
    for case in test_cases:
      validator = PipelineOptionsValidator(
          PipelineOptions(case['options']), MockRunners.OtherRunner())
      validation_result = validator.validate()
      validation_errors = self.check_errors_for_arguments(
          validation_result, case['errors'])
      if validation_errors:
        errors.append(
            'Options "%s" had unexpected validation results: "%s"' %
            (' '.join(case['options']), ' '.join(validation_errors)))
    self.assertEqual(errors, [])


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()