github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/interactive/interactive_beam_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Tests for apache_beam.runners.interactive.interactive_beam."""
    19  # pytype: skip-file
    20  
    21  import dataclasses
    22  import importlib
    23  import sys
    24  import time
    25  import unittest
    26  from typing import NamedTuple
    27  from unittest.mock import patch
    28  
    29  import apache_beam as beam
    30  from apache_beam import dataframe as frames
    31  from apache_beam.options.pipeline_options import FlinkRunnerOptions
    32  from apache_beam.options.pipeline_options import PipelineOptions
    33  from apache_beam.runners.interactive import interactive_beam as ib
    34  from apache_beam.runners.interactive import interactive_environment as ie
    35  from apache_beam.runners.interactive import interactive_runner as ir
    36  from apache_beam.runners.interactive.dataproc.dataproc_cluster_manager import DataprocClusterManager
    37  from apache_beam.runners.interactive.dataproc.types import ClusterMetadata
    38  from apache_beam.runners.interactive.options.capture_limiters import Limiter
    39  from apache_beam.runners.interactive.testing.mock_env import isolated_env
    40  from apache_beam.runners.runner import PipelineState
    41  from apache_beam.testing.test_stream import TestStream
    42  
    43  
    44  @dataclasses.dataclass
    45  class MockClusterMetadata:
    46    master_url = 'mock_url'
    47  
    48  
    49  class Record(NamedTuple):
    50    order_id: int
    51    product_id: int
    52    quantity: int
    53  
    54  
# Fully qualified name of this test module. The watch tests pass it to
# ib.watch() directly and to importlib.import_module(). Because it is defined
# at module level, it is itself one of the variables of the module it names.
_module_name = 'apache_beam.runners.interactive.interactive_beam_test'
    57  
    58  
    59  def _get_watched_pcollections_with_variable_names():
    60    watched_pcollections = {}
    61    for watching in ie.current_env().watching():
    62      for key, val in watching:
    63        if hasattr(val, '__class__') and isinstance(val, beam.pvalue.PCollection):
    64          watched_pcollections[val] = key
    65    return watched_pcollections
    66  
    67  
    68  @isolated_env
    69  class InteractiveBeamTest(unittest.TestCase):
    70    def setUp(self):
    71      self._var_in_class_instance = 'a var in class instance, not directly used'
    72  
    73    def tearDown(self):
    74      ib.options.capture_control.set_limiters_for_test([])
    75  
    76    def test_watch_main_by_default(self):
    77      test_env = ie.InteractiveEnvironment()
    78      # Current Interactive Beam env fetched and the test env are 2 instances.
    79      self.assertNotEqual(id(ie.current_env()), id(test_env))
    80      self.assertEqual(ie.current_env().watching(), test_env.watching())
    81  
    82    def test_watch_a_module_by_name(self):
    83      test_env = ie.InteractiveEnvironment()
    84      ib.watch(_module_name)
    85      test_env.watch(_module_name)
    86      self.assertEqual(ie.current_env().watching(), test_env.watching())
    87  
    88    def test_watch_a_module_by_module_object(self):
    89      test_env = ie.InteractiveEnvironment()
    90      module = importlib.import_module(_module_name)
    91      ib.watch(module)
    92      test_env.watch(module)
    93      self.assertEqual(ie.current_env().watching(), test_env.watching())
    94  
    95    def test_watch_locals(self):
    96      # test_env serves as local var too.
    97      test_env = ie.InteractiveEnvironment()
    98      ib.watch(locals())
    99      test_env.watch(locals())
   100      self.assertEqual(ie.current_env().watching(), test_env.watching())
   101  
   102    def test_watch_class_instance(self):
   103      test_env = ie.InteractiveEnvironment()
   104      ib.watch(self)
   105      test_env.watch(self)
   106      self.assertEqual(ie.current_env().watching(), test_env.watching())
   107  
   108    @unittest.skipIf(sys.platform == "win32", "[BEAM-10627]")
   109    def test_show_always_watch_given_pcolls(self):
   110      p = beam.Pipeline(ir.InteractiveRunner())
   111      # pylint: disable=bad-option-value
   112      pcoll = p | 'Create' >> beam.Create(range(10))
   113      # The pcoll is not watched since watch(locals()) is not explicitly called.
   114      self.assertFalse(pcoll in _get_watched_pcollections_with_variable_names())
   115      # The call of show watches pcoll.
   116      ib.watch({'p': p})
   117      ie.current_env().track_user_pipelines()
   118      ib.show(pcoll)
   119      self.assertTrue(pcoll in _get_watched_pcollections_with_variable_names())
   120  
   121    @unittest.skipIf(sys.platform == "win32", "[BEAM-10627]")
   122    def test_show_mark_pcolls_computed_when_done(self):
   123      p = beam.Pipeline(ir.InteractiveRunner())
   124      # pylint: disable=bad-option-value
   125      pcoll = p | 'Create' >> beam.Create(range(10))
   126      self.assertFalse(pcoll in ie.current_env().computed_pcollections)
   127      # The call of show marks pcoll computed.
   128      ib.watch(locals())
   129      ie.current_env().track_user_pipelines()
   130      ib.show(pcoll)
   131      self.assertTrue(pcoll in ie.current_env().computed_pcollections)
   132  
   133    @patch((
   134        'apache_beam.runners.interactive.interactive_beam.'
   135        'visualize_computed_pcoll'))
   136    def test_show_handles_dict_of_pcolls(self, mocked_visualize):
   137      p = beam.Pipeline(ir.InteractiveRunner())
   138      # pylint: disable=bad-option-value
   139      pcoll = p | 'Create' >> beam.Create(range(10))
   140      ib.watch(locals())
   141      ie.current_env().track_user_pipelines()
   142      ie.current_env().mark_pcollection_computed([pcoll])
   143      ie.current_env()._is_in_ipython = True
   144      ie.current_env()._is_in_notebook = True
   145      ib.show({'pcoll': pcoll})
   146      mocked_visualize.assert_called_once()
   147  
   148    @patch((
   149        'apache_beam.runners.interactive.interactive_beam.'
   150        'visualize_computed_pcoll'))
   151    def test_show_handles_iterable_of_pcolls(self, mocked_visualize):
   152      p = beam.Pipeline(ir.InteractiveRunner())
   153      # pylint: disable=bad-option-value
   154      pcoll = p | 'Create' >> beam.Create(range(10))
   155      ib.watch(locals())
   156      ie.current_env().track_user_pipelines()
   157      ie.current_env().mark_pcollection_computed([pcoll])
   158      ie.current_env()._is_in_ipython = True
   159      ie.current_env()._is_in_notebook = True
   160      ib.show([pcoll])
   161      mocked_visualize.assert_called_once()
   162  
   163    @patch('apache_beam.runners.interactive.interactive_beam.visualize')
   164    def test_show_handles_deferred_dataframes(self, mocked_visualize):
   165      p = beam.Pipeline(ir.InteractiveRunner())
   166  
   167      deferred = frames.convert.to_dataframe(p | beam.Create([Record(0, 0, 0)]))
   168  
   169      ib.watch(locals())
   170      ie.current_env().track_user_pipelines()
   171      ie.current_env()._is_in_ipython = True
   172      ie.current_env()._is_in_notebook = True
   173      ib.show(deferred)
   174      mocked_visualize.assert_called_once()
   175  
   176    @patch((
   177        'apache_beam.runners.interactive.interactive_beam.'
   178        'visualize_computed_pcoll'))
   179    def test_show_noop_when_pcoll_container_is_invalid(self, mocked_visualize):
   180      class SomeRandomClass:
   181        def __init__(self, pcoll):
   182          self._pcoll = pcoll
   183  
   184      p = beam.Pipeline(ir.InteractiveRunner())
   185      # pylint: disable=bad-option-value
   186      pcoll = p | 'Create' >> beam.Create(range(10))
   187      ie.current_env().mark_pcollection_computed([pcoll])
   188      ie.current_env()._is_in_ipython = True
   189      ie.current_env()._is_in_notebook = True
   190      self.assertRaises(ValueError, ib.show, SomeRandomClass(pcoll))
   191      mocked_visualize.assert_not_called()
   192  
   193    def test_recordings_describe(self):
   194      """Tests that getting the description works."""
   195  
   196      # Create the pipelines to test.
   197      p1 = beam.Pipeline(ir.InteractiveRunner())
   198      p2 = beam.Pipeline(ir.InteractiveRunner())
   199  
   200      ib.watch(locals())
   201  
   202      # Get the descriptions. This test is simple as there isn't much logic in the
   203      # method.
   204      self.assertEqual(ib.recordings.describe(p1)['size'], 0)
   205      self.assertEqual(ib.recordings.describe(p2)['size'], 0)
   206  
   207      all_descriptions = ib.recordings.describe()
   208      self.assertEqual(all_descriptions[p1]['size'], 0)
   209      self.assertEqual(all_descriptions[p2]['size'], 0)
   210  
   211      # Ensure that the variable name for the pipeline is set correctly.
   212      self.assertEqual(all_descriptions[p1]['pipeline_var'], 'p1')
   213      self.assertEqual(all_descriptions[p2]['pipeline_var'], 'p2')
   214  
   215    def test_recordings_clear(self):
   216      """Tests that clearing the pipeline is correctly forwarded."""
   217  
   218      # Create a basic pipeline to store something in the cache.
   219      p = beam.Pipeline(ir.InteractiveRunner())
   220      elem = p | beam.Create([1])
   221      ib.watch(locals())
   222      ie.current_env().track_user_pipelines()
   223  
   224      # This records the pipeline so that the cache size is > 0.
   225      ib.collect(elem)
   226      self.assertGreater(ib.recordings.describe(p)['size'], 0)
   227  
   228      # After clearing, the cache should be empty.
   229      ib.recordings.clear(p)
   230      self.assertEqual(ib.recordings.describe(p)['size'], 0)
   231  
   232    def test_recordings_record(self):
   233      """Tests that recording pipeline succeeds."""
   234  
   235      # Add the TestStream so that it can be cached.
   236      ib.options.recordable_sources.add(TestStream)
   237  
   238      # Create a pipeline with an arbitrary amonunt of elements.
   239      p = beam.Pipeline(
   240          ir.InteractiveRunner(), options=PipelineOptions(streaming=True))
   241      # pylint: disable=unused-variable
   242      _ = (p
   243           | TestStream()
   244               .advance_watermark_to(0)
   245               .advance_processing_time(1)
   246               .add_elements(list(range(10)))
   247               .advance_processing_time(1))  # yapf: disable
   248      ib.watch(locals())
   249      ie.current_env().track_user_pipelines()
   250  
   251      # Assert that the pipeline starts in a good state.
   252      self.assertEqual(ib.recordings.describe(p)['state'], PipelineState.STOPPED)
   253      self.assertEqual(ib.recordings.describe(p)['size'], 0)
   254  
   255      # Create a lmiter that stops the background caching job when something is
   256      # written to cache. This is used to make ensure that the pipeline is
   257      # functioning properly and that there are no data races with the test.
   258      class SizeLimiter(Limiter):
   259        def __init__(self, pipeline):
   260          self.pipeline = pipeline
   261          self.should_trigger = False
   262  
   263        def is_triggered(self):
   264          return (
   265              ib.recordings.describe(self.pipeline)['size'] > 0 and
   266              self.should_trigger)
   267  
   268      limiter = SizeLimiter(p)
   269      ib.options.capture_control.set_limiters_for_test([limiter])
   270  
   271      # Assert that a recording can be started only once.
   272      self.assertTrue(ib.recordings.record(p))
   273      self.assertFalse(ib.recordings.record(p))
   274      self.assertEqual(ib.recordings.describe(p)['state'], PipelineState.RUNNING)
   275  
   276      # Wait for the pipeline to start and write something to cache.
   277      limiter.should_trigger = True
   278      for _ in range(60):
   279        if limiter.is_triggered():
   280          break
   281        time.sleep(1)
   282      self.assertTrue(
   283          limiter.is_triggered(),
   284          'Test timed out waiting for limiter to be triggered. This indicates '
   285          'that the BackgroundCachingJob did not cache anything.')
   286  
   287      # Assert that a recording can be stopped and can't be started again until
   288      # after the cache is cleared.
   289      ib.recordings.stop(p)
   290      self.assertEqual(ib.recordings.describe(p)['state'], PipelineState.STOPPED)
   291      self.assertFalse(ib.recordings.record(p))
   292      ib.recordings.clear(p)
   293      self.assertTrue(ib.recordings.record(p))
   294      ib.recordings.stop(p)
   295  
   296  
   297  @unittest.skipIf(
   298      not ie.current_env().is_interactive_ready,
   299      '[interactive] dependency is not installed.')
   300  @isolated_env
   301  class InteractiveBeamClustersTest(unittest.TestCase):
   302    def setUp(self):
   303      self.current_env.options.cache_root = 'gs://fake'
   304      self.clusters = self.current_env.clusters
   305  
   306    def tearDown(self):
   307      self.current_env.options.cache_root = None
   308  
   309    def test_cluster_metadata_pass_through_metadata(self):
   310      cid = ClusterMetadata(project_id='test-project')
   311      meta = self.clusters.cluster_metadata(cid)
   312      self.assertIs(meta, cid)
   313  
   314    def test_cluster_metadata_identifies_pipeline(self):
   315      cid = beam.Pipeline()
   316      known_meta = ClusterMetadata(project_id='test-project')
   317      dcm = DataprocClusterManager(known_meta)
   318      self.clusters.pipelines[cid] = dcm
   319  
   320      meta = self.clusters.cluster_metadata(cid)
   321      self.assertIs(meta, known_meta)
   322  
   323    def test_cluster_metadata_identifies_master_url(self):
   324      cid = 'test-url'
   325      known_meta = ClusterMetadata(project_id='test-project')
   326      _ = DataprocClusterManager(known_meta)
   327      self.clusters.master_urls[cid] = known_meta
   328  
   329      meta = self.clusters.cluster_metadata(cid)
   330      self.assertIs(meta, known_meta)
   331  
   332    def test_cluster_metadata_default_value(self):
   333      cid_none = None
   334      cid_unknown_p = beam.Pipeline()
   335      cid_unknown_master_url = 'test-url'
   336      default_meta = ClusterMetadata(project_id='test-project')
   337      self.clusters.set_default_cluster(default_meta)
   338  
   339      self.assertIs(default_meta, self.clusters.cluster_metadata(cid_none))
   340      self.assertIs(default_meta, self.clusters.cluster_metadata(cid_unknown_p))
   341      self.assertIs(
   342          default_meta, self.clusters.cluster_metadata(cid_unknown_master_url))
   343  
   344    def test_create_a_new_cluster(self):
   345      meta = ClusterMetadata(project_id='test-project')
   346      _ = self.clusters.create(meta)
   347  
   348      # Derived fields are populated.
   349      self.assertTrue(meta.master_url.startswith('test-url'))
   350      self.assertEqual(meta.dashboard, 'test-dashboard')
   351      # The cluster is known.
   352      self.assertIn(meta, self.clusters.dataproc_cluster_managers)
   353      self.assertIn(meta.master_url, self.clusters.master_urls)
   354      # The default cluster is updated to the created cluster.
   355      self.assertIs(meta, self.clusters.default_cluster_metadata)
   356  
   357    def test_create_but_reuse_a_known_cluster(self):
   358      known_meta = ClusterMetadata(
   359          project_id='test-project', region='test-region')
   360      known_dcm = DataprocClusterManager(known_meta)
   361      known_meta.master_url = 'test-url'
   362      self.clusters.set_default_cluster(known_meta)
   363      self.clusters.dataproc_cluster_managers[known_meta] = known_dcm
   364      self.clusters.master_urls[known_meta.master_url] = known_meta
   365  
   366      # Use an equivalent meta as the identifier to create a cluster.
   367      cid_meta = ClusterMetadata(
   368          project_id=known_meta.project_id,
   369          region=known_meta.region,
   370          cluster_name=known_meta.cluster_name)
   371      dcm = self.clusters.create(cid_meta)
   372      # The known cluster manager is returned.
   373      self.assertIs(dcm, known_dcm)
   374  
   375      # Then use an equivalent master_url as the identifier.
   376      cid_master_url = known_meta.master_url
   377      dcm = self.clusters.create(cid_master_url)
   378      self.assertIs(dcm, known_dcm)
   379  
   380    def test_cleanup_by_a_pipeline(self):
   381      meta = ClusterMetadata(project_id='test-project')
   382      dcm = self.clusters.create(meta)
   383  
   384      # Set up the association between a pipeline and a cluster.
   385      # In real code, it's set by the runner the 1st time a pipeline is executed.
   386      options = PipelineOptions()
   387      options.view_as(FlinkRunnerOptions).flink_master = meta.master_url
   388      p = beam.Pipeline(options=options)
   389      self.clusters.pipelines[p] = dcm
   390      dcm.pipelines.add(p)
   391  
   392      self.clusters.cleanup(p)
   393      # Delete the cluster.
   394      self.m_delete_cluster.assert_called_once()
   395      # Pipeline association is cleaned up.
   396      self.assertNotIn(p, self.clusters.pipelines)
   397      self.assertNotIn(p, dcm.pipelines)
   398      self.assertEqual(options.view_as(FlinkRunnerOptions).flink_master, '[auto]')
   399      # The cluster is unknown now.
   400      self.assertNotIn(meta, self.clusters.dataproc_cluster_managers)
   401      self.assertNotIn(meta.master_url, self.clusters.master_urls)
   402      # The cleaned up cluster is also the default cluster. Clean the default.
   403      self.assertIsNone(self.clusters.default_cluster_metadata)
   404  
   405    def test_not_cleanup_if_multiple_pipelines_share_a_manager(self):
   406      meta = ClusterMetadata(project_id='test-project')
   407      dcm = self.clusters.create(meta)
   408  
   409      options = PipelineOptions()
   410      options.view_as(FlinkRunnerOptions).flink_master = meta.master_url
   411      options2 = PipelineOptions()
   412      options2.view_as(FlinkRunnerOptions).flink_master = meta.master_url
   413      p = beam.Pipeline(options=options)
   414      p2 = beam.Pipeline(options=options2)
   415      self.clusters.pipelines[p] = dcm
   416      self.clusters.pipelines[p2] = dcm
   417      dcm.pipelines.add(p)
   418      dcm.pipelines.add(p2)
   419  
   420      self.clusters.cleanup(p)
   421      # No cluster deleted.
   422      self.m_delete_cluster.assert_not_called()
   423      # Pipeline association of p is cleaned up.
   424      self.assertNotIn(p, self.clusters.pipelines)
   425      self.assertNotIn(p, dcm.pipelines)
   426      self.assertEqual(options.view_as(FlinkRunnerOptions).flink_master, '[auto]')
   427      # Pipeline association of p2 still presents.
   428      self.assertIn(p2, self.clusters.pipelines)
   429      self.assertIn(p2, dcm.pipelines)
   430      self.assertEqual(
   431          options2.view_as(FlinkRunnerOptions).flink_master, meta.master_url)
   432      # The cluster is still known.
   433      self.assertIn(meta, self.clusters.dataproc_cluster_managers)
   434      self.assertIn(meta.master_url, self.clusters.master_urls)
   435      # The default cluster still presents.
   436      self.assertIs(meta, self.clusters.default_cluster_metadata)
   437  
   438    def test_cleanup_by_a_master_url(self):
   439      meta = ClusterMetadata(project_id='test-project')
   440      _ = self.clusters.create(meta)
   441  
   442      self.clusters.cleanup(meta.master_url)
   443      self.m_delete_cluster.assert_called_once()
   444      self.assertNotIn(meta, self.clusters.dataproc_cluster_managers)
   445      self.assertNotIn(meta.master_url, self.clusters.master_urls)
   446      self.assertIsNone(self.clusters.default_cluster_metadata)
   447  
   448    def test_cleanup_by_meta(self):
   449      known_meta = ClusterMetadata(
   450          project_id='test-project', region='test-region')
   451      _ = self.clusters.create(known_meta)
   452  
   453      meta = ClusterMetadata(
   454          project_id=known_meta.project_id,
   455          region=known_meta.region,
   456          cluster_name=known_meta.cluster_name)
   457      self.clusters.cleanup(meta)
   458      self.m_delete_cluster.assert_called_once()
   459      self.assertNotIn(known_meta, self.clusters.dataproc_cluster_managers)
   460      self.assertNotIn(known_meta.master_url, self.clusters.master_urls)
   461      self.assertIsNone(self.clusters.default_cluster_metadata)
   462  
   463    def test_force_cleanup_everything(self):
   464      meta = ClusterMetadata(project_id='test-project')
   465      meta2 = ClusterMetadata(project_id='test-project-2')
   466      _ = self.clusters.create(meta)
   467      _ = self.clusters.create(meta2)
   468  
   469      self.clusters.cleanup(force=True)
   470      self.assertEqual(self.m_delete_cluster.call_count, 2)
   471      self.assertNotIn(meta, self.clusters.dataproc_cluster_managers)
   472      self.assertNotIn(meta2, self.clusters.dataproc_cluster_managers)
   473      self.assertIsNone(self.clusters.default_cluster_metadata)
   474  
   475    def test_cleanup_noop_for_no_cluster_identifier(self):
   476      meta = ClusterMetadata(project_id='test-project')
   477      _ = self.clusters.create(meta)
   478  
   479      self.clusters.cleanup()
   480      self.m_delete_cluster.assert_not_called()
   481  
   482    def test_cleanup_noop_unknown_cluster(self):
   483      meta = ClusterMetadata(project_id='test-project')
   484      dcm = self.clusters.create(meta)
   485      p = beam.Pipeline()
   486      self.clusters.pipelines[p] = dcm
   487      dcm.pipelines.add(p)
   488  
   489      cid_pipeline = beam.Pipeline()
   490      self.clusters.cleanup(cid_pipeline)
   491      self.m_delete_cluster.assert_not_called()
   492  
   493      cid_master_url = 'some-random-url'
   494      self.clusters.cleanup(cid_master_url)
   495      self.m_delete_cluster.assert_not_called()
   496  
   497      cid_meta = ClusterMetadata(project_id='random-project')
   498      self.clusters.cleanup(cid_meta)
   499      self.m_delete_cluster.assert_not_called()
   500  
   501      self.assertIn(meta, self.clusters.dataproc_cluster_managers)
   502      self.assertIn(meta.master_url, self.clusters.master_urls)
   503      self.assertIs(meta, self.clusters.default_cluster_metadata)
   504      self.assertIn(p, self.clusters.pipelines)
   505      self.assertIn(p, dcm.pipelines)
   506  
   507    def test_describe_everything(self):
   508      meta = ClusterMetadata(project_id='test-project')
   509      meta2 = ClusterMetadata(
   510          project_id='test-project', region='some-other-region')
   511      _ = self.clusters.create(meta)
   512      _ = self.clusters.create(meta2)
   513  
   514      meta_list = self.clusters.describe()
   515      self.assertEqual([meta, meta2], meta_list)
   516  
   517    def test_describe_by_cluster_identifier(self):
   518      known_meta = ClusterMetadata(project_id='test-project')
   519      known_meta2 = ClusterMetadata(
   520          project_id='test-project', region='some-other-region')
   521      dcm = self.clusters.create(known_meta)
   522      dcm2 = self.clusters.create(known_meta2)
   523      p = beam.Pipeline()
   524      p2 = beam.Pipeline()
   525      self.clusters.pipelines[p] = dcm
   526      dcm.pipelines.add(p)
   527      self.clusters.pipelines[p2] = dcm2
   528      dcm.pipelines.add(p2)
   529  
   530      cid_pipeline = p
   531      meta = self.clusters.describe(cid_pipeline)
   532      self.assertIs(meta, known_meta)
   533  
   534      cid_master_url = known_meta.master_url
   535      meta = self.clusters.describe(cid_master_url)
   536      self.assertIs(meta, known_meta)
   537  
   538      cid_meta = ClusterMetadata(
   539          project_id=known_meta.project_id,
   540          region=known_meta.region,
   541          cluster_name=known_meta.cluster_name)
   542      meta = self.clusters.describe(cid_meta)
   543      self.assertIs(meta, known_meta)
   544  
   545    def test_describe_everything_when_cluster_identifer_unknown(self):
   546      known_meta = ClusterMetadata(project_id='test-project')
   547      known_meta2 = ClusterMetadata(
   548          project_id='test-project', region='some-other-region')
   549      dcm = self.clusters.create(known_meta)
   550      dcm2 = self.clusters.create(known_meta2)
   551      p = beam.Pipeline()
   552      p2 = beam.Pipeline()
   553      self.clusters.pipelines[p] = dcm
   554      dcm.pipelines.add(p)
   555      self.clusters.pipelines[p2] = dcm2
   556      dcm.pipelines.add(p2)
   557  
   558      cid_pipeline = beam.Pipeline()
   559      meta_list = self.clusters.describe(cid_pipeline)
   560      self.assertEqual([known_meta, known_meta2], meta_list)
   561  
   562      cid_master_url = 'some-random-url'
   563      meta_list = self.clusters.describe(cid_master_url)
   564      self.assertEqual([known_meta, known_meta2], meta_list)
   565  
   566      cid_meta = ClusterMetadata(project_id='some-random-project')
   567      meta_list = self.clusters.describe(cid_meta)
   568      self.assertEqual([known_meta, known_meta2], meta_list)
   569  
   570    def test_default_value_for_invalid_worker_number(self):
   571      meta = ClusterMetadata(project_id='test-project', num_workers=1)
   572      self.clusters.create(meta)
   573  
   574      self.assertEqual(meta.num_workers, 2)
   575  
   576  
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()