github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/interactive/options/capture_control_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Tests for apache_beam.runners.interactive.options.capture_control."""
    19  
    20  # pytype: skip-file
    21  
    22  import unittest
    23  from unittest.mock import patch
    24  
    25  import apache_beam as beam
    26  from apache_beam import coders
    27  from apache_beam.portability.api import beam_interactive_api_pb2
    28  from apache_beam.portability.api import beam_runner_api_pb2
    29  from apache_beam.runners import runner
    30  from apache_beam.runners.interactive import background_caching_job as bcj
    31  from apache_beam.runners.interactive import interactive_beam as ib
    32  from apache_beam.runners.interactive import interactive_environment as ie
    33  from apache_beam.runners.interactive import interactive_runner
    34  from apache_beam.runners.interactive.caching.streaming_cache import StreamingCache
    35  from apache_beam.runners.interactive.options import capture_control
    36  from apache_beam.runners.interactive.options import capture_limiters
    37  from apache_beam.testing.test_stream_service import TestStreamServiceController
    38  
    39  
    40  def _build_an_empty_streaming_pipeline():
    41    from apache_beam.options.pipeline_options import PipelineOptions
    42    from apache_beam.options.pipeline_options import StandardOptions
    43    pipeline_options = PipelineOptions()
    44    pipeline_options.view_as(StandardOptions).streaming = True
    45    p = beam.Pipeline(
    46        interactive_runner.InteractiveRunner(), options=pipeline_options)
    47    ib.watch({'pipeline': p})
    48    return p
    49  
    50  
    51  def _fake_a_running_test_stream_service(pipeline):
    52    class FakeReader:
    53      def read_multiple(self):
    54        yield 1
    55  
    56    test_stream_service = TestStreamServiceController(FakeReader())
    57    test_stream_service.start()
    58    ie.current_env().set_test_stream_service_controller(
    59        pipeline, test_stream_service)
    60  
    61  
    62  @unittest.skipIf(
    63      not ie.current_env().is_interactive_ready,
    64      '[interactive] dependency is not installed.')
    65  class CaptureControlTest(unittest.TestCase):
    66    def setUp(self):
    67      ie.new_env()
    68  
    69    @patch(
    70        'apache_beam.runners.interactive.background_caching_job'
    71        '.BackgroundCachingJob.cancel')
    72    @patch(
    73        'apache_beam.testing.test_stream_service.TestStreamServiceController'
    74        '.stop')
    75    def test_capture_control_evict_captured_data(
    76        self,
    77        mocked_test_stream_service_stop,
    78        mocked_background_caching_job_cancel):
    79      p = _build_an_empty_streaming_pipeline()
    80      ie.current_env().track_user_pipelines()
    81      self.assertFalse(ie.current_env().tracked_user_pipelines == set())
    82  
    83      background_caching_job = bcj.BackgroundCachingJob(
    84          runner.PipelineResult(runner.PipelineState.RUNNING), limiters=[])
    85      ie.current_env().set_background_caching_job(p, background_caching_job)
    86  
    87      _fake_a_running_test_stream_service(p)
    88      # Fake the canceling state of the main job.
    89      background_caching_job._pipeline_result = runner.PipelineResult(
    90          runner.PipelineState.CANCELLING)
    91      self.assertIsNotNone(ie.current_env().get_test_stream_service_controller(p))
    92      ie.current_env().set_cached_source_signature(p, 'a signature')
    93      ie.current_env().mark_pcollection_computed(['fake_pcoll'])
    94      capture_control.evict_captured_data()
    95      mocked_background_caching_job_cancel.assert_called()
    96      mocked_test_stream_service_stop.assert_called_once()
    97      # Neither timer nor capture size limit is reached, thus, the cancelling
    98      # main job's background caching job is not considered as done.
    99      self.assertFalse(background_caching_job.is_done())
   100      self.assertIsNone(ie.current_env().get_test_stream_service_controller(p))
   101      self.assertTrue(ie.current_env().computed_pcollections == set())
   102      self.assertTrue(ie.current_env().get_cached_source_signature(p) == set())
   103  
   104    def test_capture_size_limit_not_reached_when_no_cache(self):
   105      self.assertEqual(len(ie.current_env()._cache_managers), 0)
   106      limiter = capture_limiters.SizeLimiter(1)
   107      self.assertFalse(limiter.is_triggered())
   108  
   109    def test_capture_size_limit_not_reached_when_no_file(self):
   110      cache = StreamingCache(cache_dir=None)
   111      self.assertFalse(cache.exists('my_label'))
   112      ie.current_env().set_cache_manager(cache, 'dummy pipeline')
   113  
   114      limiter = capture_limiters.SizeLimiter(1)
   115      self.assertFalse(limiter.is_triggered())
   116  
   117    def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
   118      ib.options.capture_size_limit = 100
   119      cache = StreamingCache(cache_dir=None)
   120      # Build a sink object to track the label as a capture in the test.
   121      cache.sink(['my_label'], is_capture=True)
   122      cache.write([beam_interactive_api_pb2.TestStreamFileRecord()], 'my_label')
   123      self.assertTrue(cache.exists('my_label'))
   124      ie.current_env().set_cache_manager(cache, 'dummy pipeline')
   125  
   126      limiter = capture_limiters.SizeLimiter(ib.options.capture_size_limit)
   127      self.assertFalse(limiter.is_triggered())
   128  
   129    def test_capture_size_limit_reached_when_file_size_above_limit(self):
   130      ib.options.capture_size_limit = 1
   131      cache = StreamingCache(cache_dir=None)
   132      cache.sink(['my_label'], is_capture=True)
   133      cache.write([
   134          beam_interactive_api_pb2.TestStreamFileRecord(
   135              recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
   136                  element_event=beam_runner_api_pb2.TestStreamPayload.Event.
   137                  AddElements(
   138                      elements=[
   139                          beam_runner_api_pb2.TestStreamPayload.
   140                          TimestampedElement(
   141                              encoded_element=coders.FastPrimitivesCoder().encode(
   142                                  'a'),
   143                              timestamp=0)
   144                      ])))
   145      ],
   146                  'my_label')
   147      self.assertTrue(cache.exists('my_label'))
   148      p = _build_an_empty_streaming_pipeline()
   149      ie.current_env().set_cache_manager(cache, p)
   150  
   151      limiter = capture_limiters.SizeLimiter(1)
   152      self.assertTrue(limiter.is_triggered())
   153  
   154    def test_timer_terminates_capture_size_checker(self):
   155      p = _build_an_empty_streaming_pipeline()
   156  
   157      class FakeLimiter(capture_limiters.Limiter):
   158        def __init__(self):
   159          self.trigger = False
   160  
   161        def is_triggered(self):
   162          return self.trigger
   163  
   164      limiter = FakeLimiter()
   165      background_caching_job = bcj.BackgroundCachingJob(
   166          runner.PipelineResult(runner.PipelineState.CANCELLING),
   167          limiters=[limiter])
   168      ie.current_env().set_background_caching_job(p, background_caching_job)
   169  
   170      self.assertFalse(background_caching_job.is_done())
   171  
   172      limiter.trigger = True
   173      self.assertTrue(background_caching_job.is_done())
   174  
   175  
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()