#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Tests for apache_beam.runners.interactive.options.capture_control."""

# pytype: skip-file

import unittest
from unittest.mock import patch

import apache_beam as beam
from apache_beam import coders
from apache_beam.portability.api import beam_interactive_api_pb2
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.runners import runner
from apache_beam.runners.interactive import background_caching_job as bcj
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import interactive_runner
from apache_beam.runners.interactive.caching.streaming_cache import StreamingCache
from apache_beam.runners.interactive.options import capture_control
from apache_beam.runners.interactive.options import capture_limiters
from apache_beam.testing.test_stream_service import TestStreamServiceController


def _build_an_empty_streaming_pipeline():
  """Returns a watched, transform-free pipeline with streaming enabled.

  The pipeline uses an InteractiveRunner and is registered with ib.watch under
  the name 'pipeline'.
  """
  from apache_beam.options.pipeline_options import PipelineOptions
  from apache_beam.options.pipeline_options import StandardOptions
  pipeline_options = PipelineOptions()
  pipeline_options.view_as(StandardOptions).streaming = True
  p = beam.Pipeline(
      interactive_runner.InteractiveRunner(), options=pipeline_options)
  ib.watch({'pipeline': p})
  return p


def _fake_a_running_test_stream_service(pipeline):
  """Starts a test stream service and registers it for the given pipeline.

  The service is backed by a fake reader that yields a single value and is
  stored in the current interactive environment as the pipeline's test stream
  service controller.
  """
  class FakeReader:
    def read_multiple(self):
      yield 1

  test_stream_service = TestStreamServiceController(FakeReader())
  test_stream_service.start()
  ie.current_env().set_test_stream_service_controller(
      pipeline, test_stream_service)


@unittest.skipIf(
    not ie.current_env().is_interactive_ready,
    '[interactive] dependency is not installed.')
class CaptureControlTest(unittest.TestCase):
  """Tests for evicting captured data and for the capture limiters."""
  def setUp(self):
    ie.new_env()
    # Some tests assign ib.options.capture_size_limit. Restore the value seen
    # at setup so that assignment cannot leak into tests that run afterwards.
    self.addCleanup(
        setattr,
        ib.options,
        'capture_size_limit',
        ib.options.capture_size_limit)

  @patch(
      'apache_beam.runners.interactive.background_caching_job'
      '.BackgroundCachingJob.cancel')
  @patch(
      'apache_beam.testing.test_stream_service.TestStreamServiceController'
      '.stop')
  def test_capture_control_evict_captured_data(
      self,
      mocked_test_stream_service_stop,
      mocked_background_caching_job_cancel):
    """Evicting captured data cancels the job and clears tracked state."""
    p = _build_an_empty_streaming_pipeline()
    ie.current_env().track_user_pipelines()
    self.assertNotEqual(ie.current_env().tracked_user_pipelines, set())

    background_caching_job = bcj.BackgroundCachingJob(
        runner.PipelineResult(runner.PipelineState.RUNNING), limiters=[])
    ie.current_env().set_background_caching_job(p, background_caching_job)

    # NOTE(review): TestStreamServiceController.stop is patched for this test,
    # so the service started here is presumably never really stopped -- confirm
    # whether that leaks a server for the rest of the test run.
    _fake_a_running_test_stream_service(p)
    # Fake the canceling state of the main job.
    background_caching_job._pipeline_result = runner.PipelineResult(
        runner.PipelineState.CANCELLING)
    self.assertIsNotNone(ie.current_env().get_test_stream_service_controller(p))
    ie.current_env().set_cached_source_signature(p, 'a signature')
    ie.current_env().mark_pcollection_computed(['fake_pcoll'])
    capture_control.evict_captured_data()
    mocked_background_caching_job_cancel.assert_called()
    mocked_test_stream_service_stop.assert_called_once()
    # Neither timer nor capture size limit is reached, thus, the cancelling
    # main job's background caching job is not considered as done.
    self.assertFalse(background_caching_job.is_done())
    self.assertIsNone(ie.current_env().get_test_stream_service_controller(p))
    self.assertEqual(ie.current_env().computed_pcollections, set())
    self.assertEqual(ie.current_env().get_cached_source_signature(p), set())

  def test_capture_size_limit_not_reached_when_no_cache(self):
    """A size limiter is not triggered when no cache manager is registered."""
    self.assertEqual(len(ie.current_env()._cache_managers), 0)
    limiter = capture_limiters.SizeLimiter(1)
    self.assertFalse(limiter.is_triggered())

  def test_capture_size_limit_not_reached_when_no_file(self):
    """A size limiter is not triggered when the cache holds no such label."""
    cache = StreamingCache(cache_dir=None)
    self.assertFalse(cache.exists('my_label'))
    ie.current_env().set_cache_manager(cache, 'dummy pipeline')

    limiter = capture_limiters.SizeLimiter(1)
    self.assertFalse(limiter.is_triggered())

  def test_capture_size_limit_not_reached_when_file_size_under_limit(self):
    """A size limiter of 100 is not triggered by one empty record."""
    ib.options.capture_size_limit = 100
    cache = StreamingCache(cache_dir=None)
    # Build a sink object to track the label as a capture in the test.
    cache.sink(['my_label'], is_capture=True)
    cache.write([beam_interactive_api_pb2.TestStreamFileRecord()], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    ie.current_env().set_cache_manager(cache, 'dummy pipeline')

    limiter = capture_limiters.SizeLimiter(ib.options.capture_size_limit)
    self.assertFalse(limiter.is_triggered())

  def test_capture_size_limit_reached_when_file_size_above_limit(self):
    """A size limiter of 1 is triggered by a record holding one element."""
    ib.options.capture_size_limit = 1
    cache = StreamingCache(cache_dir=None)
    cache.sink(['my_label'], is_capture=True)

    # Assemble the record bottom-up: element -> AddElements event -> record.
    payload = beam_runner_api_pb2.TestStreamPayload
    element = payload.TimestampedElement(
        encoded_element=coders.FastPrimitivesCoder().encode('a'), timestamp=0)
    event = payload.Event(
        element_event=payload.Event.AddElements(elements=[element]))
    record = beam_interactive_api_pb2.TestStreamFileRecord(recorded_event=event)

    cache.write([record], 'my_label')
    self.assertTrue(cache.exists('my_label'))
    p = _build_an_empty_streaming_pipeline()
    ie.current_env().set_cache_manager(cache, p)

    limiter = capture_limiters.SizeLimiter(1)
    self.assertTrue(limiter.is_triggered())

  def test_timer_terminates_capture_size_checker(self):
    """A cancelling job reports done only once its limiter is triggered."""
    p = _build_an_empty_streaming_pipeline()

    class FakeLimiter(capture_limiters.Limiter):
      """Limiter whose triggered state is toggled directly by the test."""
      def __init__(self):
        self.trigger = False

      def is_triggered(self):
        return self.trigger

    limiter = FakeLimiter()
    background_caching_job = bcj.BackgroundCachingJob(
        runner.PipelineResult(runner.PipelineState.CANCELLING),
        limiters=[limiter])
    ie.current_env().set_background_caching_job(p, background_caching_job)

    self.assertFalse(background_caching_job.is_done())

    limiter.trigger = True
    self.assertTrue(background_caching_job.is_done())


if __name__ == '__main__':
  unittest.main()