#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Tests for apache_beam.runners.interactive.display.pcoll_visualization."""
# pytype: skip-file

import unittest
from unittest.mock import ANY
from unittest.mock import PropertyMock
from unittest.mock import patch

import pytz

import apache_beam as beam
from apache_beam.runners import runner
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive import interactive_runner as ir
from apache_beam.runners.interactive.display import pcoll_visualization as pv
from apache_beam.runners.interactive.recording_manager import RecordingManager
from apache_beam.runners.interactive.testing.mock_ipython import mock_get_ipython
from apache_beam.transforms.window import GlobalWindow
from apache_beam.transforms.window import IntervalWindow
from apache_beam.utils.windowed_value import PaneInfo
from apache_beam.utils.windowed_value import PaneInfoTiming

# timeloop is an optional dependency. When it is missing the name stays
# undefined; the only test that references it lives in the test case below,
# which is skipped as a whole when the interactive environment is not ready.
try:
  import timeloop
except ImportError:
  pass


@unittest.skipIf(
    not ie.current_env().is_interactive_ready,
    '[interactive] dependency is not installed.')
class PCollectionVisualizationTest(unittest.TestCase):
  """Exercises PCollectionVisualization, visualize() and the formatters."""
  def setUp(self):
    """Builds a fresh environment, pipeline and recorded element stream."""
    ie.new_env()
    # Allow unit test to run outside of ipython kernel since we don't test the
    # frontend rendering in unit tests. The flag is a module global, so it is
    # patched (and restored after the test) instead of being overwritten for
    # the rest of the process.
    ready_patcher = patch.object(
        pv, '_pcoll_visualization_ready', True, create=True)
    ready_patcher.start()
    self.addCleanup(ready_patcher.stop)
    # Generally test the logic where notebook is connected to the assumed
    # ipython kernel by forcefully setting notebook check to True.
    ie.current_env()._is_in_notebook = True
    # ib.options is shared process-wide; put the previous timezone back once
    # the test finishes so later tests are not affected by this override.
    original_timezone = ib.options.display_timezone
    self.addCleanup(
        setattr, ib.options, 'display_timezone', original_timezone)
    ib.options.display_timezone = pytz.timezone('US/Pacific')

    self._p = beam.Pipeline(ir.InteractiveRunner())
    # pylint: disable=bad-option-value
    self._pcoll = self._p | 'Create' >> beam.Create(range(5))

    ib.watch(self)
    ie.current_env().track_user_pipelines()

    recording_manager = RecordingManager(self._p)
    recording = recording_manager.record([self._pcoll], 5, 5)
    self._stream = recording.stream(self._pcoll)

  def test_pcoll_visualization_generate_unique_display_id(self):
    """Two visualizations of the same stream must not share display ids."""
    pv_1 = pv.PCollectionVisualization(self._stream)
    pv_2 = pv.PCollectionVisualization(self._stream)
    self.assertNotEqual(pv_1._dive_display_id, pv_2._dive_display_id)
    self.assertNotEqual(pv_1._overview_display_id, pv_2._overview_display_id)
    self.assertNotEqual(pv_1._df_display_id, pv_2._df_display_id)

  @patch('IPython.get_ipython', new_callable=mock_get_ipython)
  @patch(
      'apache_beam.runners.interactive.interactive_environment'
      '.InteractiveEnvironment.is_in_notebook',
      new_callable=PropertyMock)
  def test_one_shot_visualization_not_return_handle(
      self, mocked_is_in_notebook, unused):
    """Without a plotting interval, visualize() returns None."""
    mocked_is_in_notebook.return_value = True
    self.assertIsNone(pv.visualize(self._stream, display_facets=True))

  @patch('IPython.get_ipython', new_callable=mock_get_ipython)
  @patch(
      'apache_beam.runners.interactive.interactive_environment'
      '.InteractiveEnvironment.is_in_notebook',
      new_callable=PropertyMock)
  def test_dynamic_plotting_return_handle(self, mocked_is_in_notebook, unused):
    """With a plotting interval in a notebook, a Timeloop handle is returned."""
    mocked_is_in_notebook.return_value = True
    h = pv.visualize(
        self._stream, dynamic_plotting_interval=1, display_facets=True)
    try:
      self.assertIsInstance(h, timeloop.Timeloop)
    finally:
      # Always halt the periodic job, even when the assertion above fails;
      # otherwise it would keep running for the rest of the test session.
      if hasattr(h, 'stop'):
        h.stop()

  @patch('IPython.get_ipython', new_callable=mock_get_ipython)
  @patch(
      'apache_beam.runners.interactive.interactive_environment'
      '.InteractiveEnvironment.is_in_notebook',
      new_callable=PropertyMock)
  def test_no_dynamic_plotting_when_not_in_notebook(
      self, mocked_is_in_notebook, unused):
    """Outside a notebook no handle is returned despite the interval."""
    mocked_is_in_notebook.return_value = False
    h = pv.visualize(
        self._stream, dynamic_plotting_interval=1, display_facets=True)
    self.assertIsNone(h)

  @patch(
      'apache_beam.runners.interactive.display.pcoll_visualization'
      '.PCollectionVisualization._display_dive')
  @patch(
      'apache_beam.runners.interactive.display.pcoll_visualization'
      '.PCollectionVisualization._display_overview')
  @patch(
      'apache_beam.runners.interactive.display.pcoll_visualization'
      '.PCollectionVisualization._display_dataframe')
  def test_dynamic_plotting_updates_same_display(
      self,
      mocked_display_dataframe,
      mocked_display_overview,
      mocked_display_dive):
    """display(updating_pv=...) forwards the original visualization."""
    original_pcollection_visualization = pv.PCollectionVisualization(
        self._stream, display_facets=True)
    # Dynamic plotting always creates a new PCollectionVisualization.
    new_pcollection_visualization = pv.PCollectionVisualization(
        self._stream, display_facets=True)
    # The display uses ANY data the moment display is invoked, and updates
    # web elements with ids fetched from the given updating_pv.
    new_pcollection_visualization.display(
        updating_pv=original_pcollection_visualization)
    mocked_display_dataframe.assert_called_once_with(
        ANY, original_pcollection_visualization)
    # Below assertions are still true without newer calls.
    mocked_display_overview.assert_called_once_with(
        ANY, original_pcollection_visualization)
    mocked_display_dive.assert_called_once_with(
        ANY, original_pcollection_visualization)

  def test_auto_stop_dynamic_plotting_when_job_is_terminated(self):
    """is_terminated() follows the state of the registered pipeline result."""
    fake_pipeline_result = runner.PipelineResult(runner.PipelineState.RUNNING)
    ie.current_env().set_pipeline_result(self._p, fake_pipeline_result)
    # When job is running, the dynamic plotting will not be stopped.
    self.assertFalse(ie.current_env().is_terminated(self._p))

    fake_pipeline_result = runner.PipelineResult(runner.PipelineState.DONE)
    ie.current_env().set_pipeline_result(self._p, fake_pipeline_result)
    # When job is done, the dynamic plotting will be stopped.
    self.assertTrue(ie.current_env().is_terminated(self._p))

  @patch('pandas.DataFrame.head')
  def test_display_plain_text_when_kernel_has_no_frontend(self, _mocked_head):
    """Outside a notebook, visualize() calls DataFrame.head exactly once."""
    # Resets the notebook check to False.
    ie.current_env()._is_in_notebook = False
    self.assertIsNone(pv.visualize(self._stream, display_facets=True))
    _mocked_head.assert_called_once()

  def test_event_time_formatter(self):
    """A regular event time is rendered in the configured display timezone."""
    # In microseconds: Monday, March 2, 2020 3:14:54 PM GMT-08:00
    event_time_us = 1583190894000000
    self.assertEqual(
        '2020-03-02 15:14:54.000000-0800',
        pv.event_time_formatter(event_time_us))

  def test_event_time_formatter_overflow_lower_bound(self):
    """A very small negative event time is rendered as 'Min Timestamp'."""
    # A relatively small negative event time, which could be valid in Beam but
    # has no meaning when visualized.
    event_time_us = -100000000000000000
    self.assertEqual('Min Timestamp', pv.event_time_formatter(event_time_us))

  def test_event_time_formatter_overflow_upper_bound(self):
    """A very large event time is rendered as 'Max Timestamp'."""
    # A relatively large event time, which exceeds the upper bound of unix time
    # Year 2038. It could mean infinite future in Beam but has no meaning
    # when visualized.
    # The value in test is supposed to be year 10000.
    event_time_us = 253402300800000000
    self.assertEqual('Max Timestamp', pv.event_time_formatter(event_time_us))

  def test_windows_formatter_global(self):
    """A global window is formatted as its own str() representation."""
    gw = GlobalWindow()
    self.assertEqual(str(gw), pv.windows_formatter([gw]))

  def test_windows_formatter_interval(self):
    """An interval window is formatted as its start time plus its duration."""
    # The unit is second.
    iw = IntervalWindow(start=1583190894, end=1583200000)
    self.assertEqual(
        '2020-03-02 15:14:54.000000-0800 (2h 31m 46s)',
        pv.windows_formatter([iw]))

  def test_pane_info_formatter(self):
    """Pane info is formatted as index, finality and timing."""
    self.assertEqual(
        'Pane 0: Final Early',
        pv.pane_info_formatter(
            PaneInfo(
                is_first=False,
                is_last=True,
                timing=PaneInfoTiming.EARLY,
                index=0,
                nonspeculative_index=0)))


if __name__ == '__main__':
  unittest.main()