#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Integration tests for PeriodicSequence watermark emission."""

# pytype: skip-file

import time
import unittest

import pytest

import apache_beam as beam
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import is_empty
from apache_beam.transforms import trigger
from apache_beam.transforms import window
from apache_beam.transforms.core import DoFn
from apache_beam.transforms.periodicsequence import PeriodicSequence


@unittest.skipIf(
    not TestPipeline().get_pipeline_options().view_as(
        StandardOptions).streaming,
    "Watermark tests are only valid for streaming jobs.")
class PeriodicSequenceIT(unittest.TestCase):
  """Integration test for PeriodicSequence in a streaming pipeline.

  The whole class is skipped unless the test pipeline options have
  ``streaming`` enabled.
  """
  def setUp(self):
    """Creates a fresh integration-test pipeline for each test."""
    self.test_pipeline = TestPipeline(is_integration_test=True)

  @pytest.mark.it_postcommit
  @pytest.mark.sickbay_direct
  @pytest.mark.sickbay_spark
  @pytest.mark.timeout(
      1800)  # Timeout after 30 minutes to give Dataflow some extra time
  def test_periodicsequence_outputs_valid_watermarks_it(self):
    """Tests periodic sequence with watermarks on Dataflow.

    For testing that watermarks are being correctly emitted,
    we make sure that there's not a long gap between an element being
    emitted and being correctly aggregated.
    """
    class FindLongGaps(DoFn):
      """Emits an error tuple for every element that was aggregated too late.

      Each input is unpacked as an ``(emitted_at, count)`` pair, where
      ``emitted_at`` is the wall-clock time recorded by the upstream
      'MapToCurrentTime' step. Nothing is emitted for elements processed
      within the allowed gap, so an empty output means the test passed.
      """
      # Largest tolerated delay, in seconds, between an element being stamped
      # upstream and reaching this DoFn after windowed aggregation.
      MAX_GAP_SEC = 25

      def process(self, element):
        emitted_at, unused_count = element
        processed_at = time.time()
        if processed_at - emitted_at > self.MAX_GAP_SEC:
          yield (
              'Elements emitted took too long to process.',
              emitted_at,
              processed_at)

    start_time = time.time()
    # Run long enough for Dataflow to start up
    duration_sec = 540
    end_time = start_time + duration_sec
    interval = 1

    # A single (start, end, interval) element is fed to PeriodicSequence.
    # Every generated element is then replaced by the wall-clock time at
    # which it was observed, windowed, counted per distinct value, and
    # checked for excessive delay.
    res = (
        self.test_pipeline
        | 'ImpulseElement' >> beam.Create([(start_time, end_time, interval)])
        | 'ImpulseSeqGen' >> PeriodicSequence()
        | 'MapToCurrentTime' >> beam.Map(lambda element: time.time())
        | 'window_into' >> beam.WindowInto(
            window.FixedWindows(2),
            accumulation_mode=trigger.AccumulationMode.DISCARDING)
        | beam.combiners.Count.PerElement()
        | beam.ParDo(FindLongGaps()))
    # Any output from FindLongGaps is a detected long gap and fails the test.
    assert_that(res, is_empty())

    self.test_pipeline.run().wait_until_finish()


if __name__ == '__main__':
  unittest.main()