#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Timestamp utilities."""

# pytype: skip-file

from abc import ABCMeta
from abc import abstractmethod

from apache_beam.portability.api import beam_runner_api_pb2

__all__ = [
    'TimeDomain',
]


class TimeDomain:
  """Names of the time domains a streaming timer can be set in.

  Domains are represented as plain strings; the class only groups the
  constants and the helpers that validate and convert them.
  """

  WATERMARK = 'WATERMARK'
  REAL_TIME = 'REAL_TIME'
  DEPENDENT_REAL_TIME = 'DEPENDENT_REAL_TIME'

  # Only these two domains have a runner API counterpart here;
  # DEPENDENT_REAL_TIME is deliberately left as written upstream (no entry).
  _RUNNER_API_MAPPING = {
      WATERMARK: beam_runner_api_pb2.TimeDomain.EVENT_TIME,
      REAL_TIME: beam_runner_api_pb2.TimeDomain.PROCESSING_TIME,
  }

  @staticmethod
  def from_string(domain):
    """Validate a time domain name and return it unchanged.

    Args:
      domain: candidate time domain value.

    Returns:
      ``domain`` itself when it equals one of the three known constants.

    Raises:
      ValueError: if ``domain`` is not a known time domain.
    """
    known_domains = (
        TimeDomain.WATERMARK,
        TimeDomain.REAL_TIME,
        TimeDomain.DEPENDENT_REAL_TIME,
    )
    if domain not in known_domains:
      raise ValueError('Unknown time domain: %s' % domain)
    return domain

  @staticmethod
  def to_runner_api(domain):
    """Look up the runner API enum value for a time domain.

    Raises:
      KeyError: if ``domain`` has no entry in the mapping (this includes
        ``DEPENDENT_REAL_TIME``).
    """
    runner_api_value = TimeDomain._RUNNER_API_MAPPING[domain]
    return runner_api_value

  @staticmethod
  def is_event_time(domain):
    """Return True iff ``domain`` is the WATERMARK domain.

    The value is validated first, so an unknown domain raises ValueError
    instead of returning False.
    """
    validated = TimeDomain.from_string(domain)
    return validated == TimeDomain.WATERMARK


class TimestampCombinerImpl(metaclass=ABCMeta):
  """Abstract base for TimestampCombiner implementations.

  Subclasses supply ``assign_output_time`` and ``combine``; ``combine_all``
  and ``merge`` are built on top of those two.
  """
  @abstractmethod
  def assign_output_time(self, window, input_timestamp):
    """Return the output time for ``input_timestamp`` within ``window``."""
    raise NotImplementedError

  @abstractmethod
  def combine(self, output_timestamp, other_output_timestamp):
    """Reduce two output timestamps to a single one."""
    raise NotImplementedError

  def combine_all(self, merging_timestamps):
    """Fold ``combine`` over an iterable of timestamps, skipping ``None``.

    Returns:
      The combined timestamp, or ``None`` when the iterable is empty or
      holds only ``None`` entries.
    """
    result = None
    for candidate in merging_timestamps:
      if candidate is None:
        # Missing timestamps never contribute to the result.
        continue
      if result is None:
        result = candidate
      else:
        result = self.combine(result, candidate)
    return result

  def merge(self, unused_result_window, merging_timestamps):
    """Merge timestamps of merged windows; by default, ``combine_all``."""
    return self.combine_all(merging_timestamps)


class DependsOnlyOnWindow(TimestampCombinerImpl, metaclass=ABCMeta):
  """TimestampCombinerImpl whose output time is a function of the window."""
  def merge(self, result_window, unused_merging_timestamps):
    """Return the output time assigned for ``result_window``.

    The individual timestamps are ignored because the result depends only
    on the window; ``None`` is passed as the input timestamp.
    """
    return self.assign_output_time(result_window, None)


class OutputAtEarliestInputTimestampImpl(TimestampCombinerImpl):
  """TimestampCombinerImpl that outputs at the earliest input timestamp."""
  def assign_output_time(self, window, input_timestamp):
    """Return the input timestamp unchanged."""
    return input_timestamp

  def combine(self, output_timestamp, other_output_timestamp):
    """Return the smaller of the two output timestamps."""
    earliest = min(output_timestamp, other_output_timestamp)
    return earliest


class OutputAtEarliestTransformedInputTimestampImpl(TimestampCombinerImpl):
  """TimestampCombinerImpl outputting at earliest transformed input timestamp.

  Each input timestamp is first passed through the window function's
  ``get_transformed_output_time`` before the minimum is taken.
  """
  def __init__(self, window_fn):
    """Store the window function used to transform input timestamps."""
    self.window_fn = window_fn

  def assign_output_time(self, window, input_timestamp):
    """Return the window function's transformed output time."""
    transform = self.window_fn.get_transformed_output_time
    return transform(window, input_timestamp)

  def combine(self, output_timestamp, other_output_timestamp):
    """Return the smaller of the two output timestamps."""
    earliest = min(output_timestamp, other_output_timestamp)
    return earliest


class OutputAtLatestInputTimestampImpl(TimestampCombinerImpl):
  """TimestampCombinerImpl that outputs at the latest input timestamp."""
  def assign_output_time(self, window, input_timestamp):
    """Return the input timestamp unchanged."""
    return input_timestamp

  def combine(self, output_timestamp, other_output_timestamp):
    """Return the larger of the two output timestamps."""
    latest = max(output_timestamp, other_output_timestamp)
    return latest


class OutputAtEndOfWindowImpl(DependsOnlyOnWindow):
  """TimestampCombinerImpl that outputs at the end of the window."""
  def assign_output_time(self, window, unused_input_timestamp):
    """Return ``window.max_timestamp()``; the input timestamp is ignored."""
    return window.max_timestamp()

  def combine(self, output_timestamp, other_output_timestamp):
    """Return the larger of the two output timestamps."""
    latest = max(output_timestamp, other_output_timestamp)
    return latest