github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/transforms/timeutil.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Timestamp utilities."""
    19  
    20  # pytype: skip-file
    21  
    22  from abc import ABCMeta
    23  from abc import abstractmethod
    24  
    25  from apache_beam.portability.api import beam_runner_api_pb2
    26  
    27  __all__ = [
    28      'TimeDomain',
    29  ]
    30  
    31  
    32  class TimeDomain(object):
    33    """Time domain for streaming timers."""
    34  
    35    WATERMARK = 'WATERMARK'
    36    REAL_TIME = 'REAL_TIME'
    37    DEPENDENT_REAL_TIME = 'DEPENDENT_REAL_TIME'
    38  
    39    _RUNNER_API_MAPPING = {
    40        WATERMARK: beam_runner_api_pb2.TimeDomain.EVENT_TIME,
    41        REAL_TIME: beam_runner_api_pb2.TimeDomain.PROCESSING_TIME,
    42    }
    43  
    44    @staticmethod
    45    def from_string(domain):
    46      if domain in (TimeDomain.WATERMARK,
    47                    TimeDomain.REAL_TIME,
    48                    TimeDomain.DEPENDENT_REAL_TIME):
    49        return domain
    50      raise ValueError('Unknown time domain: %s' % domain)
    51  
    52    @staticmethod
    53    def to_runner_api(domain):
    54      return TimeDomain._RUNNER_API_MAPPING[domain]
    55  
    56    @staticmethod
    57    def is_event_time(domain):
    58      return TimeDomain.from_string(domain) == TimeDomain.WATERMARK
    59  
    60  
    61  class TimestampCombinerImpl(metaclass=ABCMeta):
    62    """Implementation of TimestampCombiner."""
    63    @abstractmethod
    64    def assign_output_time(self, window, input_timestamp):
    65      raise NotImplementedError
    66  
    67    @abstractmethod
    68    def combine(self, output_timestamp, other_output_timestamp):
    69      raise NotImplementedError
    70  
    71    def combine_all(self, merging_timestamps):
    72      """Apply combine to list of timestamps."""
    73      combined_output_time = None
    74      for output_time in merging_timestamps:
    75        if combined_output_time is None:
    76          combined_output_time = output_time
    77        elif output_time is not None:
    78          combined_output_time = self.combine(combined_output_time, output_time)
    79      return combined_output_time
    80  
    81    def merge(self, unused_result_window, merging_timestamps):
    82      """Default to returning the result of combine_all."""
    83      return self.combine_all(merging_timestamps)
    84  
    85  
    86  class DependsOnlyOnWindow(TimestampCombinerImpl, metaclass=ABCMeta):
    87    """TimestampCombinerImpl that only depends on the window."""
    88    def merge(self, result_window, unused_merging_timestamps):
    89      # Since we know that the result only depends on the window, we can ignore
    90      # the given timestamps.
    91      return self.assign_output_time(result_window, None)
    92  
    93  
    94  class OutputAtEarliestInputTimestampImpl(TimestampCombinerImpl):
    95    """TimestampCombinerImpl outputting at earliest input timestamp."""
    96    def assign_output_time(self, window, input_timestamp):
    97      return input_timestamp
    98  
    99    def combine(self, output_timestamp, other_output_timestamp):
   100      """Default to returning the earlier of two timestamps."""
   101      return min(output_timestamp, other_output_timestamp)
   102  
   103  
   104  class OutputAtEarliestTransformedInputTimestampImpl(TimestampCombinerImpl):
   105    """TimestampCombinerImpl outputting at earliest input timestamp."""
   106    def __init__(self, window_fn):
   107      self.window_fn = window_fn
   108  
   109    def assign_output_time(self, window, input_timestamp):
   110      return self.window_fn.get_transformed_output_time(window, input_timestamp)
   111  
   112    def combine(self, output_timestamp, other_output_timestamp):
   113      return min(output_timestamp, other_output_timestamp)
   114  
   115  
   116  class OutputAtLatestInputTimestampImpl(TimestampCombinerImpl):
   117    """TimestampCombinerImpl outputting at latest input timestamp."""
   118    def assign_output_time(self, window, input_timestamp):
   119      return input_timestamp
   120  
   121    def combine(self, output_timestamp, other_output_timestamp):
   122      return max(output_timestamp, other_output_timestamp)
   123  
   124  
   125  class OutputAtEndOfWindowImpl(DependsOnlyOnWindow):
   126    """TimestampCombinerImpl outputting at end of window."""
   127    def assign_output_time(self, window, unused_input_timestamp):
   128      return window.max_timestamp()
   129  
   130    def combine(self, output_timestamp, other_output_timestamp):
   131      return max(output_timestamp, other_output_timestamp)