github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/test_stream.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Provides TestStream for verifying streaming runner semantics.
    19  
    20  For internal use only; no backwards-compatibility guarantees.
    21  """
    22  # pytype: skip-file
    23  
    24  from abc import ABCMeta
    25  from abc import abstractmethod
    26  from enum import Enum
    27  from functools import total_ordering
    28  
    29  import apache_beam as beam
    30  from apache_beam import coders
    31  from apache_beam import pvalue
    32  from apache_beam.portability import common_urns
    33  from apache_beam.portability.api import beam_interactive_api_pb2
    34  from apache_beam.portability.api import beam_runner_api_pb2
    35  from apache_beam.portability.api import endpoints_pb2
    36  from apache_beam.transforms import PTransform
    37  from apache_beam.transforms import core
    38  from apache_beam.transforms import window
    39  from apache_beam.transforms.timeutil import TimeDomain
    40  from apache_beam.transforms.userstate import TimerSpec
    41  from apache_beam.transforms.userstate import on_timer
    42  from apache_beam.transforms.window import TimestampedValue
    43  from apache_beam.utils import timestamp
    44  from apache_beam.utils.timestamp import MIN_TIMESTAMP
    45  from apache_beam.utils.timestamp import Duration
    46  from apache_beam.utils.timestamp import Timestamp
    47  from apache_beam.utils.windowed_value import WindowedValue
    48  
    49  __all__ = [
    50      'Event',
    51      'ElementEvent',
    52      'WatermarkEvent',
    53      'ProcessingTimeEvent',
    54      'TestStream',
    55  ]
    56  
    57  
    58  @total_ordering
    59  class Event(metaclass=ABCMeta):  # type: ignore[misc]
    60    """Test stream event to be emitted during execution of a TestStream."""
    61    @abstractmethod
    62    def __eq__(self, other):
    63      raise NotImplementedError
    64  
    65    @abstractmethod
    66    def __hash__(self):
    67      raise NotImplementedError
    68  
    69    @abstractmethod
    70    def __lt__(self, other):
    71      raise NotImplementedError
    72  
    73    @abstractmethod
    74    def to_runner_api(self, element_coder):
    75      raise NotImplementedError
    76  
    77    @staticmethod
    78    def from_runner_api(proto, element_coder):
    79      if proto.HasField('element_event'):
    80        event = proto.element_event
    81        tag = None if event.tag == 'None' else event.tag
    82        return ElementEvent([
    83            TimestampedValue(
    84                element_coder.decode(tv.encoded_element),
    85                Timestamp(micros=1000 * tv.timestamp))
    86            for tv in proto.element_event.elements
    87        ], tag=tag) # yapf: disable
    88      elif proto.HasField('watermark_event'):
    89        event = proto.watermark_event
    90        tag = None if event.tag == 'None' else event.tag
    91        return WatermarkEvent(
    92            Timestamp(micros=1000 * proto.watermark_event.new_watermark), tag=tag)
    93      elif proto.HasField('processing_time_event'):
    94        return ProcessingTimeEvent(
    95            timestamp.Duration(
    96                micros=1000 * proto.processing_time_event.advance_duration))
    97      else:
    98        raise ValueError(
    99            'Unknown TestStream Event type: %s' % proto.WhichOneof('event'))
   100  
   101  
   102  class ElementEvent(Event):
   103    """Element-producing test stream event."""
   104    def __init__(self, timestamped_values, tag=None):
   105      self.timestamped_values = timestamped_values
   106      self.tag = tag
   107  
   108    def __eq__(self, other):
   109      if not isinstance(other, ElementEvent):
   110        return False
   111  
   112      return (
   113          self.timestamped_values == other.timestamped_values and
   114          self.tag == other.tag)
   115  
   116    def __hash__(self):
   117      return hash(self.timestamped_values)
   118  
   119    def __lt__(self, other):
   120      if not isinstance(other, ElementEvent):
   121        raise TypeError
   122  
   123      return self.timestamped_values < other.timestamped_values
   124  
   125    def to_runner_api(self, element_coder):
   126      tag = 'None' if self.tag is None else self.tag
   127      return beam_runner_api_pb2.TestStreamPayload.Event(
   128          element_event=beam_runner_api_pb2.TestStreamPayload.Event.AddElements(
   129              elements=[
   130                  beam_runner_api_pb2.TestStreamPayload.TimestampedElement(
   131                      encoded_element=element_coder.encode(tv.value),
   132                      timestamp=tv.timestamp.micros // 1000)
   133                  for tv in self.timestamped_values
   134              ],
   135              tag=tag))
   136  
   137    def __repr__(self):
   138      return 'ElementEvent: <{}, {}>'.format([(e.value, e.timestamp)
   139                                              for e in self.timestamped_values],
   140                                             self.tag)
   141  
   142  
   143  class WatermarkEvent(Event):
   144    """Watermark-advancing test stream event."""
   145    def __init__(self, new_watermark, tag=None):
   146      self.new_watermark = Timestamp.of(new_watermark)
   147      self.tag = tag
   148  
   149    def __eq__(self, other):
   150      if not isinstance(other, WatermarkEvent):
   151        return False
   152  
   153      return self.new_watermark == other.new_watermark and self.tag == other.tag
   154  
   155    def __hash__(self):
   156      return hash(str(self.new_watermark) + str(self.tag))
   157  
   158    def __lt__(self, other):
   159      if not isinstance(other, WatermarkEvent):
   160        raise TypeError
   161  
   162      return self.new_watermark < other.new_watermark
   163  
   164    def to_runner_api(self, unused_element_coder):
   165      tag = 'None' if self.tag is None else self.tag
   166  
   167      # Assert that no precision is lost.
   168      assert self.new_watermark.micros % 1000 == 0
   169      return beam_runner_api_pb2.TestStreamPayload.Event(
   170          watermark_event=beam_runner_api_pb2.TestStreamPayload.Event.
   171          AdvanceWatermark(
   172              new_watermark=self.new_watermark.micros // 1000, tag=tag))
   173  
   174    def __repr__(self):
   175      return 'WatermarkEvent: <{}, {}>'.format(self.new_watermark, self.tag)
   176  
   177  
   178  class ProcessingTimeEvent(Event):
   179    """Processing time-advancing test stream event."""
   180    def __init__(self, advance_by):
   181      self.advance_by = Duration.of(advance_by)
   182  
   183    def __eq__(self, other):
   184      if not isinstance(other, ProcessingTimeEvent):
   185        return False
   186  
   187      return self.advance_by == other.advance_by
   188  
   189    def __hash__(self):
   190      return hash(self.advance_by)
   191  
   192    def __lt__(self, other):
   193      if not isinstance(other, ProcessingTimeEvent):
   194        raise TypeError
   195  
   196      return self.advance_by < other.advance_by
   197  
   198    def to_runner_api(self, unused_element_coder):
   199      assert self.advance_by.micros % 1000 == 0
   200      return beam_runner_api_pb2.TestStreamPayload.Event(
   201          processing_time_event=beam_runner_api_pb2.TestStreamPayload.Event.
   202          AdvanceProcessingTime(advance_duration=self.advance_by.micros // 1000))
   203  
   204    def __repr__(self):
   205      return 'ProcessingTimeEvent: <{}>'.format(self.advance_by)
   206  
   207  
   208  class WindowedValueHolderMeta(type):
   209    """A metaclass that overrides the isinstance check for WindowedValueHolder.
   210  
   211    Python does a quick test for exact match. If an instance is exactly of
   212    type WindowedValueHolder, the overridden isinstance check is omitted.
   213    The override is needed because WindowedValueHolder elements encoded then
   214    decoded become Row elements.
   215    """
   216    def __instancecheck__(cls, other):
   217      """Checks if a beam.Row typed instance is a WindowedValueHolder.
   218      """
   219      return (
   220          isinstance(other, beam.Row) and hasattr(other, 'windowed_value') and
   221          hasattr(other, 'urn') and
   222          isinstance(other.windowed_value, WindowedValue) and
   223          other.urn == common_urns.coders.ROW.urn)
   224  
   225  
   226  class WindowedValueHolder(beam.Row, metaclass=WindowedValueHolderMeta):
   227    """A class that holds a WindowedValue.
   228  
   229    This is a special class that can be used by the runner that implements the
   230    TestStream as a signal that the underlying value should be unreified to the
   231    specified window.
   232    """
   233    # Register WindowedValueHolder to always use RowCoder.
   234    coders.registry.register_coder(WindowedValueHolderMeta, coders.RowCoder)
   235  
   236    def __init__(self, windowed_value):
   237      assert isinstance(windowed_value, WindowedValue), (
   238          'WindowedValueHolder can only hold %s type. Instead, %s is given.') % (
   239              WindowedValue, windowed_value)
   240      super().__init__(
   241          **{
   242              'windowed_value': windowed_value, 'urn': common_urns.coders.ROW.urn
   243          })
   244  
   245    @classmethod
   246    def from_row(cls, row):
   247      """Converts a beam.Row typed instance to WindowedValueHolder.
   248      """
   249      if isinstance(row, WindowedValueHolder):
   250        return WindowedValueHolder(row.windowed_value)
   251      assert isinstance(row, beam.Row), 'The given row %s must be a %s type' % (
   252          row, beam.Row)
   253      assert hasattr(row, 'windowed_value'), (
   254          'The given %s must have a windowed_value attribute.') % row
   255      assert isinstance(row.windowed_value, WindowedValue), (
   256          'The windowed_value attribute of %s must be a %s type') % (
   257              row, WindowedValue)
   258  
   259  
   260  class TestStream(PTransform):
   261    """Test stream that generates events on an unbounded PCollection of elements.
   262  
   263    Each event emits elements, advances the watermark or advances the processing
   264    time. After all of the specified elements are emitted, ceases to produce
   265    output.
   266  
   267    Applying the PTransform will return a single PCollection if only the default
   268    output or only one output tag has been used. Otherwise a dictionary of output
   269    names to PCollections will be returned.
   270    """
   271    def __init__(
   272        self,
   273        coder=coders.FastPrimitivesCoder(),
   274        events=None,
   275        output_tags=None,
   276        endpoint=None):
   277      """
   278      Args:
   279        coder: (apache_beam.Coder) the coder to encode/decode elements.
   280        events: (List[Event]) a list of instructions for the TestStream to
   281          execute. If specified, the events tags must exist in the output_tags.
   282        output_tags: (List[str]) Initial set of outputs. If no event references an
   283          output tag, no output will be produced for that tag.
   284        endpoint: (str) a URL locating a TestStreamService.
   285      """
   286  
   287      super().__init__()
   288      assert coder is not None
   289  
   290      self.coder = coder
   291      self.watermarks = {None: timestamp.MIN_TIMESTAMP}
   292      self.output_tags = set(output_tags) if output_tags else set()
   293      self._events = [] if events is None else list(events)
   294      self._endpoint = endpoint
   295  
   296      event_tags = set(
   297          e.tag for e in self._events
   298          if isinstance(e, (WatermarkEvent, ElementEvent)))
   299      assert event_tags.issubset(self.output_tags), \
   300          '{} is not a subset of {}'.format(event_tags, output_tags)
   301      assert not (self._events and self._endpoint), \
   302          'Only either events or an endpoint can be given at once.'
   303  
   304    def get_windowing(self, unused_inputs):
   305      return core.Windowing(window.GlobalWindows())
   306  
   307    def _infer_output_coder(self, input_type=None, input_coder=None):
   308      return self.coder
   309  
   310    def expand(self, pbegin):
   311      assert isinstance(pbegin, pvalue.PBegin)
   312      self.pipeline = pbegin.pipeline
   313      if not self.output_tags:
   314        self.output_tags = {None}
   315  
   316      # For backwards compatibility return a single PCollection.
   317      if self.output_tags == {None}:
   318        return pvalue.PCollection(
   319            self.pipeline, is_bounded=False, tag=list(self.output_tags)[0])
   320      return {
   321          tag: pvalue.PCollection(self.pipeline, is_bounded=False, tag=tag)
   322          for tag in self.output_tags
   323      }
   324  
   325    def _add(self, event):
   326      if isinstance(event, ElementEvent):
   327        for tv in event.timestamped_values:
   328          assert tv.timestamp < timestamp.MAX_TIMESTAMP, (
   329              'Element timestamp must be before timestamp.MAX_TIMESTAMP.')
   330      elif isinstance(event, WatermarkEvent):
   331        if event.tag not in self.watermarks:
   332          self.watermarks[event.tag] = timestamp.MIN_TIMESTAMP
   333        assert event.new_watermark > self.watermarks[event.tag], (
   334            'Watermark must strictly-monotonically advance.')
   335        self.watermarks[event.tag] = event.new_watermark
   336      elif isinstance(event, ProcessingTimeEvent):
   337        assert event.advance_by > 0, (
   338            'Must advance processing time by positive amount.')
   339      else:
   340        raise ValueError('Unknown event: %s' % event)
   341      self._events.append(event)
   342  
   343    def add_elements(self, elements, tag=None, event_timestamp=None):
   344      """Add elements to the TestStream.
   345  
   346      Elements added to the TestStream will be produced during pipeline execution.
   347      These elements can be TimestampedValue, WindowedValue or raw unwrapped
   348      elements that are serializable using the TestStream's specified Coder.  When
   349      a TimestampedValue or a WindowedValue element is used, the timestamp of the
   350      TimestampedValue or WindowedValue will be the timestamp of the produced
   351      element; otherwise, the current watermark timestamp will be used for that
   352      element.  The windows of a given WindowedValue are ignored by the
   353      TestStream.
   354      """
   355      self.output_tags.add(tag)
   356      timestamped_values = []
   357      if tag not in self.watermarks:
   358        self.watermarks[tag] = timestamp.MIN_TIMESTAMP
   359  
   360      for element in elements:
   361        if isinstance(element, TimestampedValue):
   362          timestamped_values.append(element)
   363        elif isinstance(element, WindowedValue):
   364          # Drop windows for elements in test stream.
   365          timestamped_values.append(
   366              TimestampedValue(element.value, element.timestamp))
   367        else:
   368          # Add elements with timestamp equal to current watermark.
   369          if event_timestamp is None:
   370            event_timestamp = self.watermarks[tag]
   371          timestamped_values.append(TimestampedValue(element, event_timestamp))
   372      self._add(ElementEvent(timestamped_values, tag))
   373      return self
   374  
   375    def advance_watermark_to(self, new_watermark, tag=None):
   376      """Advance the watermark to a given Unix timestamp.
   377  
   378      The Unix timestamp value used must be later than the previous watermark
   379      value and should be given as an int, float or utils.timestamp.Timestamp
   380      object.
   381      """
   382      self.output_tags.add(tag)
   383      self._add(WatermarkEvent(new_watermark, tag))
   384      return self
   385  
   386    def advance_watermark_to_infinity(self, tag=None):
   387      """Advance the watermark to the end of time, completing this TestStream."""
   388      self.advance_watermark_to(timestamp.MAX_TIMESTAMP, tag)
   389      return self
   390  
   391    def advance_processing_time(self, advance_by):
   392      """Advance the current processing time by a given duration in seconds.
   393  
   394      The duration must be a positive second duration and should be given as an
   395      int, float or utils.timestamp.Duration object.
   396      """
   397      self._add(ProcessingTimeEvent(advance_by))
   398      return self
   399  
   400    def to_runner_api_parameter(self, context):
   401      # Sort the output tags so that the order is deterministic and we are able
   402      # to test equality on a roundtrip through the to/from proto apis.
   403      return (
   404          common_urns.primitives.TEST_STREAM.urn,
   405          beam_runner_api_pb2.TestStreamPayload(
   406              coder_id=context.coders.get_id(self.coder),
   407              events=[e.to_runner_api(self.coder) for e in self._events],
   408              endpoint=endpoints_pb2.ApiServiceDescriptor(url=self._endpoint)))
   409  
   410    @staticmethod
   411    @PTransform.register_urn(
   412        common_urns.primitives.TEST_STREAM.urn,
   413        beam_runner_api_pb2.TestStreamPayload)
   414    def from_runner_api_parameter(ptransform, payload, context):
   415      coder = context.coders.get_by_id(payload.coder_id)
   416      output_tags = set(
   417          None if k == 'None' else k for k in ptransform.outputs.keys())
   418      return TestStream(
   419          coder=coder,
   420          events=[Event.from_runner_api(e, coder) for e in payload.events],
   421          output_tags=output_tags,
   422          endpoint=payload.endpoint.url)
   423  
   424  
   425  class TimingInfo(object):
   426    def __init__(self, processing_time, watermark):
   427      self._processing_time = Timestamp.of(processing_time)
   428      self._watermark = Timestamp.of(watermark)
   429  
   430    @property
   431    def processing_time(self):
   432      return self._processing_time
   433  
   434    @property
   435    def watermark(self):
   436      return self._watermark
   437  
   438    def __repr__(self):
   439      return '({}, {})'.format(self.processing_time, self.watermark)
   440  
   441  
   442  class PairWithTiming(PTransform):
   443    """Pairs the input element with timing information.
   444  
   445    Input: element; output: KV(element, timing information)
   446    Where timing information := (processing time, watermark)
   447  
   448    This is used in the ReverseTestStream implementation to replay watermark
   449    advancements.
   450    """
   451  
   452    URN = "beam:transform:pair_with_timing:v1"
   453  
   454    def expand(self, pcoll):
   455      return pvalue.PCollection.from_(pcoll)
   456  
   457  
   458  class OutputFormat(Enum):
   459    TEST_STREAM_EVENTS = 1
   460    TEST_STREAM_FILE_RECORDS = 2
   461    SERIALIZED_TEST_STREAM_FILE_RECORDS = 3
   462  
   463  
   464  class ReverseTestStream(PTransform):
   465    """A Transform that can create TestStream events from a stream of elements.
   466  
   467    This currently assumes that this the pipeline being run on a single machine
   468    and elements come in order and are outputted in the same order that they came
   469    in.
   470    """
   471    def __init__(
   472        self, sample_resolution_sec, output_tag, coder=None, output_format=None):
   473      self._sample_resolution_sec = sample_resolution_sec
   474      self._output_tag = output_tag
   475      self._output_format = output_format if output_format \
   476                            else OutputFormat.TEST_STREAM_EVENTS
   477      self._coder = coder if coder else beam.coders.FastPrimitivesCoder()
   478  
   479    def expand(self, pcoll):
   480      ret = (
   481          pcoll
   482          | beam.WindowInto(beam.window.GlobalWindows())
   483  
   484          # First get the initial timing information. This will be used to start
   485          # the periodic timers which will generate processing time and watermark
   486          # advancements every `sample_resolution_sec`.
   487          | 'initial timing' >> PairWithTiming()
   488  
   489          # Next, map every element to the same key so that only a single timer is
   490          # started for this given ReverseTestStream.
   491          | 'first key' >> beam.Map(lambda x: (0, x))
   492  
   493          # Next, pass-through each element which will be paired with its timing
   494          # info in the next step. Also, start the periodic timers. We use timers
   495          # in this situation to capture watermark advancements that occur when
   496          # there are no elements being produced upstream.
   497          | beam.ParDo(
   498              _TimingEventGenerator(
   499                  output_tag=self._output_tag,
   500                  sample_resolution_sec=self._sample_resolution_sec))
   501  
   502          # Next, retrieve the timing information for watermark events that were
   503          # generated in the previous step. This is because elements generated
   504          # through the timers don't have their timing information yet.
   505          | 'timing info for watermarks' >> PairWithTiming()
   506  
   507          # Re-key to the same key to keep global state.
   508          | 'second key' >> beam.Map(lambda x: (0, x))
   509  
   510          # Format the events properly.
   511          | beam.ParDo(_TestStreamFormatter(self._coder, self._output_format)))
   512  
   513      if self._output_format == OutputFormat.SERIALIZED_TEST_STREAM_FILE_RECORDS:
   514  
   515        def serializer(e):
   516          return e.SerializeToString()
   517  
   518        ret = ret | 'serializer' >> beam.Map(serializer)
   519  
   520      return ret
   521  
   522  
   523  class _TimingEventGenerator(beam.DoFn):
   524    """Generates ProcessingTimeEvents and WatermarkEvents at a regular cadence.
   525  
   526    The runner keeps the state of the clock (which may be faked) and the
   527    watermarks, which are inaccessible to SDKs. This DoFn generates
   528    ProcessingTimeEvents and WatermarkEvents at a specified sampling rate to
   529    capture any clock or watermark advancements between elements.
   530    """
   531  
   532    # Used to return the initial timing information.
   533    EXECUTE_ONCE_STATE = beam.transforms.userstate.BagStateSpec(
   534        name='execute_once_state', coder=beam.coders.FastPrimitivesCoder())
   535  
   536    # A processing time timer in an infinite loop that generates the events that
   537    # will be paired with the TimingInfo from the runner.
   538    TIMING_SAMPLER = TimerSpec('timing_sampler', TimeDomain.REAL_TIME)
   539  
   540    def __init__(self, output_tag, sample_resolution_sec=0.1):
   541      self._output_tag = output_tag
   542      self._sample_resolution_sec = sample_resolution_sec
   543  
   544    @on_timer(TIMING_SAMPLER)
   545    def on_timing_sampler(
   546        self,
   547        timestamp=beam.DoFn.TimestampParam,
   548        window=beam.DoFn.WindowParam,
   549        timing_sampler=beam.DoFn.TimerParam(TIMING_SAMPLER)):
   550      """Yields an unbounded stream of ProcessingTimeEvents and WatermarkEvents.
   551  
   552      The returned events will be paired with the TimingInfo. This loop's only
   553      purpose is to generate these events even when there are no elements.
   554      """
   555      next_sample_time = (timestamp.micros * 1e-6) + self._sample_resolution_sec
   556      timing_sampler.set(next_sample_time)
   557  
   558      # Generate two events, the delta since the last sample and a place-holder
   559      # WatermarkEvent. This is a placeholder because we can't otherwise add the
   560      # watermark from the runner to the event.
   561      yield ProcessingTimeEvent(self._sample_resolution_sec)
   562      yield WatermarkEvent(MIN_TIMESTAMP)
   563  
   564    def process(
   565        self,
   566        e,
   567        timestamp=beam.DoFn.TimestampParam,
   568        window=beam.DoFn.WindowParam,
   569        timing_sampler=beam.DoFn.TimerParam(TIMING_SAMPLER),
   570        execute_once_state=beam.DoFn.StateParam(EXECUTE_ONCE_STATE)):
   571  
   572      _, (element, timing_info) = e
   573  
   574      # Only set the timers once and only send the header once.
   575      first_time = next(execute_once_state.read(), True)
   576      if first_time:
   577        # Generate the initial timing events.
   578        execute_once_state.add(False)
   579        now_sec = timing_info.processing_time.micros * 1e-6
   580        timing_sampler.set(now_sec + self._sample_resolution_sec)
   581  
   582        # Here we capture the initial time offset and initial watermark. This is
   583        # where we emit the TestStreamFileHeader.
   584        yield beam_interactive_api_pb2.TestStreamFileHeader(tag=self._output_tag)
   585        yield ProcessingTimeEvent(
   586            Duration(micros=timing_info.processing_time.micros))
   587        yield WatermarkEvent(MIN_TIMESTAMP)
   588      yield element
   589  
   590  
   591  class _TestStreamFormatter(beam.DoFn):
   592    """Formats the events to the specified output format.
   593    """
   594  
   595    # In order to generate the processing time deltas, we need to keep track of
   596    # the previous clock time we got from the runner.
   597    PREV_SAMPLE_TIME_STATE = beam.transforms.userstate.BagStateSpec(
   598        name='prev_sample_time_state', coder=beam.coders.FastPrimitivesCoder())
   599  
   600    def __init__(self, coder, output_format):
   601      self._coder = coder
   602      self._output_format = output_format
   603  
   604    def start_bundle(self):
   605      self.elements = []
   606      self.timing_events = []
   607      self.header = None
   608  
   609    def finish_bundle(self):
   610      """Outputs all the buffered elements.
   611      """
   612      if self._output_format == OutputFormat.TEST_STREAM_EVENTS:
   613        return self._output_as_events()
   614      return self._output_as_records()
   615  
   616    def process(
   617        self,
   618        e,
   619        timestamp=beam.DoFn.TimestampParam,
   620        prev_sample_time_state=beam.DoFn.StateParam(PREV_SAMPLE_TIME_STATE)):
   621      """Buffers elements until the end of the bundle.
   622  
   623      This buffers elements instead of emitting them immediately to keep elements
   624      that come in the same bundle to be outputted in the same bundle.
   625      """
   626      _, (element, timing_info) = e
   627  
   628      if isinstance(element, beam_interactive_api_pb2.TestStreamFileHeader):
   629        self.header = element
   630      elif isinstance(element, WatermarkEvent):
   631        # WatermarkEvents come in with a watermark of MIN_TIMESTAMP. Fill in the
   632        # correct watermark from the runner here.
   633        element.new_watermark = timing_info.watermark.micros
   634        if element not in self.timing_events:
   635          self.timing_events.append(element)
   636  
   637      elif isinstance(element, ProcessingTimeEvent):
   638        # Because the runner holds the clock, calculate the processing time delta
   639        # here. The TestStream may have faked out the clock, and thus the
   640        # delta calculated in the SDK with time.time() will be wrong.
   641        prev_sample = next(prev_sample_time_state.read(), Timestamp())
   642        prev_sample_time_state.clear()
   643        prev_sample_time_state.add(timing_info.processing_time)
   644  
   645        advance_by = timing_info.processing_time - prev_sample
   646  
   647        element.advance_by = advance_by
   648        self.timing_events.append(element)
   649      else:
   650        self.elements.append(TimestampedValue(element, timestamp))
   651  
   652    def _output_as_events(self):
   653      """Outputs buffered elements as TestStream events.
   654      """
   655      if self.timing_events:
   656        yield WindowedValue(
   657            self.timing_events, timestamp=0, windows=[beam.window.GlobalWindow()])
   658  
   659      if self.elements:
   660        yield WindowedValue([ElementEvent(self.elements)],
   661                            timestamp=0,
   662                            windows=[beam.window.GlobalWindow()])
   663  
   664    def _output_as_records(self):
   665      """Outputs buffered elements as TestStreamFileRecords.
   666      """
   667      if self.header:
   668        yield WindowedValue(
   669            self.header, timestamp=0, windows=[beam.window.GlobalWindow()])
   670  
   671      if self.timing_events:
   672        timing_events = self._timing_events_to_records(self.timing_events)
   673        for r in timing_events:
   674          yield WindowedValue(
   675              r, timestamp=0, windows=[beam.window.GlobalWindow()])
   676  
   677      if self.elements:
   678        elements = self._elements_to_record(self.elements)
   679        yield WindowedValue(
   680            elements, timestamp=0, windows=[beam.window.GlobalWindow()])
   681  
   682    def _timing_events_to_records(self, timing_events):
   683      """Returns given timing_events as TestStreamFileRecords.
   684      """
   685      records = []
   686      for e in self.timing_events:
   687        if isinstance(e, ProcessingTimeEvent):
   688          processing_time_event = beam_runner_api_pb2.\
   689              TestStreamPayload.Event.AdvanceProcessingTime(
   690              advance_duration=e.advance_by.micros)
   691          records.append(
   692              beam_interactive_api_pb2.TestStreamFileRecord(
   693                  recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
   694                      processing_time_event=processing_time_event)))
   695  
   696        elif isinstance(e, WatermarkEvent):
   697          watermark_event = beam_runner_api_pb2.\
   698              TestStreamPayload.Event.AdvanceWatermark(
   699              new_watermark=int(e.new_watermark))
   700          records.append(
   701              beam_interactive_api_pb2.TestStreamFileRecord(
   702                  recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
   703                      watermark_event=watermark_event)))
   704  
   705      return records
   706  
   707    def _elements_to_record(self, elements):
   708      """Returns elements as TestStreamFileRecords.
   709      """
   710      elements = []
   711      for tv in self.elements:
   712        element_timestamp = tv.timestamp.micros
   713        element = beam_runner_api_pb2.TestStreamPayload.TimestampedElement(
   714            encoded_element=self._coder.encode(tv.value),
   715            timestamp=element_timestamp)
   716        elements.append(element)
   717  
   718      element_event = beam_runner_api_pb2.TestStreamPayload.Event.AddElements(
   719          elements=elements)
   720      return beam_interactive_api_pb2.TestStreamFileRecord(
   721          recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
   722              element_event=element_event))