# Source: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/test_stream.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Provides TestStream for verifying streaming runner semantics.

For internal use only; no backwards-compatibility guarantees.
"""
# pytype: skip-file

from abc import ABCMeta
from abc import abstractmethod
from enum import Enum
from functools import total_ordering

import apache_beam as beam
from apache_beam import coders
from apache_beam import pvalue
from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_interactive_api_pb2
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.portability.api import endpoints_pb2
from apache_beam.transforms import PTransform
from apache_beam.transforms import core
from apache_beam.transforms import window
from apache_beam.transforms.timeutil import TimeDomain
from apache_beam.transforms.userstate import TimerSpec
from apache_beam.transforms.userstate import on_timer
from apache_beam.transforms.window import TimestampedValue
from apache_beam.utils import timestamp
from apache_beam.utils.timestamp import MIN_TIMESTAMP
from apache_beam.utils.timestamp import Duration
from apache_beam.utils.timestamp import Timestamp
from apache_beam.utils.windowed_value import WindowedValue

__all__ = [
    'Event',
    'ElementEvent',
    'WatermarkEvent',
    'ProcessingTimeEvent',
    'TestStream',
]


@total_ordering
class Event(metaclass=ABCMeta):  # type: ignore[misc]
  """Test stream event to be emitted during execution of a TestStream."""
  @abstractmethod
  def __eq__(self, other):
    raise NotImplementedError

  @abstractmethod
  def __hash__(self):
    raise NotImplementedError

  @abstractmethod
  def __lt__(self, other):
    raise NotImplementedError

  @abstractmethod
  def to_runner_api(self, element_coder):
    raise NotImplementedError

  @staticmethod
  def from_runner_api(proto, element_coder):
    """Builds the concrete Event described by a TestStreamPayload.Event proto.

    Args:
      proto: the event proto; exactly one of element_event, watermark_event
        or processing_time_event is expected to be set.
      element_coder: coder used to decode the payload of element events.

    Returns:
      An ElementEvent, WatermarkEvent or ProcessingTimeEvent.

    Raises:
      ValueError: if none of the known event fields is set.
    """
    # The string 'None' is the wire encoding of a missing (None) tag; see the
    # to_runner_api implementations of the event classes.
    # Proto time values are multiplied by 1000 to obtain micros.
    if proto.HasField('element_event'):
      event = proto.element_event
      tag = None if event.tag == 'None' else event.tag
      return ElementEvent([
          TimestampedValue(
              element_coder.decode(tv.encoded_element),
              Timestamp(micros=1000 * tv.timestamp))
          for tv in proto.element_event.elements
      ], tag=tag)  # yapf: disable
    elif proto.HasField('watermark_event'):
      event = proto.watermark_event
      tag = None if event.tag == 'None' else event.tag
      return WatermarkEvent(
          Timestamp(micros=1000 * proto.watermark_event.new_watermark), tag=tag)
    elif proto.HasField('processing_time_event'):
      return ProcessingTimeEvent(
          timestamp.Duration(
              micros=1000 * proto.processing_time_event.advance_duration))
    else:
      raise ValueError(
          'Unknown TestStream Event type: %s' % proto.WhichOneof('event'))


class ElementEvent(Event):
  """Element-producing test stream event."""
  def __init__(self, timestamped_values, tag=None):
    """
    Args:
      timestamped_values: the TimestampedValues this event emits.
      tag: the output tag the elements are emitted to (None for the default
        output).
    """
    self.timestamped_values = timestamped_values
    self.tag = tag

  def __eq__(self, other):
    if not isinstance(other, ElementEvent):
      return False

    return (
        self.timestamped_values == other.timestamped_values and
        self.tag == other.tag)

  def __hash__(self):
    # timestamped_values is normally a list, which is unhashable; hash an
    # immutable snapshot instead. Both fields used here are also compared in
    # __eq__, so equal events keep equal hashes.
    return hash((tuple(self.timestamped_values), self.tag))

  def __lt__(self, other):
    if not isinstance(other, ElementEvent):
      raise TypeError

    return self.timestamped_values < other.timestamped_values

  def to_runner_api(self, element_coder):
    """Serializes this event to a TestStreamPayload.Event proto.

    Args:
      element_coder: coder used to encode each element value.
    """
    # A None tag is written as the string 'None'.
    tag = 'None' if self.tag is None else self.tag
    return beam_runner_api_pb2.TestStreamPayload.Event(
        element_event=beam_runner_api_pb2.TestStreamPayload.Event.AddElements(
            elements=[
                beam_runner_api_pb2.TestStreamPayload.TimestampedElement(
                    encoded_element=element_coder.encode(tv.value),
                    # The proto stores micros // 1000; sub-millisecond
                    # precision is truncated here.
                    timestamp=tv.timestamp.micros // 1000)
                for tv in self.timestamped_values
            ],
            tag=tag))

  def __repr__(self):
    return 'ElementEvent: <{}, {}>'.format(
        [(e.value, e.timestamp) for e in self.timestamped_values], self.tag)


class WatermarkEvent(Event):
  """Watermark-advancing test stream event."""
  def __init__(self, new_watermark, tag=None):
    """
    Args:
      new_watermark: the new watermark; converted with Timestamp.of.
      tag: the output tag whose watermark is advanced (None for the default
        output).
    """
    self.new_watermark = Timestamp.of(new_watermark)
    self.tag = tag

  def __eq__(self, other):
    if not isinstance(other, WatermarkEvent):
      return False

    return self.new_watermark == other.new_watermark and self.tag == other.tag

  def __hash__(self):
    return hash(str(self.new_watermark) + str(self.tag))

  def __lt__(self, other):
    if not isinstance(other, WatermarkEvent):
      raise TypeError

    return self.new_watermark < other.new_watermark

  def to_runner_api(self, unused_element_coder):
    """Serializes this event to a TestStreamPayload.Event proto."""
    # A None tag is written as the string 'None'.
    tag = 'None' if self.tag is None else self.tag

    # Assert that no precision is lost.
    assert self.new_watermark.micros % 1000 == 0
    return beam_runner_api_pb2.TestStreamPayload.Event(
        watermark_event=beam_runner_api_pb2.TestStreamPayload.Event.
        AdvanceWatermark(
            new_watermark=self.new_watermark.micros // 1000, tag=tag))

  def __repr__(self):
    return 'WatermarkEvent: <{}, {}>'.format(self.new_watermark, self.tag)


class ProcessingTimeEvent(Event):
  """Processing time-advancing test stream event."""
  def __init__(self, advance_by):
    """
    Args:
      advance_by: amount to advance processing time by; converted with
        Duration.of.
    """
    self.advance_by = Duration.of(advance_by)

  def __eq__(self, other):
    if not isinstance(other, ProcessingTimeEvent):
      return False

    return self.advance_by == other.advance_by

  def __hash__(self):
    return hash(self.advance_by)

  def __lt__(self, other):
    if not isinstance(other, ProcessingTimeEvent):
      raise TypeError

    return self.advance_by < other.advance_by

  def to_runner_api(self, unused_element_coder):
    """Serializes this event to a TestStreamPayload.Event proto."""
    # Assert that no precision is lost by the division below.
    assert self.advance_by.micros % 1000 == 0
    return beam_runner_api_pb2.TestStreamPayload.Event(
        processing_time_event=beam_runner_api_pb2.TestStreamPayload.Event.
        AdvanceProcessingTime(advance_duration=self.advance_by.micros // 1000))

  def __repr__(self):
    return 'ProcessingTimeEvent: <{}>'.format(self.advance_by)


class WindowedValueHolderMeta(type):
  """A metaclass that overrides the isinstance check for WindowedValueHolder.

  Python does a quick test for exact match. If an instance is exactly of
  type WindowedValueHolder, the overridden isinstance check is omitted.
  The override is needed because WindowedValueHolder elements encoded then
  decoded become Row elements.
  """
  def __instancecheck__(cls, other):
    """Checks if a beam.Row typed instance is a WindowedValueHolder.

    A row qualifies when it has a `windowed_value` attribute holding a
    WindowedValue and a `urn` attribute equal to the ROW coder URN.
    """
    return (
        isinstance(other, beam.Row) and hasattr(other, 'windowed_value') and
        hasattr(other, 'urn') and
        isinstance(other.windowed_value, WindowedValue) and
        other.urn == common_urns.coders.ROW.urn)


class WindowedValueHolder(beam.Row, metaclass=WindowedValueHolderMeta):
  """A class that holds a WindowedValue.

  This is a special class that can be used by the runner that implements the
  TestStream as a signal that the underlying value should be unreified to the
  specified window.
  """
  # Register WindowedValueHolder to always use RowCoder.
  coders.registry.register_coder(WindowedValueHolderMeta, coders.RowCoder)

  def __init__(self, windowed_value):
    assert isinstance(windowed_value, WindowedValue), (
        'WindowedValueHolder can only hold %s type. Instead, %s is given.') % (
            WindowedValue, windowed_value)
    super().__init__(
        **{
            'windowed_value': windowed_value, 'urn': common_urns.coders.ROW.urn
        })

  @classmethod
  def from_row(cls, row):
    """Converts a beam.Row typed instance to WindowedValueHolder.

    Args:
      row: a beam.Row with a `windowed_value` attribute holding a
        WindowedValue.

    Returns:
      A new WindowedValueHolder wrapping `row.windowed_value`.
    """
    if isinstance(row, WindowedValueHolder):
      return WindowedValueHolder(row.windowed_value)
    assert isinstance(row, beam.Row), 'The given row %s must be a %s type' % (
        row, beam.Row)
    assert hasattr(row, 'windowed_value'), (
        'The given %s must have a windowed_value attribute.') % row
    assert isinstance(row.windowed_value, WindowedValue), (
        'The windowed_value attribute of %s must be a %s type') % (
            row, WindowedValue)
    # A row that passed the checks above but did not match the metaclass
    # isinstance check (e.g. it has no `urn` attribute) used to fall through
    # and yield None; convert it as well.
    return WindowedValueHolder(row.windowed_value)


class TestStream(PTransform):
  """Test stream that generates events on an unbounded PCollection of elements.

  Each event emits elements, advances the watermark or advances the processing
  time. After all of the specified elements are emitted, ceases to produce
  output.

  Applying the PTransform will return a single PCollection if only the default
  output or only one output tag has been used. Otherwise a dictionary of output
  names to PCollections will be returned.
  """
  def __init__(
      self,
      coder=coders.FastPrimitivesCoder(),
      events=None,
      output_tags=None,
      endpoint=None):
    """
    Args:
      coder: (apache_beam.Coder) the coder to encode/decode elements.
      events: (List[Event]) a list of instructions for the TestStream to
        execute. If specified, the events tags must exist in the output_tags.
      output_tags: (List[str]) Initial set of outputs. If no event references an
        output tag, no output will be produced for that tag.
      endpoint: (str) a URL locating a TestStreamService.
    """

    super().__init__()
    assert coder is not None

    self.coder = coder
    # Last watermark set per output tag; used to validate watermark events and
    # as the default timestamp for raw elements.
    self.watermarks = {None: timestamp.MIN_TIMESTAMP}
    self.output_tags = set(output_tags) if output_tags else set()
    self._events = [] if events is None else list(events)
    self._endpoint = endpoint

    # ProcessingTimeEvents carry no tag, so only the other two kinds are
    # checked against the declared outputs.
    event_tags = set(
        e.tag for e in self._events
        if isinstance(e, (WatermarkEvent, ElementEvent)))
    assert event_tags.issubset(self.output_tags), \
        '{} is not a subset of {}'.format(event_tags, output_tags)
    assert not (self._events and self._endpoint), \
        'Only either events or an endpoint can be given at once.'

  def get_windowing(self, unused_inputs):
    return core.Windowing(window.GlobalWindows())

  def _infer_output_coder(self, input_type=None, input_coder=None):
    return self.coder

  def expand(self, pbegin):
    """Creates the unbounded output PCollection(s) of this TestStream.

    Returns:
      A single PCollection when the only output tag is None, otherwise a dict
      mapping each output tag to its PCollection.
    """
    assert isinstance(pbegin, pvalue.PBegin)
    self.pipeline = pbegin.pipeline
    if not self.output_tags:
      self.output_tags = {None}

    # For backwards compatibility return a single PCollection.
    if self.output_tags == {None}:
      return pvalue.PCollection(
          self.pipeline, is_bounded=False, tag=list(self.output_tags)[0])
    return {
        tag: pvalue.PCollection(self.pipeline, is_bounded=False, tag=tag)
        for tag in self.output_tags
    }

  def _add(self, event):
    """Validates an event and appends it to the list of events.

    Raises:
      ValueError: if the event is not one of the known event types.
    """
    if isinstance(event, ElementEvent):
      for tv in event.timestamped_values:
        assert tv.timestamp < timestamp.MAX_TIMESTAMP, (
            'Element timestamp must be before timestamp.MAX_TIMESTAMP.')
    elif isinstance(event, WatermarkEvent):
      if event.tag not in self.watermarks:
        self.watermarks[event.tag] = timestamp.MIN_TIMESTAMP
      assert event.new_watermark > self.watermarks[event.tag], (
          'Watermark must strictly-monotonically advance.')
      self.watermarks[event.tag] = event.new_watermark
    elif isinstance(event, ProcessingTimeEvent):
      assert event.advance_by > 0, (
          'Must advance processing time by positive amount.')
    else:
      raise ValueError('Unknown event: %s' % event)
    self._events.append(event)

  def add_elements(self, elements, tag=None, event_timestamp=None):
    """Add elements to the TestStream.

    Elements added to the TestStream will be produced during pipeline execution.
    These elements can be TimestampedValue, WindowedValue or raw unwrapped
    elements that are serializable using the TestStream's specified Coder.  When
    a TimestampedValue or a WindowedValue element is used, the timestamp of the
    TimestampedValue or WindowedValue will be the timestamp of the produced
    element; otherwise, the current watermark timestamp will be used for that
    element.  The windows of a given WindowedValue are ignored by the
    TestStream.

    Args:
      elements: iterable of elements to emit.
      tag: output tag to emit the elements to (None for the default output).
      event_timestamp: timestamp applied to raw (unwrapped) elements; when
        None, the current watermark of `tag` is used.

    Returns:
      This TestStream, to allow call chaining.
    """
    self.output_tags.add(tag)
    timestamped_values = []
    if tag not in self.watermarks:
      self.watermarks[tag] = timestamp.MIN_TIMESTAMP

    for element in elements:
      if isinstance(element, TimestampedValue):
        timestamped_values.append(element)
      elif isinstance(element, WindowedValue):
        # Drop windows for elements in test stream.
        timestamped_values.append(
            TimestampedValue(element.value, element.timestamp))
      else:
        # Add elements with timestamp equal to current watermark.
        if event_timestamp is None:
          event_timestamp = self.watermarks[tag]
        timestamped_values.append(TimestampedValue(element, event_timestamp))
    self._add(ElementEvent(timestamped_values, tag))
    return self

  def advance_watermark_to(self, new_watermark, tag=None):
    """Advance the watermark to a given Unix timestamp.

    The Unix timestamp value used must be later than the previous watermark
    value and should be given as an int, float or utils.timestamp.Timestamp
    object.
    """
    self.output_tags.add(tag)
    self._add(WatermarkEvent(new_watermark, tag))
    return self

  def advance_watermark_to_infinity(self, tag=None):
    """Advance the watermark to the end of time, completing this TestStream."""
    self.advance_watermark_to(timestamp.MAX_TIMESTAMP, tag)
    return self

  def advance_processing_time(self, advance_by):
    """Advance the current processing time by a given duration in seconds.

    The duration must be a positive second duration and should be given as an
    int, float or utils.timestamp.Duration object.
    """
    self._add(ProcessingTimeEvent(advance_by))
    return self

  def to_runner_api_parameter(self, context):
    """Returns the TestStream URN and the TestStreamPayload for this transform.

    The payload carries the coder id, the events serialized in the order they
    were added, and the endpoint descriptor.
    """
    return (
        common_urns.primitives.TEST_STREAM.urn,
        beam_runner_api_pb2.TestStreamPayload(
            coder_id=context.coders.get_id(self.coder),
            events=[e.to_runner_api(self.coder) for e in self._events],
            endpoint=endpoints_pb2.ApiServiceDescriptor(url=self._endpoint)))

  @staticmethod
  @PTransform.register_urn(
      common_urns.primitives.TEST_STREAM.urn,
      beam_runner_api_pb2.TestStreamPayload)
  def from_runner_api_parameter(ptransform, payload, context):
    """Reconstructs a TestStream from its transform proto and payload."""
    coder = context.coders.get_by_id(payload.coder_id)
    # The string 'None' is the wire encoding of the default (None) tag.
    output_tags = set(
        None if k == 'None' else k for k in ptransform.outputs.keys())
    return TestStream(
        coder=coder,
        events=[Event.from_runner_api(e, coder) for e in payload.events],
        output_tags=output_tags,
        endpoint=payload.endpoint.url)


class TimingInfo(object):
  """Read-only pair of a processing time and a watermark.

  Both values are converted with Timestamp.of on construction.
  """
  def __init__(self, processing_time, watermark):
    self._processing_time = Timestamp.of(processing_time)
    self._watermark = Timestamp.of(watermark)

  @property
  def processing_time(self):
    return self._processing_time

  @property
  def watermark(self):
    return self._watermark

  def __repr__(self):
    return '({}, {})'.format(self.processing_time, self.watermark)


class PairWithTiming(PTransform):
  """Pairs the input element with timing information.

  Input: element; output: KV(element, timing information)
  Where timing information := (processing time, watermark)

  This is used in the ReverseTestStream implementation to replay watermark
  advancements.
  """

  URN = "beam:transform:pair_with_timing:v1"

  def expand(self, pcoll):
    # No pairing logic is implemented here; this only declares the output
    # PCollection.
    # NOTE(review): presumably the runner substitutes the implementation based
    # on URN -- confirm.
    return pvalue.PCollection.from_(pcoll)


class OutputFormat(Enum):
  """Output formats supported by ReverseTestStream."""
  TEST_STREAM_EVENTS = 1
  TEST_STREAM_FILE_RECORDS = 2
  SERIALIZED_TEST_STREAM_FILE_RECORDS = 3


class ReverseTestStream(PTransform):
  """A Transform that can create TestStream events from a stream of elements.

  This currently assumes that the pipeline is being run on a single machine
  and elements come in order and are outputted in the same order that they came
  in.
  """
  def __init__(
      self, sample_resolution_sec, output_tag, coder=None, output_format=None):
    """
    Args:
      sample_resolution_sec: interval, in seconds, between the periodic timing
        samples.
      output_tag: tag written into the emitted TestStreamFileHeader.
      coder: coder used to encode elements into records; defaults to
        FastPrimitivesCoder.
      output_format: an OutputFormat; defaults to TEST_STREAM_EVENTS.
    """
    self._sample_resolution_sec = sample_resolution_sec
    self._output_tag = output_tag
    self._output_format = (
        output_format if output_format else OutputFormat.TEST_STREAM_EVENTS)
    self._coder = coder if coder else beam.coders.FastPrimitivesCoder()

  def expand(self, pcoll):
    ret = (
        pcoll
        | beam.WindowInto(beam.window.GlobalWindows())

        # First get the initial timing information. This will be used to start
        # the periodic timers which will generate processing time and watermark
        # advancements every `sample_resolution_sec`.
        | 'initial timing' >> PairWithTiming()

        # Next, map every element to the same key so that only a single timer is
        # started for this given ReverseTestStream.
        | 'first key' >> beam.Map(lambda x: (0, x))

        # Next, pass-through each element which will be paired with its timing
        # info in the next step. Also, start the periodic timers. We use timers
        # in this situation to capture watermark advancements that occur when
        # there are no elements being produced upstream.
        | beam.ParDo(
            _TimingEventGenerator(
                output_tag=self._output_tag,
                sample_resolution_sec=self._sample_resolution_sec))

        # Next, retrieve the timing information for watermark events that were
        # generated in the previous step. This is because elements generated
        # through the timers don't have their timing information yet.
        | 'timing info for watermarks' >> PairWithTiming()

        # Re-key to the same key to keep global state.
        | 'second key' >> beam.Map(lambda x: (0, x))

        # Format the events properly.
        | beam.ParDo(_TestStreamFormatter(self._coder, self._output_format)))

    if self._output_format == OutputFormat.SERIALIZED_TEST_STREAM_FILE_RECORDS:

      def serializer(e):
        return e.SerializeToString()

      ret = ret | 'serializer' >> beam.Map(serializer)

    return ret


class _TimingEventGenerator(beam.DoFn):
  """Generates ProcessingTimeEvents and WatermarkEvents at a regular cadence.

  The runner keeps the state of the clock (which may be faked) and the
  watermarks, which are inaccessible to SDKs. This DoFn generates
  ProcessingTimeEvents and WatermarkEvents at a specified sampling rate to
  capture any clock or watermark advancements between elements.
  """

  # Used to return the initial timing information.
  EXECUTE_ONCE_STATE = beam.transforms.userstate.BagStateSpec(
      name='execute_once_state', coder=beam.coders.FastPrimitivesCoder())

  # A processing time timer in an infinite loop that generates the events that
  # will be paired with the TimingInfo from the runner.
  TIMING_SAMPLER = TimerSpec('timing_sampler', TimeDomain.REAL_TIME)

  def __init__(self, output_tag, sample_resolution_sec=0.1):
    self._output_tag = output_tag
    self._sample_resolution_sec = sample_resolution_sec

  @on_timer(TIMING_SAMPLER)
  def on_timing_sampler(
      self,
      timestamp=beam.DoFn.TimestampParam,
      window=beam.DoFn.WindowParam,
      timing_sampler=beam.DoFn.TimerParam(TIMING_SAMPLER)):
    """Yields an unbounded stream of ProcessingTimeEvents and WatermarkEvents.

    The returned events will be paired with the TimingInfo. This loop's only
    purpose is to generate these events even when there are no elements.
    """
    # Re-arm the timer so that it keeps firing every sample interval.
    next_sample_time = (timestamp.micros * 1e-6) + self._sample_resolution_sec
    timing_sampler.set(next_sample_time)

    # Generate two events, the delta since the last sample and a place-holder
    # WatermarkEvent. This is a placeholder because we can't otherwise add the
    # watermark from the runner to the event.
    yield ProcessingTimeEvent(self._sample_resolution_sec)
    yield WatermarkEvent(MIN_TIMESTAMP)

  def process(
      self,
      e,
      timestamp=beam.DoFn.TimestampParam,
      window=beam.DoFn.WindowParam,
      timing_sampler=beam.DoFn.TimerParam(TIMING_SAMPLER),
      execute_once_state=beam.DoFn.StateParam(EXECUTE_ONCE_STATE)):
    """Passes the element through, emitting the initial events on first call.

    Args:
      e: a (key, (element, timing_info)) tuple.
    """

    _, (element, timing_info) = e

    # Only set the timers once and only send the header once.
    # read() is only guaranteed to return an iterable, so make an iterator
    # explicitly before calling next(); an empty bag means "first time".
    first_time = next(iter(execute_once_state.read()), True)
    if first_time:
      # Generate the initial timing events.
      execute_once_state.add(False)
      now_sec = timing_info.processing_time.micros * 1e-6
      timing_sampler.set(now_sec + self._sample_resolution_sec)

      # Here we capture the initial time offset and initial watermark. This is
      # where we emit the TestStreamFileHeader.
      yield beam_interactive_api_pb2.TestStreamFileHeader(tag=self._output_tag)
      yield ProcessingTimeEvent(
          Duration(micros=timing_info.processing_time.micros))
      yield WatermarkEvent(MIN_TIMESTAMP)
    yield element


class _TestStreamFormatter(beam.DoFn):
  """Formats the events to the specified output format.
  """

  # In order to generate the processing time deltas, we need to keep track of
  # the previous clock time we got from the runner.
  PREV_SAMPLE_TIME_STATE = beam.transforms.userstate.BagStateSpec(
      name='prev_sample_time_state', coder=beam.coders.FastPrimitivesCoder())

  def __init__(self, coder, output_format):
    self._coder = coder
    self._output_format = output_format

  def start_bundle(self):
    # Per-bundle buffers, flushed in finish_bundle.
    self.elements = []
    self.timing_events = []
    self.header = None

  def finish_bundle(self):
    """Outputs all the buffered elements.
    """
    if self._output_format == OutputFormat.TEST_STREAM_EVENTS:
      return self._output_as_events()
    return self._output_as_records()

  def process(
      self,
      e,
      timestamp=beam.DoFn.TimestampParam,
      prev_sample_time_state=beam.DoFn.StateParam(PREV_SAMPLE_TIME_STATE)):
    """Buffers elements until the end of the bundle.

    This buffers elements instead of emitting them immediately to keep elements
    that come in the same bundle to be outputted in the same bundle.

    Args:
      e: a (key, (element, timing_info)) tuple.
    """
    _, (element, timing_info) = e

    if isinstance(element, beam_interactive_api_pb2.TestStreamFileHeader):
      self.header = element
    elif isinstance(element, WatermarkEvent):
      # WatermarkEvents come in with a watermark of MIN_TIMESTAMP. Fill in the
      # correct watermark from the runner here.
      # NOTE(review): this stores an integer number of micros, whereas
      # WatermarkEvent.__init__ stores a Timestamp; _timing_events_to_records
      # relies on the integer form -- confirm before changing.
      element.new_watermark = timing_info.watermark.micros
      if element not in self.timing_events:
        self.timing_events.append(element)

    elif isinstance(element, ProcessingTimeEvent):
      # Because the runner holds the clock, calculate the processing time delta
      # here. The TestStream may have faked out the clock, and thus the
      # delta calculated in the SDK with time.time() will be wrong.
      # read() is only guaranteed to return an iterable, so make an iterator
      # explicitly before calling next().
      prev_sample = next(iter(prev_sample_time_state.read()), Timestamp())
      prev_sample_time_state.clear()
      prev_sample_time_state.add(timing_info.processing_time)

      advance_by = timing_info.processing_time - prev_sample

      element.advance_by = advance_by
      self.timing_events.append(element)
    else:
      self.elements.append(TimestampedValue(element, timestamp))

  def _output_as_events(self):
    """Outputs buffered elements as TestStream events.
    """
    if self.timing_events:
      yield WindowedValue(
          self.timing_events, timestamp=0, windows=[beam.window.GlobalWindow()])

    if self.elements:
      yield WindowedValue([ElementEvent(self.elements)],
                          timestamp=0,
                          windows=[beam.window.GlobalWindow()])

  def _output_as_records(self):
    """Outputs buffered elements as TestStreamFileRecords.
    """
    if self.header:
      yield WindowedValue(
          self.header, timestamp=0, windows=[beam.window.GlobalWindow()])

    if self.timing_events:
      timing_events = self._timing_events_to_records(self.timing_events)
      for r in timing_events:
        yield WindowedValue(
            r, timestamp=0, windows=[beam.window.GlobalWindow()])

    if self.elements:
      elements = self._elements_to_record(self.elements)
      yield WindowedValue(
          elements, timestamp=0, windows=[beam.window.GlobalWindow()])

  def _timing_events_to_records(self, timing_events):
    """Returns given timing_events as TestStreamFileRecords.

    Args:
      timing_events: the ProcessingTimeEvents and WatermarkEvents to convert;
        events of any other type are skipped.

    Returns:
      A list with one TestStreamFileRecord per converted event.
    """
    records = []
    # Iterate the argument rather than self.timing_events so that the method
    # converts what it was actually given.
    for e in timing_events:
      if isinstance(e, ProcessingTimeEvent):
        processing_time_event = beam_runner_api_pb2.\
            TestStreamPayload.Event.AdvanceProcessingTime(
                advance_duration=e.advance_by.micros)
        records.append(
            beam_interactive_api_pb2.TestStreamFileRecord(
                recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
                    processing_time_event=processing_time_event)))

      elif isinstance(e, WatermarkEvent):
        watermark_event = beam_runner_api_pb2.\
            TestStreamPayload.Event.AdvanceWatermark(
                new_watermark=int(e.new_watermark))
        records.append(
            beam_interactive_api_pb2.TestStreamFileRecord(
                recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
                    watermark_event=watermark_event)))

    return records

  def _elements_to_record(self, elements):
    """Returns elements as TestStreamFileRecords.

    Args:
      elements: the TimestampedValues to encode.

    Returns:
      A single TestStreamFileRecord whose element event holds every given
      element, encoded with this formatter's coder and stamped with the
      element timestamp in micros.
    """
    # Use a separate local for the encoded protos so that the `elements`
    # argument is not clobbered before it is read.
    encoded_elements = []
    for tv in elements:
      element_timestamp = tv.timestamp.micros
      element = beam_runner_api_pb2.TestStreamPayload.TimestampedElement(
          encoded_element=self._coder.encode(tv.value),
          timestamp=element_timestamp)
      encoded_elements.append(element)

    element_event = beam_runner_api_pb2.TestStreamPayload.Event.AddElements(
        elements=encoded_elements)
    return beam_interactive_api_pb2.TestStreamFileRecord(
        recorded_event=beam_runner_api_pb2.TestStreamPayload.Event(
            element_event=element_event))