github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/data/trigger_transcripts.yaml (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  name: fixed_default               # Identifies this transcript.
    19  window_fn: FixedWindows(10)       # Python names/syntax, unless otherwise noted.
    20  trigger_fn: Default               # Same. Empty () may be omitted.
    21  transcript:                       # Ordered list of events.
    22    - input: [1, 2, 3, 10, 11]      # The elements are the timestamps.
    23    - watermark: 25                 # Advances the watermark past both windows below.
    24    - expect:                       # Every expected output from the last action.
    25        - {window: [0, 9], values: [1, 2, 3], index: 0}
    26        - {window: [10, 19], values: [10, 11]}   # Partial match on attributes OK.
    27  
    28  ---
    29  name: fixed_default_late_data
    30  window_fn: FixedWindows(10)
    31  trigger_fn: Default
    32  timestamp_combiner: OUTPUT_AT_EOW  # Expected timestamps below equal each window's upper bound.
    33  allowed_lateness: 100
    34  transcript:
    35    - input: [1, 2, 3, 10, 11, 25]
    36    - watermark: 100
    37    - expect:  # One on-time output per window.
    38        - {window: [0, 9], values: [1, 2, 3], timestamp: 9, final: false}
    39        - {window: [10, 19], values: [10, 11], timestamp: 19}
    40        - {window: [20, 29], values: [25], timestamp: 29, late: false}
    41    - input: [7]  # Behind the watermark (100); expected to re-fire window [0, 9] as late.
    42    - expect:
    43        - {window: [0, 9], values: [1, 2, 3, 7], timestamp: 9, late: true}
    44  
    45  ---
    46  name: fixed_drop_late_data_after_allowed_lateness
    47  window_fn: FixedWindows(10)
    48  trigger_fn: AfterWatermark(early=AfterCount(3), late=AfterCount(1))
    49  timestamp_combiner: OUTPUT_AT_EOW
    50  allowed_lateness: 20  # Only window [80, 89] is within 20 of the watermark (100) used below.
    51  accumulation_mode: accumulating
    52  transcript:
    53    - input: [1, 2, 10, 11, 80, 81]  # No window reaches the early AfterCount(3).
    54    - watermark: 100
    55    - expect:
    56        - {window: [0, 9], values: [1, 2], timestamp: 9}
    57        - {window: [10, 19], values: [10, 11], timestamp: 19}
    58        - {window: [80, 89], values: [80, 81], timestamp: 89, late: false}
    59    - input: [7, 8] # no output: window [0, 9] is more than allowed_lateness behind the watermark
    60    - input: [17, 18] # no output: window [10, 19] is more than allowed_lateness behind the watermark
    61    - input: [82]  # Late but not dropped; late=AfterCount(1) fires on this single element.
    62    - expect:
    63        - {window: [80, 89], values: [80, 81, 82], timestamp: 89, late: true}
    64  
    65  ---
    66  name: timestamp_combiner_earliest
    67  window_fn: FixedWindows(10)
    68  trigger_fn: Default
    69  timestamp_combiner: OUTPUT_AT_EARLIEST  # Expected timestamps are the smallest input per window.
    70  transcript:
    71    - input: [1, 2, 3, 10, 11, 25]
    72    - watermark: 100
    73    - expect:
    74        - {window: [0, 9], values: [1, 2, 3], timestamp: 1}
    75        - {window: [10, 19], values: [10, 11], timestamp: 10}
    76        - {window: [20, 29], values: [25], timestamp: 25, late: false}
    77  
    78  ---
    79  name: timestamp_combiner_latest
    80  window_fn: FixedWindows(10)
    81  trigger_fn: Default
    82  timestamp_combiner: OUTPUT_AT_LATEST  # Expected timestamps are the largest input per window.
    83  transcript:
    84    - input: [1, 2, 3, 10, 11, 25]
    85    - watermark: 100
    86    - expect:
    87        - {window: [0, 9], values: [1, 2, 3], timestamp: 3}
    88        - {window: [10, 19], values: [10, 11], timestamp: 11}
    89        - {window: [20, 29], values: [25], timestamp: 25, late: false}
    90  
    91  ---
    92  # Test that custom timestamping is not invoked (outputs are stamped at each window's upper bound).
    93  name: timestamp_combiner_custom_timestamping_eow
    94  window_fn: CustomTimestampingFixedWindowsWindowFn(10)
    95  trigger_fn: Default
    96  timestamp_combiner: OUTPUT_AT_EOW
    97  transcript:
    98    - input: [1, 2, 3, 10, 11, 25]
    99    - watermark: 100
   100    - expect:  # Timestamps 9, 19, 29 match the window bounds, not a custom value.
   101        - {window: [0, 9], values: [1, 2, 3], timestamp: 9}
   102        - {window: [10, 19], values: [10, 11], timestamp: 19}
   103        - {window: [20, 29], values: [25], timestamp: 29, late: false}
   104  
   105  ---
   106  # Test that custom timestamping is not invoked (outputs carry the earliest input timestamp).
   107  name: timestamp_combiner_custom_timestamping_earliest
   108  window_fn: CustomTimestampingFixedWindowsWindowFn(10)
   109  trigger_fn: Default
   110  timestamp_combiner: OUTPUT_AT_EARLIEST
   111  transcript:
   112    - input: [1, 2, 3, 10, 11, 25]
   113    - watermark: 100
   114    - expect:  # Timestamps 1, 10, 25 are the unmodified earliest inputs per window.
   115        - {window: [0, 9], values: [1, 2, 3], timestamp: 1}
   116        - {window: [10, 19], values: [10, 11], timestamp: 10}
   117        - {window: [20, 29], values: [25], timestamp: 25, late: false}
   118  
   119  ---
   120  # Test that custom timestamping is in fact invoked (expected timestamps are shifted, see below).
   121  name: timestamp_combiner_custom_timestamping_earliest_transformed  # Distinct from the OUTPUT_AT_EARLIEST case.
   122  broken_on:
   123    - SwitchingDirectRunner  # unsupported OUTPUT_AT_EARLIEST_TRANSFORMED
   124  window_fn: CustomTimestampingFixedWindowsWindowFn(10)
   125  trigger_fn: Default
   126  timestamp_combiner: OUTPUT_AT_EARLIEST_TRANSFORMED
   127  transcript:
   128    - input: [1, 2, 3, 10, 11, 25]
   129    - watermark: 100
   130    - expect:  # Each timestamp is the window's earliest input plus 100 (presumably added by the window fn).
   131        - {window: [0, 9], values: [1, 2, 3], timestamp: 101}
   132        - {window: [10, 19], values: [10, 11], timestamp: 110}
   133        - {window: [20, 29], values: [25], timestamp: 125, late: false}
   134  
   135  ---
   136  name: early_late_sessions
   137  broken_on:
   138    # Watermark regresses, causing what should be late data to not be late.
   139    - SwitchingDirectRunner
   140  window_fn: Sessions(10)
   141  trigger_fn: AfterWatermark(early=AfterCount(2), late=AfterCount(3))
   142  allowed_lateness: 100
   143  timestamp_combiner: OUTPUT_AT_EOW
   144  transcript:
   145      - input: [1, 2]  # Two elements satisfy the early AfterCount(2).
   146      - expect:
   147          - {window: [1, 11], values: [1, 2], timestamp: 11, early: true, index: 0}
   148      - input: [3]    # no output: only one new element since the last early firing
   149      - input: [4]    # Second new element; the session has grown to [1, 13].
   150      - expect:
   151          - {window: [1, 13], values: [1, 2, 3, 4], timestamp: 13, early: true, index: 0}
   152      - input: [5]
   153      - watermark: 100  # Passes the end of the session; on-time firing expected.
   154      - expect:
   155          - {window: [1, 14], values: [1, 2, 3, 4, 5], timestamp: 14,
   156             index: 0, nonspeculative_index: 0}
   157      - input: [1]     # Late; one element is below the late AfterCount(3), so no output yet.
   158      - input: [3, 4]  # Brings the late count to three, so the late trigger fires.
   159      - expect:
   160          - {window: [1, 14], values: [1, 1, 2, 3, 3, 4, 4, 5], timestamp: 14,
   161             final: false, index: 1, nonspeculative_index: 1}
   162  
   163  ---
   164  name: discarding_early_fixed
   165  window_fn: FixedWindows(10)
   166  trigger_fn: AfterWatermark(early=AfterCount(2))
   167  timestamp_combiner: OUTPUT_AT_EOW
   168  accumulation_mode: discarding  # Each expected pane below holds only elements since the previous firing.
   169  transcript:
   170  - input: [1, 2]
   171  - expect:
   172    - {window: [0, 9], values: [1, 2], timestamp: 9, early: true, index: 0}
   173  - input: [4]    # no output
   174  - input: [14]   # no output
   175  - input: [5]    # Second new element in window [0, 9] since the last firing.
   176  - expect:
   177    - {window: [0, 9], values: [4, 5], timestamp: 9, early: true, index: 1}
   178  - input: [18]   # Second element in window [10, 19] (after 14).
   179  - expect:
   180    - {window: [10, 19], values: [14, 18], timestamp: 19, early: true, index: 0}
   181  - input: [6]
   182  - watermark: 100
   183  - expect:  # Final on-time panes; [10, 19] is empty because 14 and 18 were already emitted.
   184    - {window: [0, 9], values: [6], timestamp: 9, early: false, late: false,
   185       final: true, index: 2, nonspeculative_index: 0}
   186    - {window: [10, 19], values: [], timestamp: 19, early: false, late: false,
   187       final: true, index: 1, nonspeculative_index: 0}
   188  
   189  ---
   190  name: garbage_collection
   191  broken_on:
   192    - SwitchingDirectRunner  # claims pipeline stall
   193  window_fn: FixedWindows(10)
   194  trigger_fn: AfterCount(2)
   195  timestamp_combiner: OUTPUT_AT_EOW
   196  allowed_lateness: 10
   197  accumulation_mode: discarding
   198  transcript:
   199    - input: [1, 2, 3, 10, 11, 25]
   200    - expect:  # Window [20, 29] has a single element, below AfterCount(2), so it is not listed.
   201        - {window: [0, 9], timestamp: 9}
   202        - {window: [10, 19], timestamp: 19}
   203    - state:  # NOTE(review): per-window state assertions; exact category semantics are defined by the test harness - confirm there.
   204        present: [[20, 29]]
   205        absent: [[0, 9]]
   206        tombstone: [[10, 19]]
   207  
   208  ---
   209  name: known_late_data_watermark
   210  broken_on:
   211    - SwitchingDirectRunner  # bad timestamp
   212  window_fn: FixedWindows(10)
   213  trigger_fn: Default
   214  timestamp_combiner: OUTPUT_AT_EARLIEST
   215  transcript:
   216    - watermark: 5  # Set before any input, so 2 and 3 below arrive behind the watermark.
   217    - input: [2, 3, 7, 8]
   218    - watermark: 11
   219    - expect:  # Timestamp 7 is the earliest input that was not behind the watermark on arrival.
   220        - {window: [0, 9], values: [2, 3, 7, 8], timestamp: 7}
   221  
   222  ---
   223  name: known_late_data_no_watermark_hold_possible
   224  broken_on:
   225    - SwitchingDirectRunner  # bad timestamp
   226  window_fn: FixedWindows(10)
   227  trigger_fn: Default
   228  timestamp_combiner: OUTPUT_AT_EARLIEST
   229  transcript:
   230    - watermark: 8  # Set before any input, so every element below arrives behind the watermark.
   231    - input: [2, 3, 7]
   232    - watermark: 11
   233    - expect:  # No input timestamp is usable; the expected timestamp is the window's upper bound (9).
   234        - {window: [0, 9], values: [2, 3, 7], timestamp: 9}
   235  
   236  # These next examples test that bad/incomplete transcripts are rejected.
   237  ---
   238  name: bad_output
   239  error: Unmatched output  # Presumably matched against the failure the harness must raise.
   240  window_fn: FixedWindows(10)
   241  transcript:
   242    - input: [1, 2, 3]
   243    - expect:  # Deliberately placed before the watermark advances.
   244        - {window: [0, 9], values: [1, 2, 3]}  # bad
   245    - watermark: 100
   246  
   247  ---
   248  name: bad_expected_values
   249  error: Unmatched output  # Presumably matched against the failure the harness must raise.
   250  window_fn: FixedWindows(10)
   251  transcript:
   252    - input: [1, 2, 3]
   253    - watermark: 100
   254    - expect:
   255        - {window: [0, 9], values: [1, 2]}  # bad values: input 3 is deliberately omitted
   256  
   257  ---
   258  name: bad_expected_window
   259  error: Unmatched output  # Presumably matched against the failure the harness must raise.
   260  window_fn: FixedWindows(10)
   261  transcript:
   262    - input: [1, 2, 3]
   263    - watermark: 100
   264    - expect:
   265        - {window: [0, 19], values: [1, 2, 3]}  # bad window: FixedWindows(10) would give [0, 9]
   266  
   267  ---
   268  name: missing_output
   269  error: Unexpected output  # Presumably matched against the failure the harness must raise.
   270  window_fn: FixedWindows(10)
   271  transcript:
   272     - input: [1, 2, 3]
   273     - watermark: 100
   274     # missing output: no expect follows the watermark advance above
   275     - watermark: 200
   276  
   277  ---
   278  name: missing_output_at_end
   279  error: Unexpected output  # Presumably matched against the failure the harness must raise.
   280  window_fn: FixedWindows(10)
   281  transcript:
   282     - input: [1, 2, 3]
   283     - watermark: 100
   284     # missing output: the transcript ends with no expect after the watermark advance