# github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/data/trigger_transcripts.yaml
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Each YAML document in this file is one trigger transcript. This first
# transcript doubles as an annotated example of the format.
---
name: fixed_default
# Windowing function, written with Python names/syntax unless noted otherwise.
window_fn: FixedWindows(10)
# Trigger, same syntax as window_fn; an empty "()" may be left off.
trigger_fn: Default
# The events of the transcript, in the order they occur.
transcript:
  # Input elements are given by their timestamps.
  - input: [1, 2, 3, 10, 11]
  - watermark: 25
  # Lists every output expected since the previous action.
  - expect:
      - {window: [0, 9], values: [1, 2, 3], index: 0}
      # Attributes may be matched partially: only the listed ones are given.
      - {window: [10, 19], values: [10, 11]}
---
# Late element 7 arrives while window [0, 9] is still within allowed_lateness,
# so the window is expected to fire again with the accumulated values.
name: fixed_default_late_data
window_fn: FixedWindows(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EOW
allowed_lateness: 100
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2, 3], timestamp: 9, final: false}
      - {window: [10, 19], values: [10, 11], timestamp: 19}
      - {window: [20, 29], values: [25], timestamp: 29, late: false}
  - input: [7]
  - expect:
      - {window: [0, 9], values: [1, 2, 3, 7], timestamp: 9, late: true}

---
# With the watermark at 100 and allowed_lateness of 20, inputs for windows
# [0, 9] and [10, 19] produce no output, while [80, 89] still fires late.
name: fixed_drop_late_data_after_allowed_lateness
window_fn: FixedWindows(10)
trigger_fn: AfterWatermark(early=AfterCount(3), late=AfterCount(1))
timestamp_combiner: OUTPUT_AT_EOW
allowed_lateness: 20
accumulation_mode: accumulating
transcript:
  - input: [1, 2, 10, 11, 80, 81]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2], timestamp: 9}
      - {window: [10, 19], values: [10, 11], timestamp: 19}
      - {window: [80, 89], values: [80, 81], timestamp: 89, late: false}
  - input: [7, 8]  # no output
  - input: [17, 18]  # no output
  - input: [82]
  - expect:
      - {window: [80, 89], values: [80, 81, 82], timestamp: 89, late: true}

---
# Output timestamp is the earliest element timestamp in each window.
name: timestamp_combiner_earliest
window_fn: FixedWindows(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EARLIEST
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2, 3], timestamp: 1}
      - {window: [10, 19], values: [10, 11], timestamp: 10}
      - {window: [20, 29], values: [25], timestamp: 25, late: false}

---
# Output timestamp is the latest element timestamp in each window.
name: timestamp_combiner_latest
window_fn: FixedWindows(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_LATEST
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2, 3], timestamp: 3}
      - {window: [10, 19], values: [10, 11], timestamp: 11}
      - {window: [20, 29], values: [25], timestamp: 25, late: false}

---
# Test that custom timestamping is not invoked.
name: timestamp_combiner_custom_timestamping_eow
window_fn: CustomTimestampingFixedWindowsWindowFn(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EOW
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2, 3], timestamp: 9}
      - {window: [10, 19], values: [10, 11], timestamp: 19}
      - {window: [20, 29], values: [25], timestamp: 29, late: false}

---
# Test that custom timestamping is not invoked.
name: timestamp_combiner_custom_timestamping_earliest
window_fn: CustomTimestampingFixedWindowsWindowFn(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EARLIEST
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2, 3], timestamp: 1}
      - {window: [10, 19], values: [10, 11], timestamp: 10}
      - {window: [20, 29], values: [25], timestamp: 25, late: false}

---
# Test that custom timestamping is in fact invoked.
# Named distinctly from the OUTPUT_AT_EARLIEST transcript above, which
# previously shared this name, so that every transcript name is unique.
name: timestamp_combiner_custom_timestamping_earliest_transformed
broken_on:
  - SwitchingDirectRunner  # unsupported OUTPUT_AT_EARLIEST_TRANSFORMED
window_fn: CustomTimestampingFixedWindowsWindowFn(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EARLIEST_TRANSFORMED
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2, 3], timestamp: 101}
      - {window: [10, 19], values: [10, 11], timestamp: 110}
      - {window: [20, 29], values: [25], timestamp: 125, late: false}

---
# Early, on-time and late firings of a session window that keeps growing
# as new elements are merged in.
# NOTE(review): the expected index is 0 for the first pane listed for each of
# [1, 11], [1, 13] and [1, 14], whereas discarding_early_fixed keeps counting
# early panes -- confirm that restarting the index after a merge is intended.
name: early_late_sessions
broken_on:
  # Watermark regresses, causing what should be late data to not be late.
  - SwitchingDirectRunner
window_fn: Sessions(10)
trigger_fn: AfterWatermark(early=AfterCount(2), late=AfterCount(3))
allowed_lateness: 100
timestamp_combiner: OUTPUT_AT_EOW
transcript:
  - input: [1, 2]
  - expect:
      - {window: [1, 11], values: [1, 2], timestamp: 11,
         early: true, index: 0}
  - input: [3]  # no output
  - input: [4]
  - expect:
      - {window: [1, 13], values: [1, 2, 3, 4], timestamp: 13,
         early: true, index: 0}
  - input: [5]
  - watermark: 100
  - expect:
      - {window: [1, 14], values: [1, 2, 3, 4, 5], timestamp: 14,
         index: 0, nonspeculative_index: 0}
  - input: [1]
  - input: [3, 4]
  - expect:
      - {window: [1, 14], values: [1, 1, 2, 3, 3, 4, 4, 5], timestamp: 14,
         final: false, index: 1, nonspeculative_index: 1}

---
# Discarding mode: each pane carries only the elements received since the
# previous firing of the same window.
name: discarding_early_fixed
window_fn: FixedWindows(10)
trigger_fn: AfterWatermark(early=AfterCount(2))
timestamp_combiner: OUTPUT_AT_EOW
accumulation_mode: discarding
transcript:
  - input: [1, 2]
  - expect:
      - {window: [0, 9], values: [1, 2], timestamp: 9,
         early: true, index: 0}
  - input: [4]  # no output
  - input: [14]  # no output
  - input: [5]
  - expect:
      - {window: [0, 9], values: [4, 5], timestamp: 9,
         early: true, index: 1}
  - input: [18]
  - expect:
      - {window: [10, 19], values: [14, 18], timestamp: 19,
         early: true, index: 0}
  - input: [6]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [6], timestamp: 9,
         early: false, late: false,
         final: true, index: 2, nonspeculative_index: 0}
      - {window: [10, 19], values: [], timestamp: 19,
         early: false, late: false,
         final: true, index: 1, nonspeculative_index: 0}

---
# Checks per-window state (present / absent / tombstone) after firing.
name: garbage_collection
broken_on:
  - SwitchingDirectRunner  # claims pipeline stall
window_fn: FixedWindows(10)
trigger_fn: AfterCount(2)
timestamp_combiner: OUTPUT_AT_EOW
allowed_lateness: 10
accumulation_mode: discarding
transcript:
  - input: [1, 2, 3, 10, 11, 25]
  - expect:
      - {window: [0, 9], timestamp: 9}
      - {window: [10, 19], timestamp: 19}
  - state:
      present: [[20, 29]]
      absent: [[0, 9]]
      tombstone: [[10, 19]]

---
# Elements 2 and 3 are already behind the watermark (5) when they arrive;
# the expected output timestamp is 7, the earliest element ahead of it.
name: known_late_data_watermark
broken_on:
  - SwitchingDirectRunner  # bad timestamp
window_fn: FixedWindows(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EARLIEST
transcript:
  - watermark: 5
  - input: [2, 3, 7, 8]
  - watermark: 11
  - expect:
      - {window: [0, 9], values: [2, 3, 7, 8], timestamp: 7}

---
# Every element is behind the watermark (8) on arrival; the expected output
# timestamp is 9, the end of the window.
name: known_late_data_no_watermark_hold_possible
broken_on:
  - SwitchingDirectRunner  # bad timestamp
window_fn: FixedWindows(10)
trigger_fn: Default
timestamp_combiner: OUTPUT_AT_EARLIEST
transcript:
  - watermark: 8
  - input: [2, 3, 7]
  - watermark: 11
  - expect:
      - {window: [0, 9], values: [2, 3, 7], timestamp: 9}

# These next examples test that bad/incomplete transcripts are rejected.
---
# Each transcript in this group is deliberately wrong; `error` names the
# failure it is expected to be rejected with.
name: bad_output
error: Unmatched output
# Key spelled window_fn like every other transcript (was "windowfn").
window_fn: FixedWindows(10)
transcript:
  - input: [1, 2, 3]
  - expect:
      # bad: this output is expected before the watermark advances
      - {window: [0, 9], values: [1, 2, 3]}
  - watermark: 100

---
name: bad_expected_values
error: Unmatched output
window_fn: FixedWindows(10)
transcript:
  - input: [1, 2, 3]
  - watermark: 100
  - expect:
      - {window: [0, 9], values: [1, 2]}  # bad values

---
name: bad_expected_window
error: Unmatched output
window_fn: FixedWindows(10)
transcript:
  - input: [1, 2, 3]
  - watermark: 100
  - expect:
      - {window: [0, 19], values: [1, 2, 3]}  # bad window

---
name: missing_output
error: Unexpected output
window_fn: FixedWindows(10)
transcript:
  - input: [1, 2, 3]
  - watermark: 100
  # missing output
  - watermark: 200

---
name: missing_output_at_end
error: Unexpected output
window_fn: FixedWindows(10)
transcript:
  - input: [1, 2, 3]
  - watermark: 100
  # missing output