github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/primitives/windowinto.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package primitives 17 18 import ( 19 "time" 20 21 "github.com/apache/beam/sdks/v2/go/pkg/beam" 22 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" 23 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" 24 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window/trigger" 25 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 26 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" 27 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/teststream" 28 "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats" 29 ) 30 31 func init() { 32 register.Function4x2(sumPerKey) 33 register.Function3x0(sumSideInputs) 34 register.DoFn2x0[[]byte, func(beam.EventTime, string, int)](&createTimestampedData{}) 35 36 register.Emitter3[beam.EventTime, string, int]() 37 register.Emitter1[int]() 38 register.Iter1[int]() 39 } 40 41 // createTimestampedData produces data timestamped with the ordinal. 42 type createTimestampedData struct { 43 Data []int 44 } 45 46 func (f *createTimestampedData) ProcessElement(_ []byte, emit func(beam.EventTime, string, int)) { 47 for i, v := range f.Data { 48 timestamp := mtime.FromMilliseconds(int64((i + 1) * 1000)).Subtract(10 * time.Millisecond) 49 emit(timestamp, "magic", v) 50 } 51 } 52 53 // WindowSums produces a pipeline that generates the numbers of a 3x3 magic square, and 54 // configures the pipeline so that PCollection. Sum is a closure to handle summing data over the window, in a few conditions. 55 func WindowSums(s beam.Scope, sumPerKey func(beam.Scope, beam.PCollection) beam.PCollection) { 56 timestampedData := beam.ParDo(s, &createTimestampedData{Data: []int{4, 9, 2, 3, 5, 7, 8, 1, 6}}, beam.Impulse(s)) 57 58 windowSize := 3 * time.Second 59 60 validate := func(s beam.Scope, wfn *window.Fn, in beam.PCollection, expected ...any) { 61 // Window the data. 62 windowed := beam.WindowInto(s, wfn, in) 63 // Perform the appropriate sum operation. 64 sums := sumPerKey(s, windowed) 65 // Drop back to Global windows, and drop the key otherwise passert.Equals doesn't work. 66 sums = beam.WindowInto(s, window.NewGlobalWindows(), sums) 67 sums = beam.DropKey(s, sums) 68 passert.Equals(s, sums, expected...) 69 } 70 71 // Use fixed windows to divide the data into 3 chunks. 72 validate(s.Scope("Fixed"), window.NewFixedWindows(windowSize), timestampedData, 15, 15, 15) 73 // This should be identical to the "fixed" windows. 74 validate(s.Scope("SlidingFixed"), window.NewSlidingWindows(windowSize, windowSize), timestampedData, 15, 15, 15) 75 // This will have overlap, but each value should be a multiple of the magic number. 76 validate(s.Scope("Sliding"), window.NewSlidingWindows(windowSize, 3*windowSize), timestampedData, 15, 30, 45, 30, 15) 77 // With such a large gap, there should be a single session which will sum to 45. 78 validate(s.Scope("Session"), window.NewSessions(windowSize), timestampedData, 45) 79 } 80 81 func sumPerKey(ws beam.Window, ts beam.EventTime, key beam.U, iter func(*int) bool) (beam.U, int) { 82 var v, sum int 83 for iter(&v) { 84 sum += v 85 } 86 return key, sum 87 } 88 89 func gbkSumPerKey(s beam.Scope, in beam.PCollection) beam.PCollection { 90 grouped := beam.GroupByKey(s, in) 91 return beam.ParDo(s, sumPerKey, grouped) 92 } 93 94 func WindowSums_GBK(s beam.Scope) { 95 WindowSums(s.Scope("GBK"), gbkSumPerKey) 96 } 97 98 func WindowSums_Lifted(s beam.Scope) { 99 WindowSums(s.Scope("Lifted"), stats.SumPerKey) 100 } 101 102 // ValidateWindowedSideInputs checks that side inputs have accurate windowing information when used. 103 func ValidateWindowedSideInputs(s beam.Scope) { 104 timestampedData := beam.ParDo(s, &createTimestampedData{Data: []int{1, 2, 3}}, beam.Impulse(s)) 105 106 timestampedData = beam.DropKey(s, timestampedData) 107 108 windowSize := 1 * time.Second 109 110 validateSums := func(s beam.Scope, wfn, sideFn *window.Fn, in, side beam.PCollection, expected ...any) { 111 wData := beam.WindowInto(s, wfn, in) 112 wSide := beam.WindowInto(s, sideFn, side) 113 114 sums := beam.ParDo(s, sumSideInputs, wData, beam.SideInput{Input: wSide}) 115 116 sums = beam.WindowInto(s, window.NewGlobalWindows(), sums) 117 118 passert.Equals(s, sums, expected...) 119 } 120 121 validateSums(s.Scope("Fixed-Global"), window.NewFixedWindows(windowSize), window.NewGlobalWindows(), timestampedData, timestampedData, 7, 8, 9) 122 validateSums(s.Scope("Fixed-Same"), window.NewFixedWindows(windowSize), window.NewFixedWindows(windowSize), timestampedData, timestampedData, 2, 4, 6) 123 validateSums(s.Scope("Fixed-Big"), window.NewFixedWindows(windowSize), window.NewFixedWindows(10*time.Second), timestampedData, timestampedData, 7, 8, 9) 124 // Main: With window size 1, each window contains 1 element (1, 2, 3) 125 // Side: Window size 2 with period 1, so each window covers 2 seconds of time 126 // Have [1], [1,2], [2,3], [3] 127 // Each main input should map to the earliest occuring sliding window it maps to: 128 // (1, [1]) = 2 129 // (2, [1, 2]) = 5 130 // (3, [2, 3]) = 8 131 validateSums(s.Scope("Fixed-Sliding"), window.NewFixedWindows(windowSize), window.NewSlidingWindows(windowSize, 2*windowSize), timestampedData, timestampedData, 2, 5, 8) 132 // Main: Window size 2 with period 1, so each window has up to two elements 133 // Have [1], [1,2], [2,3], [3] 134 // Side: With window size 1, each window contains 1 element (1, 2, 3) 135 // Each main input will map to the window its latest timestamp corresponds to: 136 // ([1], 1) = 2 137 // ([1, 2], 2) = 3, 4 138 // ([2, 3], 3) = 5, 6 139 // ([3], -) = 3 140 validateSums(s.Scope("Sliding-Fixed"), window.NewSlidingWindows(windowSize, 2*windowSize), window.NewFixedWindows(windowSize), timestampedData, timestampedData, 2, 3, 4, 5, 6, 3) 141 } 142 143 func sumSideInputs(input int, iter func(*int) bool, emit func(int)) { 144 var v, sum int 145 sum += input 146 for iter(&v) { 147 sum += v 148 } 149 emit(sum) 150 } 151 152 func validateEquals(s beam.Scope, wfn *window.Fn, in beam.PCollection, opts []beam.WindowIntoOption, expected ...any) { 153 windowed := beam.WindowInto(s, wfn, in, opts...) 154 sums := stats.Sum(s, windowed) 155 sums = beam.WindowInto(s, window.NewGlobalWindows(), sums) 156 passert.Equals(s, sums, expected...) 157 } 158 159 // TriggerDefault tests the default trigger which fires the pane after the end of the window 160 func TriggerDefault(s beam.Scope) { 161 con := teststream.NewConfig() 162 con.AddElements(1000, 1.0, 2.0, 3.0) 163 con.AdvanceWatermark(11000) 164 con.AddElements(12000, 4.0, 5.0) 165 con.AdvanceWatermark(13000) 166 167 col := teststream.Create(s, con) 168 windowSize := 10 * time.Second 169 validateEquals(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col, 170 []beam.WindowIntoOption{ 171 beam.Trigger(trigger.Default()), 172 }, 6.0, 9.0) 173 } 174 175 // TriggerAlways tests the Always trigger, it is expected to receive every input value as the output. 176 func TriggerAlways(s beam.Scope) { 177 con := teststream.NewConfig() 178 con.AddElements(1000, 1.0, 2.0, 3.0) 179 con.AdvanceWatermark(11000) 180 col := teststream.Create(s, con) 181 windowSize := 10 * time.Second 182 183 validateEquals(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col, 184 []beam.WindowIntoOption{ 185 beam.Trigger(trigger.Always()), 186 }, 1.0, 2.0, 3.0) 187 } 188 189 // validateCount handles cases where we can only be sure of the count of elements 190 // and not their ordering. 191 func validateCount(s beam.Scope, wfn *window.Fn, in beam.PCollection, opts []beam.WindowIntoOption, expected int) { 192 windowed := beam.WindowInto(s, wfn, in, opts...) 193 sums := stats.Sum(s, windowed) 194 sums = beam.WindowInto(s, window.NewGlobalWindows(), sums) 195 passert.Count(s, sums, "total collections", expected) 196 } 197 198 // TriggerElementCount tests the ElementCount Trigger, it waits for atleast N elements to be ready 199 // to fire an output pane 200 func TriggerElementCount(s beam.Scope) { 201 con := teststream.NewConfig() 202 con.AddElements(1000, 1.0, 2.0, 3.0) 203 con.AdvanceWatermark(2000) 204 con.AddElements(6000, 4.0, 5.0) 205 con.AdvanceWatermark(10000) 206 con.AddElements(52000, 10.0) 207 con.AdvanceWatermark(53000) 208 209 col := teststream.Create(s, con) 210 windowSize := 10 * time.Second 211 212 // waits only for two elements to arrive and fires output after that and never fires that. 213 // For the trigger to fire every 2 elements, combine it with Repeat Trigger 214 validateCount(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col, 215 []beam.WindowIntoOption{ 216 beam.Trigger(trigger.AfterCount(2)), 217 }, 2) 218 } 219 220 // TriggerAfterProcessingTime tests the AfterProcessingTime Trigger, it fires output panes once 't' processing time has passed 221 // Not yet supported by the flink runner: 222 // java.lang.UnsupportedOperationException: Advancing Processing time is not supported by the Flink Runner. 223 func TriggerAfterProcessingTime(s beam.Scope) { 224 con := teststream.NewConfig() 225 con.AdvanceProcessingTime(100) 226 con.AddElements(1000, 1.0, 2.0, 3.0) 227 con.AdvanceProcessingTime(2000) 228 con.AddElements(22000, 4.0) 229 230 col := teststream.Create(s, con) 231 232 validateEquals(s.Scope("Global"), window.NewGlobalWindows(), col, 233 []beam.WindowIntoOption{ 234 beam.Trigger(trigger.AfterProcessingTime().PlusDelay(5 * time.Second)), 235 }, 6.0) 236 } 237 238 // TriggerRepeat tests the repeat trigger. As of now is it is configure to take only one trigger as a subtrigger. 239 // In the below test, it is expected to receive three output panes with two elements each. 240 func TriggerRepeat(s beam.Scope) { 241 // create a teststream pipeline and get the pcollection 242 con := teststream.NewConfig() 243 con.AddElements(1000, 1.0, 2.0, 3.0) 244 con.AdvanceWatermark(2000) 245 con.AddElements(6000, 4.0, 5.0, 6.0) 246 con.AdvanceWatermark(10000) 247 248 col := teststream.Create(s, con) 249 250 validateCount(s.Scope("Global"), window.NewGlobalWindows(), col, 251 []beam.WindowIntoOption{ 252 beam.Trigger(trigger.Repeat(trigger.AfterCount(2))), 253 }, 3) 254 } 255 256 // TriggerAfterEndOfWindow tests the AfterEndOfWindow Trigger. With AfterCount(2) as the early firing trigger and AfterCount(1) as late firing trigger. 257 // It fires two times, one with early firing when there are two elements while the third elements waits in. This third element is fired in the late firing. 258 func TriggerAfterEndOfWindow(s beam.Scope) { 259 con := teststream.NewConfig() 260 con.AddElements(1000, 1.0, 2.0, 3.0) 261 con.AdvanceWatermark(11000) 262 263 col := teststream.Create(s, con) 264 windowSize := 10 * time.Second 265 trigger := trigger.AfterEndOfWindow(). 266 EarlyFiring(trigger.AfterCount(2)). 267 LateFiring(trigger.AfterCount(1)) 268 269 validateCount(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col, 270 []beam.WindowIntoOption{ 271 beam.Trigger(trigger), 272 }, 2) 273 } 274 275 // TriggerAfterAll tests AfterAll trigger. The output pane is fired when all triggers in the subtriggers 276 // are ready. In this test, since trigger.AfterCount(int32(5)) won't be ready unless we see 5 elements, 277 // trigger.Always() won't fire until we meet that condition. So we fire only once when we see the 5th element. 278 func TriggerAfterAll(s beam.Scope) { 279 con := teststream.NewConfig() 280 con.AddElements(1000, 1.0, 2.0, 3.0, 5.0, 8.0) 281 con.AdvanceWatermark(11000) 282 283 col := teststream.Create(s, con) 284 trigger := trigger.Repeat( 285 trigger.AfterAll( 286 []trigger.Trigger{ 287 trigger.Always(), 288 trigger.AfterCount(int32(5)), 289 }, 290 ), 291 ) 292 293 validateCount(s.Scope("Global"), window.NewFixedWindows(10*time.Second), col, 294 []beam.WindowIntoOption{ 295 beam.Trigger(trigger), 296 }, 1) 297 } 298 299 // TriggerAfterEach tests AfterEach trigger. The output pane is fired after each trigger 300 // is ready in the order set in subtriggers. In this test, since trigger.AfterCount(int32(3)) is first, 301 // first pane is fired after 3 elements, then a pane is fired each for trigger.Always() for 302 // element 5.0 and 8.0 303 func TriggerAfterEach(s beam.Scope) { 304 con := teststream.NewConfig() 305 con.AddElements(1000, 1.0, 2.0, 3.0, 5.0, 8.0) 306 con.AdvanceWatermark(11000) 307 308 col := teststream.Create(s, con) 309 trigger := trigger.Repeat( 310 trigger.AfterEach( 311 []trigger.Trigger{ 312 trigger.AfterCount(int32(3)), 313 trigger.Always(), 314 }, 315 ), 316 ) 317 318 validateCount(s.Scope("Global"), window.NewGlobalWindows(), col, 319 []beam.WindowIntoOption{ 320 beam.Trigger(trigger), 321 }, 3) 322 } 323 324 // TriggerAfterAny tests AfterAny trigger. In this test, trigger.Always() gets ready everytime. 325 // So we would expect panes to be fired at every element irrespective of checking for other triggers. 326 func TriggerAfterAny(s beam.Scope) { 327 con := teststream.NewConfig() 328 con.AddElements(1000, 1.0, 2.0, 3.0) 329 con.AdvanceWatermark(11000) 330 con.AddElements(12000, 5.0, 8.0) 331 332 col := teststream.Create(s, con) 333 trigger := trigger.Repeat( 334 trigger.AfterAny( 335 []trigger.Trigger{ 336 trigger.AfterCount(int32(3)), 337 trigger.Always(), 338 }, 339 ), 340 ) 341 windowSize := 10 * time.Second 342 validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col, 343 []beam.WindowIntoOption{ 344 beam.Trigger(trigger), 345 }, 5) 346 } 347 348 // TriggerAfterSynchronizedProcessingTime tests AfterSynchronizedProcessingTime trigger. It fires at the window 349 // expiration since the times doesn't synchronize in this test case. 350 func TriggerAfterSynchronizedProcessingTime(s beam.Scope) { 351 con := teststream.NewConfig() 352 con.AddElements(1000, 1.0, 2.0, 3.0) 353 con.AdvanceWatermark(11000) 354 con.AddElements(12000, 5.0, 8.0) 355 356 col := teststream.Create(s, con) 357 trigger := trigger.Repeat(trigger.AfterSynchronizedProcessingTime()) 358 windowSize := 10 * time.Second 359 validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col, 360 []beam.WindowIntoOption{ 361 beam.Trigger(trigger), 362 }, 2) 363 } 364 365 // TriggerNever tests Never Trigger. It fires at the window expiration. 366 func TriggerNever(s beam.Scope) { 367 con := teststream.NewConfig() 368 con.AddElements(1000, 1.0, 2.0, 3.0) 369 con.AdvanceWatermark(11000) 370 con.AddElements(12000, 5.0, 8.0) 371 372 col := teststream.Create(s, con) 373 trigger := trigger.Never() 374 windowSize := 10 * time.Second 375 validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col, 376 []beam.WindowIntoOption{ 377 beam.Trigger(trigger), 378 }, 2) 379 } 380 381 // TriggerOrFinally tests OrFinally trigger. The main trigger in this test case trigger.Always() 382 // is always ready. But the output is produced only when finally trigger is ready. So it is ready at second 383 // element in first window and produces two output panes. Similarly, for the second window. 384 func TriggerOrFinally(s beam.Scope) { 385 con := teststream.NewConfig() 386 con.AddElements(1000, 1.0, 2.0, 3.0) 387 con.AdvanceWatermark(11000) 388 con.AddElements(12000, 5.0, 8.0) 389 390 col := teststream.Create(s, con) 391 trigger := trigger.OrFinally(trigger.Always(), trigger.AfterCount(int32(2))) 392 windowSize := 10 * time.Second 393 validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col, 394 []beam.WindowIntoOption{ 395 beam.Trigger(trigger), 396 }, 4) 397 }