github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/primitives/windowinto.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package primitives
    17  
    18  import (
    19  	"time"
    20  
    21  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    22  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime"
    23  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window"
    24  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window/trigger"
    25  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    26  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
    27  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/teststream"
    28  	"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"
    29  )
    30  
    31  func init() {
    32  	register.Function4x2(sumPerKey)
    33  	register.Function3x0(sumSideInputs)
    34  	register.DoFn2x0[[]byte, func(beam.EventTime, string, int)](&createTimestampedData{})
    35  
    36  	register.Emitter3[beam.EventTime, string, int]()
    37  	register.Emitter1[int]()
    38  	register.Iter1[int]()
    39  }
    40  
    41  // createTimestampedData produces data timestamped with the ordinal.
    42  type createTimestampedData struct {
    43  	Data []int
    44  }
    45  
    46  func (f *createTimestampedData) ProcessElement(_ []byte, emit func(beam.EventTime, string, int)) {
    47  	for i, v := range f.Data {
    48  		timestamp := mtime.FromMilliseconds(int64((i + 1) * 1000)).Subtract(10 * time.Millisecond)
    49  		emit(timestamp, "magic", v)
    50  	}
    51  }
    52  
    53  // WindowSums produces a pipeline that generates the numbers of a 3x3 magic square, and
    54  // configures the pipeline so that PCollection. Sum is a closure to handle summing data over the window, in a few conditions.
    55  func WindowSums(s beam.Scope, sumPerKey func(beam.Scope, beam.PCollection) beam.PCollection) {
    56  	timestampedData := beam.ParDo(s, &createTimestampedData{Data: []int{4, 9, 2, 3, 5, 7, 8, 1, 6}}, beam.Impulse(s))
    57  
    58  	windowSize := 3 * time.Second
    59  
    60  	validate := func(s beam.Scope, wfn *window.Fn, in beam.PCollection, expected ...any) {
    61  		// Window the data.
    62  		windowed := beam.WindowInto(s, wfn, in)
    63  		// Perform the appropriate sum operation.
    64  		sums := sumPerKey(s, windowed)
    65  		// Drop back to Global windows, and drop the key otherwise passert.Equals doesn't work.
    66  		sums = beam.WindowInto(s, window.NewGlobalWindows(), sums)
    67  		sums = beam.DropKey(s, sums)
    68  		passert.Equals(s, sums, expected...)
    69  	}
    70  
    71  	// Use fixed windows to divide the data into 3 chunks.
    72  	validate(s.Scope("Fixed"), window.NewFixedWindows(windowSize), timestampedData, 15, 15, 15)
    73  	// This should be identical to the "fixed" windows.
    74  	validate(s.Scope("SlidingFixed"), window.NewSlidingWindows(windowSize, windowSize), timestampedData, 15, 15, 15)
    75  	// This will have overlap, but each value should be a multiple of the magic number.
    76  	validate(s.Scope("Sliding"), window.NewSlidingWindows(windowSize, 3*windowSize), timestampedData, 15, 30, 45, 30, 15)
    77  	// With such a large gap, there should be a single session which will sum to 45.
    78  	validate(s.Scope("Session"), window.NewSessions(windowSize), timestampedData, 45)
    79  }
    80  
    81  func sumPerKey(ws beam.Window, ts beam.EventTime, key beam.U, iter func(*int) bool) (beam.U, int) {
    82  	var v, sum int
    83  	for iter(&v) {
    84  		sum += v
    85  	}
    86  	return key, sum
    87  }
    88  
    89  func gbkSumPerKey(s beam.Scope, in beam.PCollection) beam.PCollection {
    90  	grouped := beam.GroupByKey(s, in)
    91  	return beam.ParDo(s, sumPerKey, grouped)
    92  }
    93  
    94  func WindowSums_GBK(s beam.Scope) {
    95  	WindowSums(s.Scope("GBK"), gbkSumPerKey)
    96  }
    97  
    98  func WindowSums_Lifted(s beam.Scope) {
    99  	WindowSums(s.Scope("Lifted"), stats.SumPerKey)
   100  }
   101  
   102  // ValidateWindowedSideInputs checks that side inputs have accurate windowing information when used.
   103  func ValidateWindowedSideInputs(s beam.Scope) {
   104  	timestampedData := beam.ParDo(s, &createTimestampedData{Data: []int{1, 2, 3}}, beam.Impulse(s))
   105  
   106  	timestampedData = beam.DropKey(s, timestampedData)
   107  
   108  	windowSize := 1 * time.Second
   109  
   110  	validateSums := func(s beam.Scope, wfn, sideFn *window.Fn, in, side beam.PCollection, expected ...any) {
   111  		wData := beam.WindowInto(s, wfn, in)
   112  		wSide := beam.WindowInto(s, sideFn, side)
   113  
   114  		sums := beam.ParDo(s, sumSideInputs, wData, beam.SideInput{Input: wSide})
   115  
   116  		sums = beam.WindowInto(s, window.NewGlobalWindows(), sums)
   117  
   118  		passert.Equals(s, sums, expected...)
   119  	}
   120  
   121  	validateSums(s.Scope("Fixed-Global"), window.NewFixedWindows(windowSize), window.NewGlobalWindows(), timestampedData, timestampedData, 7, 8, 9)
   122  	validateSums(s.Scope("Fixed-Same"), window.NewFixedWindows(windowSize), window.NewFixedWindows(windowSize), timestampedData, timestampedData, 2, 4, 6)
   123  	validateSums(s.Scope("Fixed-Big"), window.NewFixedWindows(windowSize), window.NewFixedWindows(10*time.Second), timestampedData, timestampedData, 7, 8, 9)
   124  	// Main: With window size 1, each window contains 1 element (1, 2, 3)
   125  	// Side: Window size 2 with period 1, so each window covers 2 seconds of time
   126  	//	 Have [1], [1,2], [2,3], [3]
   127  	// Each main input should map to the earliest occuring sliding window it maps to:
   128  	// (1, [1]) = 2
   129  	// (2, [1, 2]) = 5
   130  	// (3, [2, 3]) = 8
   131  	validateSums(s.Scope("Fixed-Sliding"), window.NewFixedWindows(windowSize), window.NewSlidingWindows(windowSize, 2*windowSize), timestampedData, timestampedData, 2, 5, 8)
   132  	// Main: Window size 2 with period 1, so each window has up to two elements
   133  	//	 Have [1], [1,2], [2,3], [3]
   134  	// Side: With window size 1, each window contains 1 element (1, 2, 3)
   135  	// Each main input will map to the window its latest timestamp corresponds to:
   136  	// ([1], 1) = 2
   137  	// ([1, 2], 2) = 3, 4
   138  	// ([2, 3], 3) = 5, 6
   139  	// ([3], -) = 3
   140  	validateSums(s.Scope("Sliding-Fixed"), window.NewSlidingWindows(windowSize, 2*windowSize), window.NewFixedWindows(windowSize), timestampedData, timestampedData, 2, 3, 4, 5, 6, 3)
   141  }
   142  
   143  func sumSideInputs(input int, iter func(*int) bool, emit func(int)) {
   144  	var v, sum int
   145  	sum += input
   146  	for iter(&v) {
   147  		sum += v
   148  	}
   149  	emit(sum)
   150  }
   151  
   152  func validateEquals(s beam.Scope, wfn *window.Fn, in beam.PCollection, opts []beam.WindowIntoOption, expected ...any) {
   153  	windowed := beam.WindowInto(s, wfn, in, opts...)
   154  	sums := stats.Sum(s, windowed)
   155  	sums = beam.WindowInto(s, window.NewGlobalWindows(), sums)
   156  	passert.Equals(s, sums, expected...)
   157  }
   158  
   159  // TriggerDefault tests the default trigger which fires the pane after the end of the window
   160  func TriggerDefault(s beam.Scope) {
   161  	con := teststream.NewConfig()
   162  	con.AddElements(1000, 1.0, 2.0, 3.0)
   163  	con.AdvanceWatermark(11000)
   164  	con.AddElements(12000, 4.0, 5.0)
   165  	con.AdvanceWatermark(13000)
   166  
   167  	col := teststream.Create(s, con)
   168  	windowSize := 10 * time.Second
   169  	validateEquals(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col,
   170  		[]beam.WindowIntoOption{
   171  			beam.Trigger(trigger.Default()),
   172  		}, 6.0, 9.0)
   173  }
   174  
   175  // TriggerAlways tests the Always trigger, it is expected to receive every input value as the output.
   176  func TriggerAlways(s beam.Scope) {
   177  	con := teststream.NewConfig()
   178  	con.AddElements(1000, 1.0, 2.0, 3.0)
   179  	con.AdvanceWatermark(11000)
   180  	col := teststream.Create(s, con)
   181  	windowSize := 10 * time.Second
   182  
   183  	validateEquals(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col,
   184  		[]beam.WindowIntoOption{
   185  			beam.Trigger(trigger.Always()),
   186  		}, 1.0, 2.0, 3.0)
   187  }
   188  
   189  // validateCount handles cases where we can only be sure of the count of elements
   190  // and not their ordering.
   191  func validateCount(s beam.Scope, wfn *window.Fn, in beam.PCollection, opts []beam.WindowIntoOption, expected int) {
   192  	windowed := beam.WindowInto(s, wfn, in, opts...)
   193  	sums := stats.Sum(s, windowed)
   194  	sums = beam.WindowInto(s, window.NewGlobalWindows(), sums)
   195  	passert.Count(s, sums, "total collections", expected)
   196  }
   197  
   198  // TriggerElementCount tests the ElementCount Trigger, it waits for atleast N elements to be ready
   199  // to fire an output pane
   200  func TriggerElementCount(s beam.Scope) {
   201  	con := teststream.NewConfig()
   202  	con.AddElements(1000, 1.0, 2.0, 3.0)
   203  	con.AdvanceWatermark(2000)
   204  	con.AddElements(6000, 4.0, 5.0)
   205  	con.AdvanceWatermark(10000)
   206  	con.AddElements(52000, 10.0)
   207  	con.AdvanceWatermark(53000)
   208  
   209  	col := teststream.Create(s, con)
   210  	windowSize := 10 * time.Second
   211  
   212  	// waits only for two elements to arrive and fires output after that and never fires that.
   213  	// For the trigger to fire every 2 elements, combine it with Repeat Trigger
   214  	validateCount(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col,
   215  		[]beam.WindowIntoOption{
   216  			beam.Trigger(trigger.AfterCount(2)),
   217  		}, 2)
   218  }
   219  
   220  // TriggerAfterProcessingTime tests the AfterProcessingTime Trigger, it fires output panes once 't' processing time has passed
   221  // Not yet supported by the flink runner:
   222  // java.lang.UnsupportedOperationException: Advancing Processing time is not supported by the Flink Runner.
   223  func TriggerAfterProcessingTime(s beam.Scope) {
   224  	con := teststream.NewConfig()
   225  	con.AdvanceProcessingTime(100)
   226  	con.AddElements(1000, 1.0, 2.0, 3.0)
   227  	con.AdvanceProcessingTime(2000)
   228  	con.AddElements(22000, 4.0)
   229  
   230  	col := teststream.Create(s, con)
   231  
   232  	validateEquals(s.Scope("Global"), window.NewGlobalWindows(), col,
   233  		[]beam.WindowIntoOption{
   234  			beam.Trigger(trigger.AfterProcessingTime().PlusDelay(5 * time.Second)),
   235  		}, 6.0)
   236  }
   237  
   238  // TriggerRepeat tests the repeat trigger. As of now is it is configure to take only one trigger as a subtrigger.
   239  // In the below test, it is expected to receive three output panes with two elements each.
   240  func TriggerRepeat(s beam.Scope) {
   241  	// create a teststream pipeline and get the pcollection
   242  	con := teststream.NewConfig()
   243  	con.AddElements(1000, 1.0, 2.0, 3.0)
   244  	con.AdvanceWatermark(2000)
   245  	con.AddElements(6000, 4.0, 5.0, 6.0)
   246  	con.AdvanceWatermark(10000)
   247  
   248  	col := teststream.Create(s, con)
   249  
   250  	validateCount(s.Scope("Global"), window.NewGlobalWindows(), col,
   251  		[]beam.WindowIntoOption{
   252  			beam.Trigger(trigger.Repeat(trigger.AfterCount(2))),
   253  		}, 3)
   254  }
   255  
   256  // TriggerAfterEndOfWindow tests the AfterEndOfWindow Trigger. With AfterCount(2) as the early firing trigger and AfterCount(1) as late firing trigger.
   257  // It fires two times, one with early firing when there are two elements while the third elements waits in. This third element is fired in the late firing.
   258  func TriggerAfterEndOfWindow(s beam.Scope) {
   259  	con := teststream.NewConfig()
   260  	con.AddElements(1000, 1.0, 2.0, 3.0)
   261  	con.AdvanceWatermark(11000)
   262  
   263  	col := teststream.Create(s, con)
   264  	windowSize := 10 * time.Second
   265  	trigger := trigger.AfterEndOfWindow().
   266  		EarlyFiring(trigger.AfterCount(2)).
   267  		LateFiring(trigger.AfterCount(1))
   268  
   269  	validateCount(s.Scope("Fixed"), window.NewFixedWindows(windowSize), col,
   270  		[]beam.WindowIntoOption{
   271  			beam.Trigger(trigger),
   272  		}, 2)
   273  }
   274  
   275  // TriggerAfterAll tests AfterAll trigger. The output pane is fired when all triggers in the subtriggers
   276  // are ready. In this test, since trigger.AfterCount(int32(5)) won't be ready unless we see 5 elements,
   277  // trigger.Always() won't fire until we meet that condition. So we fire only once when we see the 5th element.
   278  func TriggerAfterAll(s beam.Scope) {
   279  	con := teststream.NewConfig()
   280  	con.AddElements(1000, 1.0, 2.0, 3.0, 5.0, 8.0)
   281  	con.AdvanceWatermark(11000)
   282  
   283  	col := teststream.Create(s, con)
   284  	trigger := trigger.Repeat(
   285  		trigger.AfterAll(
   286  			[]trigger.Trigger{
   287  				trigger.Always(),
   288  				trigger.AfterCount(int32(5)),
   289  			},
   290  		),
   291  	)
   292  
   293  	validateCount(s.Scope("Global"), window.NewFixedWindows(10*time.Second), col,
   294  		[]beam.WindowIntoOption{
   295  			beam.Trigger(trigger),
   296  		}, 1)
   297  }
   298  
   299  // TriggerAfterEach tests AfterEach trigger. The output pane is fired after each trigger
   300  // is ready in the order set in subtriggers. In this test, since trigger.AfterCount(int32(3)) is first,
   301  // first pane is fired after 3 elements, then a pane is fired each for trigger.Always() for
   302  // element 5.0 and 8.0
   303  func TriggerAfterEach(s beam.Scope) {
   304  	con := teststream.NewConfig()
   305  	con.AddElements(1000, 1.0, 2.0, 3.0, 5.0, 8.0)
   306  	con.AdvanceWatermark(11000)
   307  
   308  	col := teststream.Create(s, con)
   309  	trigger := trigger.Repeat(
   310  		trigger.AfterEach(
   311  			[]trigger.Trigger{
   312  				trigger.AfterCount(int32(3)),
   313  				trigger.Always(),
   314  			},
   315  		),
   316  	)
   317  
   318  	validateCount(s.Scope("Global"), window.NewGlobalWindows(), col,
   319  		[]beam.WindowIntoOption{
   320  			beam.Trigger(trigger),
   321  		}, 3)
   322  }
   323  
   324  // TriggerAfterAny tests AfterAny trigger. In this test, trigger.Always() gets ready everytime.
   325  // So we would expect panes to be fired at every element irrespective of checking for other triggers.
   326  func TriggerAfterAny(s beam.Scope) {
   327  	con := teststream.NewConfig()
   328  	con.AddElements(1000, 1.0, 2.0, 3.0)
   329  	con.AdvanceWatermark(11000)
   330  	con.AddElements(12000, 5.0, 8.0)
   331  
   332  	col := teststream.Create(s, con)
   333  	trigger := trigger.Repeat(
   334  		trigger.AfterAny(
   335  			[]trigger.Trigger{
   336  				trigger.AfterCount(int32(3)),
   337  				trigger.Always(),
   338  			},
   339  		),
   340  	)
   341  	windowSize := 10 * time.Second
   342  	validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col,
   343  		[]beam.WindowIntoOption{
   344  			beam.Trigger(trigger),
   345  		}, 5)
   346  }
   347  
   348  // TriggerAfterSynchronizedProcessingTime tests AfterSynchronizedProcessingTime trigger. It fires at the window
   349  // expiration since the times doesn't synchronize in this test case.
   350  func TriggerAfterSynchronizedProcessingTime(s beam.Scope) {
   351  	con := teststream.NewConfig()
   352  	con.AddElements(1000, 1.0, 2.0, 3.0)
   353  	con.AdvanceWatermark(11000)
   354  	con.AddElements(12000, 5.0, 8.0)
   355  
   356  	col := teststream.Create(s, con)
   357  	trigger := trigger.Repeat(trigger.AfterSynchronizedProcessingTime())
   358  	windowSize := 10 * time.Second
   359  	validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col,
   360  		[]beam.WindowIntoOption{
   361  			beam.Trigger(trigger),
   362  		}, 2)
   363  }
   364  
   365  // TriggerNever tests Never Trigger. It fires at the window expiration.
   366  func TriggerNever(s beam.Scope) {
   367  	con := teststream.NewConfig()
   368  	con.AddElements(1000, 1.0, 2.0, 3.0)
   369  	con.AdvanceWatermark(11000)
   370  	con.AddElements(12000, 5.0, 8.0)
   371  
   372  	col := teststream.Create(s, con)
   373  	trigger := trigger.Never()
   374  	windowSize := 10 * time.Second
   375  	validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col,
   376  		[]beam.WindowIntoOption{
   377  			beam.Trigger(trigger),
   378  		}, 2)
   379  }
   380  
   381  // TriggerOrFinally tests OrFinally trigger. The main trigger in this test case trigger.Always()
   382  // is always ready. But the output is produced only when finally trigger is ready. So it is ready at second
   383  // element in first window and produces two output panes. Similarly, for the second window.
   384  func TriggerOrFinally(s beam.Scope) {
   385  	con := teststream.NewConfig()
   386  	con.AddElements(1000, 1.0, 2.0, 3.0)
   387  	con.AdvanceWatermark(11000)
   388  	con.AddElements(12000, 5.0, 8.0)
   389  
   390  	col := teststream.Create(s, con)
   391  	trigger := trigger.OrFinally(trigger.Always(), trigger.AfterCount(int32(2)))
   392  	windowSize := 10 * time.Second
   393  	validateCount(s.Scope("Global"), window.NewFixedWindows(windowSize), col,
   394  		[]beam.WindowIntoOption{
   395  			beam.Trigger(trigger),
   396  		}, 4)
   397  }