github.com/apache/beam/sdks/v2@v2.48.2/go/examples/snippets/04transforms.go

// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.  See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.  You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
	"fmt"
	"math"
	"reflect"
	"sort"
	"strings"
	"time"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/state"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/rtrackers/offsetrange"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"
)

// [START model_pardo_pardo]

// ComputeWordLengthFn is the DoFn to perform on each element in the input PCollection.
type ComputeWordLengthFn struct{}

// ProcessElement is the method to execute for each element.
func (fn *ComputeWordLengthFn) ProcessElement(word string, emit func(int)) {
	emit(len(word))
}

// DoFns must be registered with beam.
func init() {
	beam.RegisterType(reflect.TypeOf((*ComputeWordLengthFn)(nil)))
	// 2 inputs and 0 outputs => DoFn2x0
	// 1 input => Emitter1
	// Input/output types are included in order in the brackets
	register.DoFn2x0[string, func(int)](&ComputeWordLengthFn{})
	register.Emitter1[int]()
}

// [END model_pardo_pardo]

// applyWordLen applies ComputeWordLengthFn to words, which must be
// a PCollection<string>
func applyWordLen(s beam.Scope, words beam.PCollection) beam.PCollection {
	// [START model_pardo_apply]
	wordLengths := beam.ParDo(s, &ComputeWordLengthFn{}, words)
	// [END model_pardo_apply]
	return wordLengths
}

func applyWordLenAnon(s beam.Scope, words beam.PCollection) beam.PCollection {
	// [START model_pardo_apply_anon]
	// Apply an anonymous function as a DoFn to the PCollection words.
	// Save the result as the PCollection wordLengths.
	wordLengths := beam.ParDo(s, func(word string) int {
		return len(word)
	}, words)
	// [END model_pardo_apply_anon]
	return wordLengths
}

func applyGbk(s beam.Scope, input []stringPair) beam.PCollection {
	// [START groupbykey]
	// CreateAndSplit creates and returns a PCollection with <K,V>
	// from an input slice of stringPair (struct with K, V string fields).
	pairs := CreateAndSplit(s, input)
	keyed := beam.GroupByKey(s, pairs)
	// [END groupbykey]
	return keyed
}
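
// GroupByKey produces a PCollection of unique keys, each paired with an
// iterator over that key's values. The following is a hedged sketch (an
// addition, not part of the original snippets file) of one way to consume
// that output: a DoFn applied to the grouped collection receives the key and
// an iterator function over the values. The name countValuesPerKey is
// hypothetical.
func countValuesPerKey(key string, values func(*string) bool) (string, int) {
	var v string
	count := 0
	// Each call to the iterator advances to the next value for this key.
	for values(&v) {
		count++
	}
	return key, count
}

func init() {
	register.Function2x2(countValuesPerKey)
	register.Iter1[string]()
}

// Usage sketch: perKeyCounts := beam.ParDo(s, countValuesPerKey, applyGbk(s, input))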

// [START cogroupbykey_input_helpers]

type stringPair struct {
	K, V string
}

func splitStringPair(e stringPair) (string, string) {
	return e.K, e.V
}

func init() {
	// Register DoFn.
	register.Function1x2(splitStringPair)
}

// CreateAndSplit is a helper function that creates a PCollection of
// key/value pairs from an input slice of stringPair.
func CreateAndSplit(s beam.Scope, input []stringPair) beam.PCollection {
	initial := beam.CreateList(s, input)
	return beam.ParDo(s, splitStringPair, initial)
}

// [END cogroupbykey_input_helpers]

type splittableDoFn struct{}

type weDoFn struct{}

// [START bundlefinalization_simplecallback]

func (fn *splittableDoFn) ProcessElement(bf beam.BundleFinalization, rt *sdf.LockRTracker, element string) {
	// ... produce output ...

	bf.RegisterCallback(5*time.Minute, func() error {
		// ... perform a side effect ...

		return nil
	})
}

// [END bundlefinalization_simplecallback]

// [START watermarkestimation_customestimator]

// WatermarkState is a custom type.
//
// It is optional to write your own state type when making a custom estimator.
type WatermarkState struct {
	Watermark time.Time
}

// CustomWatermarkEstimator is a custom watermark estimator.
// You may use any type here, including some of Beam's built-in watermark estimator
// types, e.g. sdf.WallTimeWatermarkEstimator, sdf.TimestampObservingWatermarkEstimator,
// and sdf.ManualWatermarkEstimator.
type CustomWatermarkEstimator struct {
	state WatermarkState
}

// CurrentWatermark returns the current watermark and is invoked on DoFn splits and self-checkpoints.
// Watermark estimators must implement CurrentWatermark() time.Time.
func (e *CustomWatermarkEstimator) CurrentWatermark() time.Time {
	return e.state.Watermark
}

// ObserveTimestamp is called on the output timestamps of all
// emitted elements to update the watermark. It is optional.
func (e *CustomWatermarkEstimator) ObserveTimestamp(ts time.Time) {
	e.state.Watermark = ts
}

// InitialWatermarkEstimatorState defines an initial state used to initialize the watermark
// estimator. It is optional. If this is not defined, WatermarkEstimatorState may not be
// defined and CreateWatermarkEstimator must not take in parameters.
func (fn *weDoFn) InitialWatermarkEstimatorState(et beam.EventTime, rest offsetrange.Restriction, element string) WatermarkState {
	// Return some watermark state.
	return WatermarkState{Watermark: time.Now()}
}

// CreateWatermarkEstimator creates the watermark estimator used by this Splittable DoFn.
// Must take in a state parameter if InitialWatermarkEstimatorState is defined, otherwise takes no parameters.
func (fn *weDoFn) CreateWatermarkEstimator(initialState WatermarkState) *CustomWatermarkEstimator {
	return &CustomWatermarkEstimator{state: initialState}
}

// WatermarkEstimatorState returns the state used to resume future watermark estimation
// after a checkpoint/split. It is required if InitialWatermarkEstimatorState is defined,
// otherwise it must not be defined.
func (fn *weDoFn) WatermarkEstimatorState(e *CustomWatermarkEstimator) WatermarkState {
	return e.state
}

// ProcessElement is the method to execute for each element.
// It can optionally take in a watermark estimator.
func (fn *weDoFn) ProcessElement(e *CustomWatermarkEstimator, element string) {
	// ...
	e.state.Watermark = time.Now()
}

// [END watermarkestimation_customestimator]

// [START sdf_truncate]

// TruncateRestriction is a transform that is triggered when the pipeline starts to drain.
// It helps to finish a pipeline quicker by truncating the restriction.
func (fn *splittableDoFn) TruncateRestriction(rt *sdf.LockRTracker, element string) offsetrange.Restriction {
	start := rt.GetRestriction().(offsetrange.Restriction).Start
	prevEnd := rt.GetRestriction().(offsetrange.Restriction).End
	// Truncate the restriction by halving its end point.
	newEnd := prevEnd / 2
	return offsetrange.Restriction{
		Start: start,
		End:   newEnd,
	}
}

// [END sdf_truncate]

// [START cogroupbykey_output_helpers]

func formatCoGBKResults(key string, emailIter, phoneIter func(*string) bool) string {
	var s string
	var emails, phones []string
	for emailIter(&s) {
		emails = append(emails, s)
	}
	for phoneIter(&s) {
		phones = append(phones, s)
	}
	// Values have no guaranteed order; sort for deterministic output.
	sort.Strings(emails)
	sort.Strings(phones)
	return fmt.Sprintf("%s; %s; %s", key, formatStringIter(emails), formatStringIter(phones))
}

func init() {
	register.Function3x1(formatCoGBKResults)
	// 1 input of type string => Iter1[string]
	register.Iter1[string]()
}

// [END cogroupbykey_output_helpers]

func formatStringIter(vs []string) string {
	var b strings.Builder
	b.WriteRune('[')
	for i, v := range vs {
		b.WriteRune('\'')
		b.WriteString(v)
		b.WriteRune('\'')
		if i < len(vs)-1 {
			b.WriteString(", ")
		}
	}
	b.WriteRune(']')
	return b.String()
}

func coGBKExample(s beam.Scope) beam.PCollection {
	// [START cogroupbykey_inputs]
	var emailSlice = []stringPair{
		{"amy", "amy@example.com"},
		{"carl", "carl@example.com"},
		{"julia", "julia@example.com"},
		{"carl", "carl@email.com"},
	}

	var phoneSlice = []stringPair{
		{"amy", "111-222-3333"},
		{"james", "222-333-4444"},
		{"amy", "333-444-5555"},
		{"carl", "444-555-6666"},
	}
	emails := CreateAndSplit(s.Scope("CreateEmails"), emailSlice)
	phones := CreateAndSplit(s.Scope("CreatePhones"), phoneSlice)
	// [END cogroupbykey_inputs]

	// [START cogroupbykey_outputs]
	results := beam.CoGroupByKey(s, emails, phones)

	contactLines := beam.ParDo(s, formatCoGBKResults, results)
	// [END cogroupbykey_outputs]

	return contactLines
}

// [START combine_simple_sum]
func sumInts(a, v int) int {
	return a + v
}

func init() {
	register.Function2x1(sumInts)
}

func globallySumInts(s beam.Scope, ints beam.PCollection) beam.PCollection {
	return beam.Combine(s, sumInts, ints)
}

type boundedSum struct {
	Bound int
}

func (fn *boundedSum) MergeAccumulators(a, v int) int {
	sum := a + v
	if fn.Bound > 0 && sum > fn.Bound {
		return fn.Bound
	}
	return sum
}

func init() {
	register.Combiner1[int](&boundedSum{})
}

func globallyBoundedSumInts(s beam.Scope, bound int, ints beam.PCollection) beam.PCollection {
	return beam.Combine(s, &boundedSum{Bound: bound}, ints)
}

// [END combine_simple_sum]
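
// The same boundedSum CombineFn also works per key. A hedged sketch (an
// addition, not part of the original snippets file): beam.CombinePerKey
// applies the CombineFn to a keyed PCollection, producing one bounded sum
// per key. The function name perKeyBoundedSumInts is hypothetical.
func perKeyBoundedSumInts(s beam.Scope, bound int, keyedInts beam.PCollection) beam.PCollection {
	// keyedInts is assumed to be a PCollection<KV<string, int>>.
	return beam.CombinePerKey(s, &boundedSum{Bound: bound}, keyedInts)
}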

// [START combine_custom_average]

type averageFn struct{}

type averageAccum struct {
	Count, Sum int
}

func (fn *averageFn) CreateAccumulator() averageAccum {
	return averageAccum{0, 0}
}

func (fn *averageFn) AddInput(a averageAccum, v int) averageAccum {
	return averageAccum{Count: a.Count + 1, Sum: a.Sum + v}
}

func (fn *averageFn) MergeAccumulators(a, v averageAccum) averageAccum {
	return averageAccum{Count: a.Count + v.Count, Sum: a.Sum + v.Sum}
}

func (fn *averageFn) ExtractOutput(a averageAccum) float64 {
	if a.Count == 0 {
		return math.NaN()
	}
	return float64(a.Sum) / float64(a.Count)
}

func init() {
	register.Combiner3[averageAccum, int, float64](&averageFn{})
}

// [END combine_custom_average]

func globallyAverage(s beam.Scope, ints beam.PCollection) beam.PCollection {
	// [START combine_global_average]
	average := beam.Combine(s, &averageFn{}, ints)
	// [END combine_global_average]
	return average
}

func globallyAverageWithDefault(s beam.Scope, ints beam.PCollection) beam.PCollection {
	// [START combine_global_with_default]
	// Setting combine defaults requires no helper function in the Go SDK.
	average := beam.Combine(s, &averageFn{}, ints)

	// To add a default value:
	defaultValue := beam.Create(s, float64(0))
	avgWithDefault := beam.ParDo(s, func(d float64, iter func(*float64) bool) float64 {
		var c float64
		if iter(&c) {
			// Side input has a value, so return it.
			return c
		}
		// Otherwise, return the default.
		return d
	}, defaultValue, beam.SideInput{Input: average})
	// [END combine_global_with_default]
	return avgWithDefault
}

func perKeyAverage(s beam.Scope, playerAccuracies beam.PCollection) beam.PCollection {
	// [START combine_per_key]
	avgAccuracyPerPlayer := stats.MeanPerKey(s, playerAccuracies)
	// [END combine_per_key]
	return avgAccuracyPerPlayer
}

func applyFlatten(s beam.Scope, pcol1, pcol2, pcol3 beam.PCollection) beam.PCollection {
	// [START model_multiple_pcollections_flatten]
	merged := beam.Flatten(s, pcol1, pcol2, pcol3)
	// [END model_multiple_pcollections_flatten]
	return merged
}

type Student struct {
	Percentile int
}

// [START model_multiple_pcollections_partition_fn]

func decileFn(student Student) int {
	return int(float64(student.Percentile) / float64(10))
}

func init() {
	register.Function1x1(decileFn)
}

// [END model_multiple_pcollections_partition_fn]

// applyPartition returns the partition of students containing the
// 40th percentile (the fifth decile).
func applyPartition(s beam.Scope, students beam.PCollection) beam.PCollection {
	// [START model_multiple_pcollections_partition]
	// Partition returns a slice of PCollections
	studentsByPercentile := beam.Partition(s, 10, decileFn, students)
	// Each partition can be extracted by indexing into the slice.
	fortiethPercentile := studentsByPercentile[4]
	// [END model_multiple_pcollections_partition]
	return fortiethPercentile
}

// [START model_pardo_side_input_dofn]

// filterWordsAbove is a DoFn that takes in a word,
// and a singleton side input iterator of a length cutoff,
// and only emits words that are above that cutoff.
//
// If the iterator has no elements, an error is returned, aborting processing.
func filterWordsAbove(word string, lengthCutOffIter func(*float64) bool, emitAboveCutoff func(string)) error {
	var cutOff float64
	ok := lengthCutOffIter(&cutOff)
	if !ok {
		return fmt.Errorf("no length cutoff provided")
	}
	if float64(len(word)) > cutOff {
		emitAboveCutoff(word)
	}
	return nil
}

// filterWordsBelow is a DoFn that takes in a word,
// and a singleton side input of a length cutoff,
// and only emits words that are beneath that cutoff.
//
// If the side input isn't a singleton, a runtime panic will occur.
func filterWordsBelow(word string, lengthCutOff float64, emitBelowCutoff func(string)) {
	if float64(len(word)) <= lengthCutOff {
		emitBelowCutoff(word)
	}
}

func init() {
	register.Function3x1(filterWordsAbove)
	register.Function3x0(filterWordsBelow)
	// 1 input of type string => Emitter1[string]
	register.Emitter1[string]()
	// 1 input of type float64 => Iter1[float64]
	register.Iter1[float64]()
}

// [END model_pardo_side_input_dofn]

// addSideInput demonstrates passing a side input to a DoFn.
func addSideInput(s beam.Scope, words beam.PCollection) (beam.PCollection, beam.PCollection) {
	wordLengths := applyWordLen(s, words)

	// [START model_pardo_side_input]
	// avgWordLength is a PCollection containing a single element, a singleton.
	avgWordLength := stats.Mean(s, wordLengths)

	// Side inputs are added with the beam.SideInput option to beam.ParDo.
	wordsAboveCutOff := beam.ParDo(s, filterWordsAbove, words, beam.SideInput{Input: avgWordLength})
	wordsBelowCutOff := beam.ParDo(s, filterWordsBelow, words, beam.SideInput{Input: avgWordLength})
	// [END model_pardo_side_input]
	return wordsAboveCutOff, wordsBelowCutOff
}

// isMarkedWord is a dummy function.
func isMarkedWord(word string) bool {
	return strings.HasPrefix(word, "MARKER")
}

// [START model_multiple_output_dofn]

// processWords is a DoFn that has 3 output PCollections. The emitter functions
// are matched in positional order to the PCollections returned by beam.ParDo3.
func processWords(word string, emitBelowCutoff, emitAboveCutoff, emitMarked func(string)) {
	const cutOff = 5
	if len(word) < cutOff {
		emitBelowCutoff(word)
	} else {
		emitAboveCutoff(word)
	}
	if isMarkedWord(word) {
		emitMarked(word)
	}
}

// processWordsMixed demonstrates mixing an emitter with a standard return.
// If a standard return is used, it will always be the first returned PCollection,
// followed in positional order by the emitter functions.
func processWordsMixed(word string, emitMarked func(string)) int {
	if isMarkedWord(word) {
		emitMarked(word)
	}
	return len(word)
}

func init() {
	register.Function4x0(processWords)
	register.Function2x1(processWordsMixed)
	// 1 input of type string => Emitter1[string]
	register.Emitter1[string]()
}

// [END model_multiple_output_dofn]

func applyMultipleOut(s beam.Scope, words beam.PCollection) (belows, aboves, markeds, lengths, mixedMarkeds beam.PCollection) {
	// [START model_multiple_output]
	// beam.ParDo3 returns PCollections in the same order as
	// the emit function parameters in processWords.
	below, above, marked := beam.ParDo3(s, processWords, words)

	// processWordsMixed uses both a standard return and an emitter function.
	// The standard return produces the first PCollection from beam.ParDo2,
	// and the emitter produces the second PCollection.
	length, mixedMarked := beam.ParDo2(s, processWordsMixed, words)
	// [END model_multiple_output]
	return below, above, marked, length, mixedMarked
}

// [START model_paneinfo]

func extractWordsFn(pn beam.PaneInfo, line string, emitWords func(string)) {
	if pn.Timing == typex.PaneEarly || pn.Timing == typex.PaneOnTime {
		// ... perform operation ...
	}
	if pn.Timing == typex.PaneLate {
		// ... perform operation ...
	}
	if pn.IsFirst {
		// ... perform operation ...
	}
	if pn.IsLast {
		// ... perform operation ...
	}

	words := strings.Split(line, " ")
	for _, w := range words {
		emitWords(w)
	}
}

// [END model_paneinfo]

func contains(s []string, e string) bool {
	for _, a := range s {
		if a == e {
			return true
		}
	}
	return false
}

// TODO(https://github.com/apache/beam/issues/22737): Update state_and_timers to a good example to demonstrate both state and timers.
// Rename this to bag_state and update the bag state example in the programming guide at that point.
// [START state_and_timers]

// bagStateFn only emits words that haven't been seen before.
type bagStateFn struct {
	bag state.Bag[string]
}

func (s *bagStateFn) ProcessElement(p state.Provider, book string, word string, emitWords func(string)) error {
	// Get all values we've written to this bag state in this window.
	vals, ok, err := s.bag.Read(p)
	if err != nil {
		return err
	}
	if !ok || !contains(vals, word) {
		emitWords(word)
		s.bag.Add(p, word)
	}

	if len(vals) > 10000 {
		// Example of clearing and starting again with an empty bag
		s.bag.Clear(p)
	}

	return nil
}

// [END state_and_timers]
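
// A hedged wiring sketch (an addition, not part of the original snippets
// file): state is partitioned per key and window, so a stateful DoFn must be
// applied to a keyed PCollection, here assumed to be KV<book, word>. The
// state cell is initialized with a key via state.MakeBagState; the function
// name and the "seenWords" key are hypothetical.
func applyBagStateFn(s beam.Scope, bookWords beam.PCollection) beam.PCollection {
	return beam.ParDo(s, &bagStateFn{bag: state.MakeBagState[string]("seenWords")}, bookWords)
}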

// [START value_state]

// valueStateFn keeps track of the number of elements seen.
type valueStateFn struct {
	val state.Value[int]
}

func (s *valueStateFn) ProcessElement(p state.Provider, book string, word string, emitWords func(string)) error {
	// Get the value stored in our state.
	val, ok, err := s.val.Read(p)
	if err != nil {
		return err
	}
	if !ok {
		s.val.Write(p, 1)
	} else {
		s.val.Write(p, val+1)
	}

	if val > 10000 {
		// Example of clearing and starting again with an empty value state.
		s.val.Clear(p)
	}

	return nil
}

// [END value_state]
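
// As with the bag state example, a hedged wiring sketch (an addition, not in
// the original file): the value state cell is initialized with a key via
// state.MakeValueState, and the DoFn is applied to a keyed PCollection.
// The function name and the "elementCount" key are hypothetical.
func applyValueStateFn(s beam.Scope, bookWords beam.PCollection) beam.PCollection {
	return beam.ParDo(s, &valueStateFn{val: state.MakeValueState[int]("elementCount")}, bookWords)
}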

type MyCustomType struct{}

func (m MyCustomType) Bytes() []byte {
	return nil
}

func (m MyCustomType) FromBytes(_ []byte) MyCustomType {
	return m
}

// [START value_state_coder]

type valueStateDoFn struct {
	val state.Value[MyCustomType]
}

func encode(m MyCustomType) []byte {
	return m.Bytes()
}

func decode(b []byte) MyCustomType {
	return MyCustomType{}.FromBytes(b)
}

func init() {
	beam.RegisterCoder(reflect.TypeOf((*MyCustomType)(nil)).Elem(), encode, decode)
}

// [END value_state_coder]

type combineFn struct{}

// [START combining_state]

// combiningStateFn keeps track of the number of elements seen.
type combiningStateFn struct {
	// types are the types of the accumulator, input, and output respectively
	val state.Combining[int, int, int]
}

func (s *combiningStateFn) ProcessElement(p state.Provider, book string, word string, emitWords func(string)) error {
	// Get the value stored in our state.
	val, _, err := s.val.Read(p)
	if err != nil {
		return err
	}
	s.val.Add(p, 1)

	if val > 10000 {
		// Example of clearing and starting again with an empty state.
		s.val.Clear(p)
	}

	return nil
}

func main() {
	// ...
	// CombineFn param can be a simple fn like this or a structural CombineFn
	cFn := state.MakeCombiningState[int, int, int]("stateKey", func(a, b int) int {
		return a + b
	})
	// ...

	// [END combining_state]

	fmt.Print(cFn)
}
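
// A hedged wiring sketch (an addition, not part of the original file): the
// combining state cell created with state.MakeCombiningState above can
// initialize the DoFn's val field before the DoFn is applied to a keyed
// PCollection. The function name applyCombiningStateFn is hypothetical.
func applyCombiningStateFn(s beam.Scope, bookWords beam.PCollection) beam.PCollection {
	cFn := state.MakeCombiningState[int, int, int]("stateKey", func(a, b int) int {
		return a + b
	})
	return beam.ParDo(s, &combiningStateFn{val: cFn}, bookWords)
}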

type statefulDoFn struct {
	s state.Value[int]
}

func statefulPipeline() beam.PCollection {
	var s beam.Scope
	var elements beam.PCollection

	// [START windowed_state]

	items := beam.ParDo(s, statefulDoFn{}, elements)
	out := beam.WindowInto(s, window.NewFixedWindows(24*time.Hour), items)

	// [END windowed_state]

	return out
}

func init() {
	register.Function3x0(extractWordsFn)
	// 1 input of type string => Emitter1[string]
	register.Emitter1[string]()
}

// [START countwords_composite]
// CountWords is a function that builds a composite PTransform
// to count the number of times each word appears.
func CountWords(s beam.Scope, lines beam.PCollection) beam.PCollection {
	// A subscope is required for a function to become a composite transform.
	// We assign it to the original scope variable s to shadow the original
	// for the rest of the CountWords function.
	s = s.Scope("CountWords")

	// Since the same subscope is used for the following transforms,
	// they are in the same composite PTransform.

	// Convert lines of text into individual words.
	words := beam.ParDo(s, extractWordsFn, lines)

	// Count the number of times each word occurs.
	wordCounts := stats.Count(s, words)

	// Return any PCollections that should be available after
	// the composite transform.
	return wordCounts
}

// [END countwords_composite]
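
// A minimal end-to-end sketch (an addition, not part of the original file)
// showing the composite transform in use. beam.NewPipelineWithRoot and
// beam.Create are real SDK functions; the input strings and the function
// name countWordsPipeline are hypothetical.
func countWordsPipeline() *beam.Pipeline {
	p, s := beam.NewPipelineWithRoot()
	lines := beam.Create(s, "the quick brown fox", "the lazy dog")
	wordCounts := CountWords(s, lines)
	_ = wordCounts // e.g. format the KV<string, int> results and write them out.
	return p
}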