github.com/apache/beam/sdks/v2@v2.48.2/go/test/load/group_by_key/group_by_key.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // This is GroupByKey load test with Synthetic Source. 17 18 package main 19 20 import ( 21 "context" 22 "flag" 23 24 "github.com/apache/beam/sdks/v2/go/pkg/beam" 25 "github.com/apache/beam/sdks/v2/go/pkg/beam/io/synthetic" 26 "github.com/apache/beam/sdks/v2/go/pkg/beam/log" 27 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 28 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 29 "github.com/apache/beam/sdks/v2/go/test/load" 30 ) 31 32 var ( 33 fanout = flag.Int( 34 "fanout", 35 1, 36 "A number of GroupByKey operations to perform in parallel.") 37 iterations = flag.Int( 38 "iterations", 39 1, 40 "A number of reiterations over per-key-grouped values to perform.") 41 syntheticConfig = flag.String( 42 "input_options", 43 "", 44 "A JSON object that describes the configuration for synthetic source.") 45 ) 46 47 func parseSyntheticConfig() synthetic.SourceConfig { 48 if *syntheticConfig == "" { 49 panic("--input_options not provided") 50 } else { 51 encoded := []byte(*syntheticConfig) 52 return synthetic.DefaultSourceConfig().BuildFromJSON(encoded) 53 } 54 } 55 56 func init() { 57 register.DoFn2x2[[]byte, func(*[]byte) bool, []byte, []byte]((*ungroupAndReiterateFn)(nil)) 58 register.Iter1[[]byte]() 59 } 60 61 // ungroupAndReiterateFn reiterates given number of times over GBK's output. 62 type ungroupAndReiterateFn struct { 63 Iterations int 64 } 65 66 // TODO use re-iterators once supported. 67 68 func (fn *ungroupAndReiterateFn) ProcessElement(key []byte, values func(*[]byte) bool) ([]byte, []byte) { 69 var value []byte 70 for i := 0; i < fn.Iterations; i++ { 71 for values(&value) { 72 if i == fn.Iterations-1 { 73 return key, value 74 } 75 } 76 } 77 return key, []byte{0} 78 } 79 80 func main() { 81 flag.Parse() 82 beam.Init() 83 84 ctx := context.Background() 85 86 p, s := beam.NewPipelineWithRoot() 87 src := synthetic.SourceSingle(s, parseSyntheticConfig()) 88 src = beam.ParDo(s, &load.RuntimeMonitor{}, src) 89 for i := 0; i < *fanout; i++ { 90 pcoll := beam.GroupByKey(s, src) 91 pcoll = beam.ParDo(s, &ungroupAndReiterateFn{*iterations}, pcoll) 92 pcoll = beam.ParDo(s, &load.RuntimeMonitor{}, pcoll) 93 _ = pcoll 94 } 95 96 presult, err := beamx.RunWithMetrics(ctx, p) 97 if err != nil { 98 log.Fatalf(ctx, "Failed to execute job: %v", err) 99 } 100 101 if presult != nil { 102 metrics := presult.Metrics().AllMetrics() 103 load.PublishMetrics(metrics) 104 } 105 }