github.com/apache/beam/sdks/v2@v2.48.2/go/test/load/group_by_key/group_by_key.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
// This is a GroupByKey load test that reads from a synthetic source.
    17  
    18  package main
    19  
    20  import (
    21  	"context"
    22  	"flag"
    23  
    24  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    25  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/synthetic"
    26  	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    27  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    28  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    29  	"github.com/apache/beam/sdks/v2/go/test/load"
    30  )
    31  
    32  var (
    33  	fanout = flag.Int(
    34  		"fanout",
    35  		1,
    36  		"A number of GroupByKey operations to perform in parallel.")
    37  	iterations = flag.Int(
    38  		"iterations",
    39  		1,
    40  		"A number of reiterations over per-key-grouped values to perform.")
    41  	syntheticConfig = flag.String(
    42  		"input_options",
    43  		"",
    44  		"A JSON object that describes the configuration for synthetic source.")
    45  )
    46  
    47  func parseSyntheticConfig() synthetic.SourceConfig {
    48  	if *syntheticConfig == "" {
    49  		panic("--input_options not provided")
    50  	} else {
    51  		encoded := []byte(*syntheticConfig)
    52  		return synthetic.DefaultSourceConfig().BuildFromJSON(encoded)
    53  	}
    54  }
    55  
// init registers the DoFn and iterator types used by this pipeline with the
// Beam register package.
func init() {
	// The type parameters mirror ungroupAndReiterateFn.ProcessElement:
	// two inputs ([]byte key, []byte value iterator) and two []byte outputs.
	register.DoFn2x2[[]byte, func(*[]byte) bool, []byte, []byte]((*ungroupAndReiterateFn)(nil))
	// Registers the func(*[]byte) bool iterator shape consumed by
	// ProcessElement (presumably required separately from the DoFn itself —
	// see the register package documentation).
	register.Iter1[[]byte]()
}
    60  
    61  // ungroupAndReiterateFn reiterates given number of times over GBK's output.
    62  type ungroupAndReiterateFn struct {
    63  	Iterations int
    64  }
    65  
    66  // TODO use re-iterators once supported.
    67  
    68  func (fn *ungroupAndReiterateFn) ProcessElement(key []byte, values func(*[]byte) bool) ([]byte, []byte) {
    69  	var value []byte
    70  	for i := 0; i < fn.Iterations; i++ {
    71  		for values(&value) {
    72  			if i == fn.Iterations-1 {
    73  				return key, value
    74  			}
    75  		}
    76  	}
    77  	return key, []byte{0}
    78  }
    79  
    80  func main() {
    81  	flag.Parse()
    82  	beam.Init()
    83  
    84  	ctx := context.Background()
    85  
    86  	p, s := beam.NewPipelineWithRoot()
    87  	src := synthetic.SourceSingle(s, parseSyntheticConfig())
    88  	src = beam.ParDo(s, &load.RuntimeMonitor{}, src)
    89  	for i := 0; i < *fanout; i++ {
    90  		pcoll := beam.GroupByKey(s, src)
    91  		pcoll = beam.ParDo(s, &ungroupAndReiterateFn{*iterations}, pcoll)
    92  		pcoll = beam.ParDo(s, &load.RuntimeMonitor{}, pcoll)
    93  		_ = pcoll
    94  	}
    95  
    96  	presult, err := beamx.RunWithMetrics(ctx, p)
    97  	if err != nil {
    98  		log.Fatalf(ctx, "Failed to execute job: %v", err)
    99  	}
   100  
   101  	if presult != nil {
   102  		metrics := presult.Metrics().AllMetrics()
   103  		load.PublishMetrics(metrics)
   104  	}
   105  }