github.com/apache/beam/sdks/v2@v2.48.2/go/examples/xlang/group_by/group_by.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // group_by exemplifies using a cross-language group by key transform from a test expansion service.
    17  //
// Prerequisites to run group_by:
    19  // –> [Required] Job needs to be submitted to a portable runner (--runner=universal)
    20  // –> [Required] Endpoint of job service needs to be passed (--endpoint=<ip:port>)
    21  // –> [Required] Endpoint of expansion service needs to be passed (--expansion_addr=<ip:port>)
    22  // –> [Optional] Environment type can be LOOPBACK. Defaults to DOCKER. (--environment_type=LOOPBACK|DOCKER)
    23  package main
    24  
    25  import (
    26  	"context"
    27  	"flag"
    28  	"fmt"
    29  	"log"
    30  	"sort"
    31  
    32  	"github.com/apache/beam/sdks/v2/go/examples/xlang"
    33  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    34  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    35  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
    36  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    37  
    38  	// Imports to enable correct filesystem access and runner setup in LOOPBACK mode
    39  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs"
    40  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local"
    41  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal"
    42  )
    43  
    44  var (
    45  	expansionAddr = flag.String("expansion_addr", "", "Address of Expansion Service")
    46  )
    47  
    48  // formatFn is a DoFn that formats a word and its count as a string.
    49  func formatFn(w string, c []int) string {
    50  	sort.Ints(c)
    51  	return fmt.Sprintf("%v:%v", w, c)
    52  }
    53  
// KV is the element type of the example's input PCollection. getKV emits X as
// the key and Y as the value.
type KV struct {
	X string
	Y int64
}
    59  
    60  func getKV(kv KV, emit func(string, int64)) {
    61  	emit(kv.X, kv.Y)
    62  }
    63  
    64  func collectValues(key string, iter func(*int64) bool) (string, []int) {
    65  	var count int64
    66  	var values []int
    67  	for iter(&count) {
    68  		values = append(values, int(count))
    69  	}
    70  	return key, values
    71  }
    72  
// init registers this example's DoFns, together with the emitter and iterator
// signatures they take, with the Beam register package.
func init() {
	// The NxM suffix matches each function's shape: N parameters, M results.
	register.Function2x1(formatFn)
	register.Function2x0(getKV)
	register.Function2x2(collectValues)

	// Emitter2[string, int64] matches getKV's emit func(string, int64);
	// Iter1[int64] matches collectValues' iter func(*int64) bool.
	register.Emitter2[string, int64]()
	register.Iter1[int64]()
}
    81  
    82  func main() {
    83  	flag.Parse()
    84  	beam.Init()
    85  
    86  	if *expansionAddr == "" {
    87  		log.Fatal("No expansion address provided")
    88  	}
    89  
    90  	p := beam.NewPipeline()
    91  	s := p.Root()
    92  
    93  	// Using the cross-language transform
    94  	kvs := beam.Create(s, KV{X: "0", Y: 1}, KV{X: "0", Y: 2}, KV{X: "1", Y: 3})
    95  	in := beam.ParDo(s, getKV, kvs)
    96  	out := xlang.GroupByKey(s, *expansionAddr, in)
    97  
    98  	vals := beam.ParDo(s, collectValues, out)
    99  	formatted := beam.ParDo(s, formatFn, vals)
   100  	passert.Equals(s, formatted, "0:[1 2]", "1:[3]")
   101  
   102  	if err := beamx.Run(context.Background(), p); err != nil {
   103  		log.Fatalf("Failed to execute job: %v", err)
   104  	}
   105  }