github.com/apache/beam/sdks/v2@v2.48.2/go/examples/xlang/cogroup_by/cogroup_by.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // cogroup_by exemplifies using a cross-language cogroup by key transform from a test expansion service.
    17  //
    18  // Prerequisites to run cogroup_by:
    19  // –> [Required] Job needs to be submitted to a portable runner (--runner=universal)
    20  // –> [Required] Endpoint of job service needs to be passed (--endpoint=<ip:port>)
    21  // –> [Required] Endpoint of expansion service needs to be passed (--expansion_addr=<ip:port>)
    22  // –> [Optional] Environment type can be LOOPBACK. Defaults to DOCKER. (--environment_type=LOOPBACK|DOCKER)
    23  package main
    24  
    25  import (
    26  	"context"
    27  	"flag"
    28  	"fmt"
    29  	"log"
    30  	"sort"
    31  
    32  	"github.com/apache/beam/sdks/v2/go/examples/xlang"
    33  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    34  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    35  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
    36  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    37  
    38  	// Imports to enable correct filesystem access and runner setup in LOOPBACK mode
    39  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs"
    40  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local"
    41  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal"
    42  )
    43  
    44  var (
    45  	expansionAddr = flag.String("expansion_addr", "", "Address of Expansion Service")
    46  )
    47  
    48  // formatFn is a DoFn that formats a word and its count as a string.
    49  func formatFn(w int64, c []string) string {
    50  	sort.Strings(c)
    51  	return fmt.Sprintf("%v:%v", w, c)
    52  }
    53  
    54  // KV used to represent KV PCollection values
    55  type KV struct {
    56  	X int64
    57  	Y string
    58  }
    59  
    60  func getKV(kv KV, emit func(int64, string)) {
    61  	emit(kv.X, kv.Y)
    62  }
    63  
    64  func sumCounts(key int64, iter1 func(*string) bool) (int64, []string) {
    65  	var val string
    66  	var values []string
    67  
    68  	for iter1(&val) {
    69  		values = append(values, val)
    70  	}
    71  	return key, values
    72  }
    73  
// init registers this example's DoFns, and the emitter and iterator types
// that appear in their signatures, with the Beam register package.
// NOTE(review): presumably this lets the SDK invoke them without falling back
// to reflection — confirm against the register package documentation.
func init() {
	// The NxM suffix matches each function's shape: N parameters, M results.
	register.Function2x1(formatFn)
	register.Function2x0(getKV)
	register.Function2x2(sumCounts)

	// getKV takes an emit func(int64, string); sumCounts takes a
	// func(*string) bool iterator.
	register.Emitter2[int64, string]()
	register.Iter1[string]()
}
    82  
    83  func main() {
    84  	flag.Parse()
    85  	beam.Init()
    86  
    87  	if *expansionAddr == "" {
    88  		log.Fatal("No expansion address provided")
    89  	}
    90  
    91  	p := beam.NewPipeline()
    92  	s := p.Root()
    93  
    94  	// Using the cross-language transform
    95  	col1 := beam.ParDo(s, getKV, beam.Create(s, KV{X: 0, Y: "1"}, KV{X: 0, Y: "2"}, KV{X: 1, Y: "3"}))
    96  	col2 := beam.ParDo(s, getKV, beam.Create(s, KV{X: 0, Y: "4"}, KV{X: 1, Y: "5"}, KV{X: 1, Y: "6"}))
    97  	c := xlang.CoGroupByKey(s, *expansionAddr, col1, col2)
    98  	sums := beam.ParDo(s, sumCounts, c)
    99  	formatted := beam.ParDo(s, formatFn, sums)
   100  	passert.Equals(s, formatted, "0:[1 2 4]", "1:[3 5 6]")
   101  
   102  	if err := beamx.Run(context.Background(), p); err != nil {
   103  		log.Fatalf("Failed to execute job: %v", err)
   104  	}
   105  }