// github.com/apache/beam/sdks/v2@v2.48.2/go/examples/xlang/cogroup_by/cogroup_by.go

// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.  See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.  You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// cogroup_by exemplifies using a cross-language cogroup by key transform from a test expansion service.
//
// Prerequisites to run cogroup_by:
// –> [Required] Job needs to be submitted to a portable runner (--runner=universal)
// –> [Required] Endpoint of job service needs to be passed (--endpoint=<ip:port>)
// –> [Required] Endpoint of expansion service needs to be passed (--expansion_addr=<ip:port>)
// –> [Optional] Environment type can be LOOPBACK. Defaults to DOCKER.
(--environment_type=LOOPBACK|DOCKER) 23 package main 24 25 import ( 26 "context" 27 "flag" 28 "fmt" 29 "log" 30 "sort" 31 32 "github.com/apache/beam/sdks/v2/go/examples/xlang" 33 "github.com/apache/beam/sdks/v2/go/pkg/beam" 34 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 35 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" 36 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 37 38 // Imports to enable correct filesystem access and runner setup in LOOPBACK mode 39 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs" 40 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local" 41 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" 42 ) 43 44 var ( 45 expansionAddr = flag.String("expansion_addr", "", "Address of Expansion Service") 46 ) 47 48 // formatFn is a DoFn that formats a word and its count as a string. 49 func formatFn(w int64, c []string) string { 50 sort.Strings(c) 51 return fmt.Sprintf("%v:%v", w, c) 52 } 53 54 // KV used to represent KV PCollection values 55 type KV struct { 56 X int64 57 Y string 58 } 59 60 func getKV(kv KV, emit func(int64, string)) { 61 emit(kv.X, kv.Y) 62 } 63 64 func sumCounts(key int64, iter1 func(*string) bool) (int64, []string) { 65 var val string 66 var values []string 67 68 for iter1(&val) { 69 values = append(values, val) 70 } 71 return key, values 72 } 73 74 func init() { 75 register.Function2x1(formatFn) 76 register.Function2x0(getKV) 77 register.Function2x2(sumCounts) 78 79 register.Emitter2[int64, string]() 80 register.Iter1[string]() 81 } 82 83 func main() { 84 flag.Parse() 85 beam.Init() 86 87 if *expansionAddr == "" { 88 log.Fatal("No expansion address provided") 89 } 90 91 p := beam.NewPipeline() 92 s := p.Root() 93 94 // Using the cross-language transform 95 col1 := beam.ParDo(s, getKV, beam.Create(s, KV{X: 0, Y: "1"}, KV{X: 0, Y: "2"}, KV{X: 1, Y: "3"})) 96 col2 := beam.ParDo(s, getKV, beam.Create(s, KV{X: 0, Y: "4"}, KV{X: 1, Y: "5"}, KV{X: 1, Y: "6"})) 97 c := 
xlang.CoGroupByKey(s, *expansionAddr, col1, col2) 98 sums := beam.ParDo(s, sumCounts, c) 99 formatted := beam.ParDo(s, formatFn, sums) 100 passert.Equals(s, formatted, "0:[1 2 4]", "1:[3 5 6]") 101 102 if err := beamx.Run(context.Background(), p); err != nil { 103 log.Fatalf("Failed to execute job: %v", err) 104 } 105 }