// Source: github.com/apache/beam/sdks/v2@v2.48.2/go/examples/xlang/group_by/group_by.go

// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.  See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.  You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// group_by exemplifies using a cross-language group by key transform from a test expansion service.
//
// Prerequisites to run group_by:
// –> [Required] Job needs to be submitted to a portable runner (--runner=universal)
// –> [Required] Endpoint of job service needs to be passed (--endpoint=<ip:port>)
// –> [Required] Endpoint of expansion service needs to be passed (--expansion_addr=<ip:port>)
// –> [Optional] Environment type can be LOOPBACK. Defaults to DOCKER.
(--environment_type=LOOPBACK|DOCKER) 23 package main 24 25 import ( 26 "context" 27 "flag" 28 "fmt" 29 "log" 30 "sort" 31 32 "github.com/apache/beam/sdks/v2/go/examples/xlang" 33 "github.com/apache/beam/sdks/v2/go/pkg/beam" 34 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 35 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" 36 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 37 38 // Imports to enable correct filesystem access and runner setup in LOOPBACK mode 39 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs" 40 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local" 41 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" 42 ) 43 44 var ( 45 expansionAddr = flag.String("expansion_addr", "", "Address of Expansion Service") 46 ) 47 48 // formatFn is a DoFn that formats a word and its count as a string. 49 func formatFn(w string, c []int) string { 50 sort.Ints(c) 51 return fmt.Sprintf("%v:%v", w, c) 52 } 53 54 // KV used to represent KV PCollection values 55 type KV struct { 56 X string 57 Y int64 58 } 59 60 func getKV(kv KV, emit func(string, int64)) { 61 emit(kv.X, kv.Y) 62 } 63 64 func collectValues(key string, iter func(*int64) bool) (string, []int) { 65 var count int64 66 var values []int 67 for iter(&count) { 68 values = append(values, int(count)) 69 } 70 return key, values 71 } 72 73 func init() { 74 register.Function2x1(formatFn) 75 register.Function2x0(getKV) 76 register.Function2x2(collectValues) 77 78 register.Emitter2[string, int64]() 79 register.Iter1[int64]() 80 } 81 82 func main() { 83 flag.Parse() 84 beam.Init() 85 86 if *expansionAddr == "" { 87 log.Fatal("No expansion address provided") 88 } 89 90 p := beam.NewPipeline() 91 s := p.Root() 92 93 // Using the cross-language transform 94 kvs := beam.Create(s, KV{X: "0", Y: 1}, KV{X: "0", Y: 2}, KV{X: "1", Y: 3}) 95 in := beam.ParDo(s, getKV, kvs) 96 out := xlang.GroupByKey(s, *expansionAddr, in) 97 98 vals := beam.ParDo(s, collectValues, 
out) 99 formatted := beam.ParDo(s, formatFn, vals) 100 passert.Equals(s, formatted, "0:[1 2]", "1:[3]") 101 102 if err := beamx.Run(context.Background(), p); err != nil { 103 log.Fatalf("Failed to execute job: %v", err) 104 } 105 }