// Source: github.com/apache/beam/sdks/v2@v2.48.2/go/examples/xlang/wordcount/wordcount.go

// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.  See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License.  You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// wordcount exemplifies using a cross-language Count transform from a test
// expansion service to count words.
//
// Prerequisites to run wordcount:
// –> [Required] Job needs to be submitted to a portable runner (--runner=universal)
// –> [Required] Endpoint of job service needs to be passed (--endpoint=<ip:port>)
// –> [Required] Endpoint of expansion service needs to be passed (--expansion_addr=<ip:port>)
// –> [Optional] Environment type can be LOOPBACK. Defaults to DOCKER.
(--environment_type=LOOPBACK|DOCKER) 24 package main 25 26 import ( 27 "context" 28 "flag" 29 "fmt" 30 "log" 31 "regexp" 32 "strings" 33 34 "github.com/apache/beam/sdks/v2/go/examples/xlang" 35 "github.com/apache/beam/sdks/v2/go/pkg/beam" 36 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 37 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" 38 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 39 40 // Imports to enable correct filesystem access and runner setup in LOOPBACK mode 41 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs" 42 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local" 43 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal" 44 ) 45 46 var ( 47 expansionAddr = flag.String("expansion_addr", "", "Address of Expansion Service") 48 ) 49 50 var ( 51 wordRE = regexp.MustCompile(`[a-zA-Z]+('[a-z])?`) 52 empty = beam.NewCounter("extract", "emptyLines") 53 lineLen = beam.NewDistribution("extract", "lineLenDistro") 54 ) 55 56 // extractFn is a DoFn that emits the words in a given line. 57 func extractFn(ctx context.Context, line string, emit func(string)) { 58 lineLen.Update(ctx, int64(len(line))) 59 if len(strings.TrimSpace(line)) == 0 { 60 empty.Inc(ctx, 1) 61 } 62 for _, word := range wordRE.FindAllString(line, -1) { 63 emit(word) 64 } 65 } 66 67 // formatFn is a DoFn that formats a word and its count as a string. 
68 func formatFn(w string, c int64) string { 69 return fmt.Sprintf("%s:%v", w, c) 70 } 71 72 func init() { 73 register.Function3x0(extractFn) 74 register.Function2x1(formatFn) 75 76 register.Emitter1[string]() 77 } 78 79 func main() { 80 flag.Parse() 81 beam.Init() 82 83 if *expansionAddr == "" { 84 log.Fatal("No expansion address provided") 85 } 86 87 p := beam.NewPipeline() 88 s := p.Root() 89 90 lines := beam.CreateList(s, strings.Split(lorem, "\n")) 91 col := beam.ParDo(s, extractFn, lines) 92 93 // Using the cross-language transform 94 counted := xlang.Count(s, *expansionAddr, col) 95 96 formatted := beam.ParDo(s, formatFn, counted) 97 passert.Equals(s, formatted, "a:4", "b:4", "c:5") 98 99 if err := beamx.Run(context.Background(), p); err != nil { 100 log.Fatalf("Failed to execute job: %v", err) 101 } 102 } 103 104 var lorem = `a b b c 105 b c a 106 a b c 107 c 108 a 109 c 110 `