github.com/apache/beam/sdks/v2@v2.48.2/go/examples/xlang/wordcount/wordcount.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // wordcount exemplifies using a cross-language Count transform from a test
    17  // expansion service to count words.
    18  //
    19  // Prerequisites to run wordcount:
    20  // –> [Required] Job needs to be submitted to a portable runner (--runner=universal)
    21  // –> [Required] Endpoint of job service needs to be passed (--endpoint=<ip:port>)
    22  // –> [Required] Endpoint of expansion service needs to be passed (--expansion_addr=<ip:port>)
    23  // –> [Optional] Environment type can be LOOPBACK. Defaults to DOCKER. (--environment_type=LOOPBACK|DOCKER)
    24  package main
    25  
    26  import (
    27  	"context"
    28  	"flag"
    29  	"fmt"
    30  	"log"
    31  	"regexp"
    32  	"strings"
    33  
    34  	"github.com/apache/beam/sdks/v2/go/examples/xlang"
    35  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    36  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    37  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
    38  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    39  
    40  	// Imports to enable correct filesystem access and runner setup in LOOPBACK mode
    41  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/gcs"
    42  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local"
    43  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/universal"
    44  )
    45  
    46  var (
    47  	expansionAddr = flag.String("expansion_addr", "", "Address of Expansion Service")
    48  )
    49  
    50  var (
    51  	wordRE  = regexp.MustCompile(`[a-zA-Z]+('[a-z])?`)
    52  	empty   = beam.NewCounter("extract", "emptyLines")
    53  	lineLen = beam.NewDistribution("extract", "lineLenDistro")
    54  )
    55  
    56  // extractFn is a DoFn that emits the words in a given line.
    57  func extractFn(ctx context.Context, line string, emit func(string)) {
    58  	lineLen.Update(ctx, int64(len(line)))
    59  	if len(strings.TrimSpace(line)) == 0 {
    60  		empty.Inc(ctx, 1)
    61  	}
    62  	for _, word := range wordRE.FindAllString(line, -1) {
    63  		emit(word)
    64  	}
    65  }
    66  
    67  // formatFn is a DoFn that formats a word and its count as a string.
    68  func formatFn(w string, c int64) string {
    69  	return fmt.Sprintf("%s:%v", w, c)
    70  }
    71  
    72  func init() {
    73  	register.Function3x0(extractFn)
    74  	register.Function2x1(formatFn)
    75  
    76  	register.Emitter1[string]()
    77  }
    78  
    79  func main() {
    80  	flag.Parse()
    81  	beam.Init()
    82  
    83  	if *expansionAddr == "" {
    84  		log.Fatal("No expansion address provided")
    85  	}
    86  
    87  	p := beam.NewPipeline()
    88  	s := p.Root()
    89  
    90  	lines := beam.CreateList(s, strings.Split(lorem, "\n"))
    91  	col := beam.ParDo(s, extractFn, lines)
    92  
    93  	// Using the cross-language transform
    94  	counted := xlang.Count(s, *expansionAddr, col)
    95  
    96  	formatted := beam.ParDo(s, formatFn, counted)
    97  	passert.Equals(s, formatted, "a:4", "b:4", "c:5")
    98  
    99  	if err := beamx.Run(context.Background(), p); err != nil {
   100  		log.Fatalf("Failed to execute job: %v", err)
   101  	}
   102  }
   103  
   104  var lorem = `a b b c
   105  b c a
   106  a b c
   107  c
   108  a
   109  c
   110  `