github.com/apache/beam/sdks/v2@v2.48.2/go/examples/large_wordcount/large_wordcount_test.go

github.com/apache/beam/sdks/v2@v2.48.2/go/examples/large_wordcount/large_wordcount_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package main
    17  
    18  import (
    19  	"os"
    20  	"path/filepath"
    21  	"testing"
    22  
    23  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    24  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics"
    25  	"github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts"
    26  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert"
    27  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
    28  	"github.com/apache/beam/sdks/v2/go/test/integration"
    29  )
    30  
    31  // Concept: Testing
    32  
// TestMain calls ptest.Main in order to allow Go tests to call and
// execute beam pipelines against distributed runners.
//
// It is the package's test entry point: the *testing.M it receives is
// handed straight to ptest.Main, which takes over running the tests.
func TestMain(m *testing.M) {
	ptest.Main(m)
}
    38  
    39  var file1 string = `one two three four five
    40  six seven eight nine ten`
    41  
    42  var file2 string = `zero one two
    43   three four
    44  five six seven 
    45  eight nine`
    46  
    47  func TestLargeWordcount_Loopback(t *testing.T) {
    48  	// This call is not part of the example, but part of Beam testing infrastructure.
    49  	integration.CheckFilters(t)
    50  
    51  	// Concept: Loopback mode.
    52  	// Force setting loopback to allow for local file when testing
    53  	// against local portable runners.
    54  	// Loopback mode connects the runner back to the main program to
    55  	// operate as the worker. This is useful for local development
    56  	// and testing of pipelines.
    57  	oldEnvType := *jobopts.EnvironmentType
    58  	*jobopts.EnvironmentType = "LOOPBACK"
    59  	t.Cleanup(func() {
    60  		*jobopts.EnvironmentType = oldEnvType
    61  	})
    62  
    63  	// Create a place to write out files for testing & clean up post test completion.
    64  	dirPath := filepath.Join(os.TempDir(), "large_wordcount_test")
    65  	t.Cleanup(func() {
    66  		if err := os.RemoveAll(dirPath); err != nil {
    67  			t.Fatalf("unable to remove @%v: %v", dirPath, err)
    68  		}
    69  	})
    70  	const perms = 0777
    71  	inputPath := filepath.Join(dirPath, "input")
    72  	if err := os.MkdirAll(inputPath, perms); err != nil {
    73  		t.Fatalf("unable to create %v: %v", inputPath, err)
    74  	}
    75  	outputDir := filepath.Join(dirPath, "output")
    76  	if err := os.MkdirAll(outputDir, perms); err != nil {
    77  		t.Fatalf("unable to create %v: %v", outputDir, err)
    78  	}
    79  
    80  	// Write out two test input files to read from.
    81  	if err := os.WriteFile(filepath.Join(inputPath, "file1.txt"), []byte(file1), perms); err != nil {
    82  		t.Fatalf("unable to write file1: %v", err)
    83  	}
    84  	if err := os.WriteFile(filepath.Join(inputPath, "file2.txt"), []byte(file2), perms); err != nil {
    85  		t.Fatalf("unable to write file2: %v", err)
    86  	}
    87  
    88  	// Testing a pipeline is as simple as configuring and running the pipeline.
    89  	p, s := beam.NewPipelineWithRoot()
    90  	finalFiles := Pipeline(s,
    91  		filepath.Join(inputPath, "*.txt"),
    92  		filepath.Join(outputDir, "wordcounts@2.txt"))
    93  
    94  	// Concept: Using passert.Equals to validate PCollection output.
    95  	// In this case, we validate that we have produced the expected final filepaths.
    96  	// The passert package allows validation of properties of the pipeline in the pipeline itself.
    97  	passert.Equals(s, finalFiles,
    98  		filepath.Join(outputDir, "wordcounts000-002.txt"),
    99  		filepath.Join(outputDir, "wordcounts001-002.txt"),
   100  	)
   101  
   102  	// Concept: ptest
   103  	// ptest is a package with helpers intended to abstract out testing.
   104  	// ptest allows you to execute the pipeline on a runner of your
   105  	// choice with flags passed to the test execution.
   106  	// By default, this runs against the Go direct runner.
   107  	pr := ptest.RunAndValidate(t, p)
   108  
   109  	// Concept: Using PipelineResults to query for metrics to ensure
   110  	// expected running of specific DoFns.
   111  
   112  	qr := pr.Metrics().Query(func(mr beam.MetricResult) bool {
   113  		return mr.Name() == "keycount"
   114  	})
   115  	if got, want := len(qr.Counters()), 1; got != want {
   116  		t.Fatalf("Metrics().Query(by Name = keycount) = %v counter, want %v", got, want)
   117  	}
   118  	c := qr.Counters()[0]
   119  	if got, want := c.Result(), int64(11); got != want {
   120  		t.Errorf("Metrics().Query(by Name) = %v, want %v", got, want)
   121  	}
   122  
   123  	qr = pr.Metrics().Query(func(mr beam.MetricResult) bool {
   124  		return mr.Name() == "countdistro"
   125  	})
   126  	if got, want := len(qr.Distributions()), 1; got != want {
   127  		t.Fatalf("Metrics().Query(by Name = countdistro) returned %d distribution, want %v", got, want)
   128  	}
   129  	d := qr.Distributions()[0]
   130  	if got, want := d.Result(), (metrics.DistributionValue{Count: 11, Sum: 20, Min: 1, Max: 2}); got != want {
   131  		t.Errorf("Metrics().Query(by Name = countdistro) failed. Got %+v distribution, Want %+v distribution", got, want)
   132  	}
   133  }