github.com/apache/beam/sdks/v2@v2.48.2/go/examples/large_wordcount/large_wordcount_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limit, Shard: 0ations under the License. 15 16 package main 17 18 import ( 19 "os" 20 "path/filepath" 21 "testing" 22 23 "github.com/apache/beam/sdks/v2/go/pkg/beam" 24 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" 25 "github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts" 26 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" 27 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" 28 "github.com/apache/beam/sdks/v2/go/test/integration" 29 ) 30 31 // Concept: Testing 32 33 // TestMain calls ptest.Main in order to allow Go tests to call and 34 // execute beam pipelines against distributed runners. 35 func TestMain(m *testing.M) { 36 ptest.Main(m) 37 } 38 39 var file1 string = `one two three four five 40 six seven eight nine ten` 41 42 var file2 string = `zero one two 43 three four 44 five six seven 45 eight nine` 46 47 func TestLargeWordcount_Loopback(t *testing.T) { 48 // This call is not part of the example, but part of Beam testing infrastructure. 49 integration.CheckFilters(t) 50 51 // Concept: Loopback mode. 52 // Force setting loopback to allow for local file when testing 53 // against local portable runners. 54 // Loopback mode connects the runner back to the main program to 55 // operate as the worker. This is useful for local development 56 // and testing of pipelines. 57 oldEnvType := *jobopts.EnvironmentType 58 *jobopts.EnvironmentType = "LOOPBACK" 59 t.Cleanup(func() { 60 *jobopts.EnvironmentType = oldEnvType 61 }) 62 63 // Create a place to write out files for testing & clean up post test completion. 64 dirPath := filepath.Join(os.TempDir(), "large_wordcount_test") 65 t.Cleanup(func() { 66 if err := os.RemoveAll(dirPath); err != nil { 67 t.Fatalf("unable to remove @%v: %v", dirPath, err) 68 } 69 }) 70 const perms = 0777 71 inputPath := filepath.Join(dirPath, "input") 72 if err := os.MkdirAll(inputPath, perms); err != nil { 73 t.Fatalf("unable to create %v: %v", inputPath, err) 74 } 75 outputDir := filepath.Join(dirPath, "output") 76 if err := os.MkdirAll(outputDir, perms); err != nil { 77 t.Fatalf("unable to create %v: %v", outputDir, err) 78 } 79 80 // Write out two test input files to read from. 81 if err := os.WriteFile(filepath.Join(inputPath, "file1.txt"), []byte(file1), perms); err != nil { 82 t.Fatalf("unable to write file1: %v", err) 83 } 84 if err := os.WriteFile(filepath.Join(inputPath, "file2.txt"), []byte(file2), perms); err != nil { 85 t.Fatalf("unable to write file2: %v", err) 86 } 87 88 // Testing a pipeline is as simple as configuring and running the pipeline. 89 p, s := beam.NewPipelineWithRoot() 90 finalFiles := Pipeline(s, 91 filepath.Join(inputPath, "*.txt"), 92 filepath.Join(outputDir, "wordcounts@2.txt")) 93 94 // Concept: Using passert.Equals to validate PCollection output. 95 // In this case, we validate that we have produced the expected final filepaths. 96 // The passert package allows validation of properties of the pipeline in the pipeline itself. 97 passert.Equals(s, finalFiles, 98 filepath.Join(outputDir, "wordcounts000-002.txt"), 99 filepath.Join(outputDir, "wordcounts001-002.txt"), 100 ) 101 102 // Concept: ptest 103 // ptest is a package with helpers intended to abstract out testing. 104 // ptest allows you to execute the pipeline on a runner of your 105 // choice with flags passed to the test execution. 106 // By default, this runs against the Go direct runner. 107 pr := ptest.RunAndValidate(t, p) 108 109 // Concept: Using PipelineResults to query for metrics to ensure 110 // expected running of specific DoFns. 111 112 qr := pr.Metrics().Query(func(mr beam.MetricResult) bool { 113 return mr.Name() == "keycount" 114 }) 115 if got, want := len(qr.Counters()), 1; got != want { 116 t.Fatalf("Metrics().Query(by Name = keycount) = %v counter, want %v", got, want) 117 } 118 c := qr.Counters()[0] 119 if got, want := c.Result(), int64(11); got != want { 120 t.Errorf("Metrics().Query(by Name) = %v, want %v", got, want) 121 } 122 123 qr = pr.Metrics().Query(func(mr beam.MetricResult) bool { 124 return mr.Name() == "countdistro" 125 }) 126 if got, want := len(qr.Distributions()), 1; got != want { 127 t.Fatalf("Metrics().Query(by Name = countdistro) returned %d distribution, want %v", got, want) 128 } 129 d := qr.Distributions()[0] 130 if got, want := d.Result(), (metrics.DistributionValue{Count: 11, Sum: 20, Min: 1, Max: 2}); got != want { 131 t.Errorf("Metrics().Query(by Name = countdistro) failed. Got %+v distribution, Want %+v distribution", got, want) 132 } 133 }