github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/wordcount/wordcount_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package wordcount 17 18 import ( 19 "strings" 20 "testing" 21 22 "github.com/apache/beam/sdks/v2/go/pkg/beam" 23 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" 24 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" 25 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" 26 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" 27 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" 28 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" 29 "github.com/apache/beam/sdks/v2/go/test/integration" 30 ) 31 32 func TestWordCount(t *testing.T) { 33 tests := []struct { 34 lines []string 35 words int 36 hash string 37 smallWordsCount int64 38 lineLen metrics.DistributionValue 39 transform string 40 transformCount int 41 }{ 42 { 43 []string{ 44 "foo", 45 }, 46 1, 47 "6zZtmVTet7aIhR3wmPE8BA==", 48 1, 49 metrics.DistributionValue{Count: 1, Sum: 3, Min: 3, Max: 3}, 50 "wordcount.extractFn", 51 1, 52 }, 53 { 54 []string{ 55 "foo foo foo", 56 "foo foo", 57 "foo", 58 }, 59 1, 60 "jAk8+k4BOH7vQDUiUZdfWg==", 61 6, 62 metrics.DistributionValue{Count: 3, Sum: 21, Min: 3, Max: 11}, 63 "extractFn", 64 1, 65 }, 66 { 67 []string{ 68 "bar bar foo bar foo foo", 69 }, 70 2, 71 "Nz70m/sn3Ep9o484r7MalQ==", 72 6, 73 metrics.DistributionValue{Count: 1, Sum: 23, Min: 23, Max: 23}, 74 "CountFn", 75 1, 76 }, 77 { 78 []string{ 79 "foo bar foo bar foo bar", 80 }, 81 2, 82 "Nz70m/sn3Ep9o484r7MalQ==", // ordering doesn't matter: same hash as above 83 6, 84 metrics.DistributionValue{Count: 1, Sum: 23, Min: 23, Max: 23}, 85 "extract", 86 1, 87 }, 88 { 89 []string{ 90 "", 91 "bar foo bar", 92 " \t ", 93 " \n\n\n ", 94 "foo bar", 95 " foo", 96 }, 97 2, 98 "Nz70m/sn3Ep9o484r7MalQ==", // whitespace doesn't matter: same hash as above 99 6, 100 metrics.DistributionValue{Count: 6, Sum: 37, Min: 0, Max: 11}, 101 "CreateFn", 102 0, 103 }, 104 } 105 106 for _, test := range tests { 107 integration.CheckFilters(t) 108 p, s := beam.NewPipelineWithRoot() 109 lines := beam.CreateList(s, test.lines) 110 WordCountFromPCol(s, lines, test.hash, test.words) 111 pr, err := ptest.RunWithMetrics(p) 112 if err != nil { 113 t.Errorf("WordCount(\"%v\") failed: %v", strings.Join(test.lines, "|"), err) 114 } 115 116 qr := pr.Metrics().Query(func(mr beam.MetricResult) bool { 117 return mr.Name() == "smallWords" 118 }) 119 counter := metrics.CounterResult{} 120 if len(qr.Counters()) != 0 { 121 counter = qr.Counters()[0] 122 } 123 if counter.Result() != test.smallWordsCount { 124 t.Errorf("Metrics().Query(by Name) failed. Got %d counters, Want %d counters", counter.Result(), test.smallWordsCount) 125 } 126 127 qr = pr.Metrics().Query(func(mr beam.MetricResult) bool { 128 return mr.Name() == "lineLenDistro" 129 }) 130 distribution := metrics.DistributionResult{} 131 if len(qr.Distributions()) != 0 { 132 distribution = qr.Distributions()[0] 133 } 134 if distribution.Result() != test.lineLen { 135 t.Errorf("Metrics().Query(by Name) failed. Got %v distribution, Want %v distribution", distribution.Result(), test.lineLen) 136 } 137 } 138 } 139 140 func TestMain(m *testing.M) { 141 ptest.Main(m) 142 }