github.com/apache/beam/sdks/v2@v2.48.2/go/test/regression/lperror.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package regression 17 18 import ( 19 "context" 20 "fmt" 21 "reflect" 22 "sort" 23 24 "github.com/apache/beam/sdks/v2/go/pkg/beam" 25 ) 26 27 // REPRO found by https://github.com/zelliott 28 29 type fruit struct { 30 Name string 31 } 32 33 func toFoo(id int, _ func(**fruit) bool) (int, string) { 34 return id, "Foo" 35 } 36 37 func toID(id int, fruitIter func(**fruit) bool, _ func(*string) bool) int { 38 var fruit *fruit 39 for fruitIter(&fruit) { 40 } 41 return id 42 } 43 44 // LPErrorPipeline constructs a pipeline that has a GBK followed by a CoGBK using the same 45 // input, with schema encoded structs as elements. This ends up having the stage after the 46 // CoGBK fail since the decoder post-cogbk is missing a Length Prefix coder that was 47 // applied to the GBK input, but not the CoGBK output. 48 // Root is likely in that there's no Beam standard CoGBK format for inject and expand. 49 // JIRA: BEAM-12438 50 func LPErrorPipeline(s beam.Scope) beam.PCollection { 51 // ["Apple", "Banana", "Cherry"] 52 fruits := beam.CreateList(s, []*fruit{{"Apple"}, {"Banana"}, {"Cherry"}}) 53 54 // [0 "Apple", 0 "Banana", 0 "Cherry"] 55 fruitsKV := beam.AddFixedKey(s, fruits) 56 57 // [0 ["Apple", "Banana", "Cherry"]] 58 fruitsGBK := beam.GroupByKey(s, fruitsKV) 59 60 // [0 "Foo"] 61 fooKV := beam.ParDo(s, toFoo, fruitsGBK) 62 63 // [0 ["Apple", "Banana", "Cherry"] ["Foo"]] 64 fruitsFooCoGBK := beam.CoGroupByKey(s, fruitsKV, fooKV) 65 66 // [0] 67 return beam.ParDo(s, toID, fruitsFooCoGBK) 68 } 69 70 const ( 71 // MetricNamespace is the namespace for regression test metrics. 72 MetricNamespace = string("regression") 73 // FruitCounterName is the name of the fruit counter metric. 74 FruitCounterName = string("fruitCount") 75 ) 76 77 func sendFruit(_ []byte, emit func(fruit)) { 78 emit(fruit{"Apple"}) 79 emit(fruit{"Banana"}) 80 emit(fruit{"Cherry"}) 81 } 82 83 // countFruit counts the fruit that pass through. 84 func countFruit(ctx context.Context, v fruit) fruit { 85 beam.NewCounter(MetricNamespace, FruitCounterName).Inc(ctx, 1) 86 return v 87 } 88 89 type iterSideStrings struct { 90 Wants []string 91 } 92 93 func (fn *iterSideStrings) ProcessElement(_ []byte, iter func(*fruit) bool) error { 94 var val fruit 95 var gots []string 96 for iter(&val) { 97 gots = append(gots, val.Name) 98 } 99 sort.Strings(gots) 100 sort.Strings(fn.Wants) 101 102 if got, want := len(gots), len(fn.Wants); got != want { 103 return fmt.Errorf("len mismatch between lists. got %v, want %v; \n\t got: %v \n\twant: %v", got, want, gots, fn.Wants) 104 } 105 106 for i := range fn.Wants { 107 if got, want := gots[i], fn.Wants[i]; got != want { 108 return fmt.Errorf("mismatch value in sorted list at index %d: got %v, want %v", i, got, want) 109 } 110 } 111 return nil 112 } 113 114 func init() { 115 beam.RegisterFunction(countFruit) 116 beam.RegisterFunction(sendFruit) 117 beam.RegisterType(reflect.TypeOf((*iterSideStrings)(nil))) 118 beam.RegisterType(reflect.TypeOf((*fruit)(nil)).Elem()) 119 } 120 121 // LPErrorReshufflePipeline checks a Row type with reshuffle transforms. 122 // It's intentionally just a prefix with validation done in the specific 123 // test cases, as the success/failure is dependent on subsequent pipeline 124 // use of data. 125 // 126 // This pipeline will output a pcollection containing 3 fruit. 127 func LPErrorReshufflePipeline(s beam.Scope) beam.PCollection { 128 sf := s.Scope("Basket") 129 fruits := beam.ParDo(sf, sendFruit, beam.Impulse(sf)) 130 return beam.Reshuffle(sf, fruits) 131 }