github.com/apache/beam/sdks/v2@v2.48.2/go/test/regression/lperror.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package regression
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"reflect"
    22  	"sort"
    23  
    24  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    25  )
    26  
    27  // REPRO found by https://github.com/zelliott
    28  
    29  type fruit struct {
    30  	Name string
    31  }
    32  
    33  func toFoo(id int, _ func(**fruit) bool) (int, string) {
    34  	return id, "Foo"
    35  }
    36  
    37  func toID(id int, fruitIter func(**fruit) bool, _ func(*string) bool) int {
    38  	var fruit *fruit
    39  	for fruitIter(&fruit) {
    40  	}
    41  	return id
    42  }
    43  
    44  // LPErrorPipeline constructs a pipeline that has a GBK followed by a CoGBK using the same
    45  // input, with schema encoded structs as elements. This ends up having the stage after the
    46  // CoGBK fail since the decoder post-cogbk is missing a Length Prefix coder that was
    47  // applied to the GBK input, but not the CoGBK output.
    48  // Root is likely in that there's no Beam standard CoGBK format for inject and expand.
    49  // JIRA: BEAM-12438
    50  func LPErrorPipeline(s beam.Scope) beam.PCollection {
    51  	// ["Apple", "Banana", "Cherry"]
    52  	fruits := beam.CreateList(s, []*fruit{{"Apple"}, {"Banana"}, {"Cherry"}})
    53  
    54  	// [0 "Apple", 0 "Banana", 0 "Cherry"]
    55  	fruitsKV := beam.AddFixedKey(s, fruits)
    56  
    57  	// [0 ["Apple", "Banana", "Cherry"]]
    58  	fruitsGBK := beam.GroupByKey(s, fruitsKV)
    59  
    60  	// [0 "Foo"]
    61  	fooKV := beam.ParDo(s, toFoo, fruitsGBK)
    62  
    63  	// [0 ["Apple", "Banana", "Cherry"] ["Foo"]]
    64  	fruitsFooCoGBK := beam.CoGroupByKey(s, fruitsKV, fooKV)
    65  
    66  	// [0]
    67  	return beam.ParDo(s, toID, fruitsFooCoGBK)
    68  }
    69  
    70  const (
    71  	// MetricNamespace is the namespace for regression test metrics.
    72  	MetricNamespace = string("regression")
    73  	// FruitCounterName is the name of the fruit counter metric.
    74  	FruitCounterName = string("fruitCount")
    75  )
    76  
    77  func sendFruit(_ []byte, emit func(fruit)) {
    78  	emit(fruit{"Apple"})
    79  	emit(fruit{"Banana"})
    80  	emit(fruit{"Cherry"})
    81  }
    82  
    83  // countFruit counts the fruit that pass through.
    84  func countFruit(ctx context.Context, v fruit) fruit {
    85  	beam.NewCounter(MetricNamespace, FruitCounterName).Inc(ctx, 1)
    86  	return v
    87  }
    88  
    89  type iterSideStrings struct {
    90  	Wants []string
    91  }
    92  
    93  func (fn *iterSideStrings) ProcessElement(_ []byte, iter func(*fruit) bool) error {
    94  	var val fruit
    95  	var gots []string
    96  	for iter(&val) {
    97  		gots = append(gots, val.Name)
    98  	}
    99  	sort.Strings(gots)
   100  	sort.Strings(fn.Wants)
   101  
   102  	if got, want := len(gots), len(fn.Wants); got != want {
   103  		return fmt.Errorf("len mismatch between lists. got %v, want %v; \n\t got: %v \n\twant: %v", got, want, gots, fn.Wants)
   104  	}
   105  
   106  	for i := range fn.Wants {
   107  		if got, want := gots[i], fn.Wants[i]; got != want {
   108  			return fmt.Errorf("mismatch value in sorted list at index %d: got %v, want %v", i, got, want)
   109  		}
   110  	}
   111  	return nil
   112  }
   113  
   114  func init() {
   115  	beam.RegisterFunction(countFruit)
   116  	beam.RegisterFunction(sendFruit)
   117  	beam.RegisterType(reflect.TypeOf((*iterSideStrings)(nil)))
   118  	beam.RegisterType(reflect.TypeOf((*fruit)(nil)).Elem())
   119  }
   120  
   121  // LPErrorReshufflePipeline checks a Row type with reshuffle transforms.
   122  // It's intentionally just a prefix with validation done in the specific
   123  // test cases, as the success/failure is dependent on subsequent pipeline
   124  // use of data.
   125  //
   126  // This pipeline will output a pcollection containing 3 fruit.
   127  func LPErrorReshufflePipeline(s beam.Scope) beam.PCollection {
   128  	sf := s.Scope("Basket")
   129  	fruits := beam.ParDo(sf, sendFruit, beam.Impulse(sf))
   130  	return beam.Reshuffle(sf, fruits)
   131  }