github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/integration.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // Package integration provides functionality that needs to be shared between all
    17  // integration tests.
    18  //
    19  // Integration tests are implemented through Go's test framework, as test
    20  // functions that create and execute pipelines using the ptest package. Tests
    21  // should be placed in smaller sub-packages for organizational purposes and
    22  // parallelism (tests are only run in parallel across different packages).
    23  // Integration tests should always begin with a call to CheckFilters to ensure
    24  // test filters can be applied, and each package containing integration tests
    25  // should call ptest.Main in a TestMain function if it uses ptest.
    26  //
    27  // Running integration tests can be done with a go test call with any flags that
    28  // are required by the test pipelines, such as --runner or --endpoint.
    29  // Example:
    30  //
    31  //	go test -v ./sdks/go/test/integration/... --runner=portable --endpoint=localhost:8099
    32  //
    33  // Alternatively, tests can be executed by running the
    34  // run_validatesrunner_tests.sh script, which also performs much of the
    35  // environment setup, or by calling gradle commands in :sdks:go:test.
    36  package integration
    37  
    38  import (
    39  	"fmt"
    40  	"math/rand"
    41  	"regexp"
    42  	"strings"
    43  	"testing"
    44  	"time"
    45  
    46  	// common runner flag.
    47  	"github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts"
    48  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
    49  )
    50  
    51  // Filters for temporarily skipping integration tests. All filters are regex
    52  // matchers that must match the full name of a test at the point where
    53  // CheckFilters is called. Multiple tests can be skipped by using regex
    54  // wildcards. (ex. "TestXLang_.*" filters all tests starting with TestXLang_)
    55  //
// It is strongly recommended to include TODOs, GitHub issues, or just comments
// describing why tests are being skipped.
    58  
    59  // sickbay filters tests that fail due to Go SDK errors. These tests will not
    60  // execute on any runners.
    61  var sickbay = []string{}
    62  
    63  // Runner-specific test filters, for features that are not yet supported on
    64  // specific runners.
    65  
// directFilters lists the tests that CheckFilters skips when the selected
// runner is "direct" or "DirectRunner". Each entry is a regular expression
// that must match the full test name.
var directFilters = []string{
	// The direct runner does not yet support cross-language.
	"TestXLang.*",
	"TestKafkaIO.*",
	"TestBigQueryIO.*",
	"TestDebeziumIO_BasicRead",
	"TestJDBCIO_BasicReadWrite",
	"TestJDBCIO_PostgresReadWrite",
	"TestDataframe",
	// Triggers, Panes are not yet supported
	"TestTrigger.*",
	"TestPanes",
	// The direct runner does not support the TestStream primitive
	"TestTestStream.*",
	// (https://github.com/apache/beam/issues/21130): The direct runner does not support windowed side inputs
	"TestValidateWindowedSideInputs",
	// (https://github.com/apache/beam/issues/21130): The direct runner does not currently support multimap side inputs
	"TestParDoMultiMapSideInput",
	// NOTE(review): no reason is recorded for filtering the test below; it is
	// presumably unrelated to the side-input comment above — TODO confirm.
	"TestLargeWordcount_Loopback",
	// The direct runner does not support self-checkpointing
	"TestCheckpointing",
	// The direct runner does not support pipeline drain for SDF.
	"TestDrain",
	// FhirIO currently only supports Dataflow runner
	"TestFhirIO.*",
	// OOMs currently only lead to heap dumps on Dataflow runner
	"TestOomParDo",
	// The direct runner does not support user state.
	"TestValueState",
	"TestValueStateWindowed",
	"TestValueStateClear",
	"TestBagState",
	"TestBagStateClear",
	"TestCombiningState",
	"TestMapState",
	"TestMapStateClear",
	"TestSetState",
	"TestSetStateClear",
}
   105  
// portableFilters lists the tests that CheckFilters skips when the selected
// runner is "portable" or "PortableRunner". Each entry is a regular
// expression that must match the full test name.
var portableFilters = []string{
	// The portable runner does not support the TestStream primitive
	"TestTestStream.*",
	// The trigger and pane tests use TestStream
	"TestTrigger.*",
	"TestPanes",
	// TODO(https://github.com/apache/beam/issues/21058): Python portable runner times out on Kafka reads.
	"TestKafkaIO.*",
	// TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners.
	"TestBigQueryIO.*",
	// The portable runner does not support self-checkpointing
	"TestCheckpointing",
	// The portable runner does not support pipeline drain for SDF.
	"TestDrain",
	// FhirIO currently only supports Dataflow runner
	"TestFhirIO.*",
	// OOMs currently only lead to heap dumps on Dataflow runner
	"TestOomParDo",
	// The portable runner does not support user state.
	"TestValueState",
	"TestValueStateWindowed",
	"TestValueStateClear",
	"TestBagState",
	"TestBagStateClear",
	"TestCombiningState",
	"TestMapState",
	"TestMapStateClear",
	"TestSetState",
	"TestSetStateClear",
}
   136  
// flinkFilters lists the tests that CheckFilters skips when the selected
// runner is "flink" or "FlinkRunner". Each entry is a regular expression
// that must match the full test name.
var flinkFilters = []string{
	// TODO(https://github.com/apache/beam/issues/20723): Flink tests timing out on reads.
	"TestXLang_Combine.*",
	"TestDebeziumIO_BasicRead",
	// TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners.
	"TestBigQueryIO.*",
	// The number of produced outputs in AfterSynchronizedProcessingTime varies in different runs.
	"TestTriggerAfterSynchronizedProcessingTime",
	// The flink runner does not support pipeline drain for SDF.
	"TestDrain",
	// FhirIO currently only supports Dataflow runner
	"TestFhirIO.*",
	// OOMs currently only lead to heap dumps on Dataflow runner
	"TestOomParDo",
	// Flink does not support map based state types.
	"TestMapState",
	"TestMapStateClear",
	"TestSetStateClear",
	"TestSetState",
}
   157  
// samzaFilters lists the tests that CheckFilters skips when the selected
// runner is "samza" or "SamzaRunner". Each entry is a regular expression
// that must match the full test name.
var samzaFilters = []string{
	// TODO(https://github.com/apache/beam/issues/20987): Samza tests invalid encoding.
	"TestReshuffle",
	"TestReshuffleKV",
	// The Samza runner does not support the TestStream primitive
	"TestTestStream.*",
	// The trigger and pane tests use TestStream
	"TestTrigger.*",
	"TestPanes",
	// TODO(https://github.com/apache/beam/issues/21244): Samza doesn't yet support post job metrics, used by WordCount
	"TestWordCount.*",
	// TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners.
	"TestBigQueryIO.*",
	// The Samza runner does not support self-checkpointing
	"TestCheckpointing",
	// The samza runner does not support pipeline drain for SDF.
	"TestDrain",
	// FhirIO currently only supports Dataflow runner
	"TestFhirIO.*",
	// OOMs currently only lead to heap dumps on Dataflow runner
	"TestOomParDo",
	// The samza runner does not support user state.
	"TestValueState",
	"TestValueStateWindowed",
	"TestValueStateClear",
	"TestBagState",
	"TestBagStateClear",
	"TestCombiningState",
	"TestMapState",
	"TestMapStateClear",
	"TestSetState",
	"TestSetStateClear",
	// TODO(https://github.com/apache/beam/issues/26126): Java runner issue (ActiveBundle has no registered handler)
	"TestDebeziumIO_BasicRead",
}
   193  
// sparkFilters lists the tests that CheckFilters skips when the selected
// runner is "spark" or "SparkRunner". Each entry is a regular expression
// that must match the full test name.
var sparkFilters = []string{
	// TODO(BEAM-11498): XLang tests broken with Spark runner.
	"TestXLang.*",
	"TestParDoSideInput",
	"TestParDoKVSideInput",
	// The Spark runner does not support the TestStream primitive
	"TestTestStream.*",
	// The trigger and pane tests use TestStream
	"TestTrigger.*",
	"TestPanes",
	// [BEAM-13921]: Spark doesn't support side inputs to executable stages
	"TestDebeziumIO_BasicRead",
	// TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners.
	"TestBigQueryIO.*",
	// The spark runner does not support self-checkpointing
	"TestCheckpointing",
	// The spark runner does not support pipeline drain for SDF.
	"TestDrain",
	// FhirIO currently only supports Dataflow runner
	"TestFhirIO.*",
	// OOMs currently only lead to heap dumps on Dataflow runner
	"TestOomParDo",
	// Spark does not support map based state types.
	"TestMapState",
	"TestMapStateClear",
	"TestSetStateClear",
	"TestSetState",
}
   222  
// dataflowFilters lists the tests that CheckFilters skips when the selected
// runner is "dataflow" or "DataflowRunner". Each entry is a regular
// expression that must match the full test name.
var dataflowFilters = []string{
	// The Dataflow runner doesn't work with tests using testcontainers locally.
	"TestJDBCIO_BasicReadWrite",
	"TestJDBCIO_PostgresReadWrite",
	"TestDebeziumIO_BasicRead",
	"TestMongoDBIO.*",
	// TODO(BEAM-11576): TestFlattenDup failing on this runner.
	"TestFlattenDup",
	// The Dataflow runner does not support the TestStream primitive
	"TestTestStream.*",
	// The trigger and pane tests use TestStream
	"TestTrigger.*",
	"TestPanes",
	// There is no infrastructure for running KafkaIO tests with Dataflow.
	"TestKafkaIO.*",
	// Dataflow doesn't support any test that requires loopback.
	// Eg. For FileIO examples.
	".*Loopback.*",
	// Dataflow does not automatically terminate the TestCheckpointing pipeline when
	// complete.
	"TestCheckpointing",
	// TODO(21761): This test needs to provide GCP project to expansion service.
	"TestBigQueryIO_BasicWriteQueryRead",
	// Dataflow does not drain jobs by itself.
	"TestDrain",
}
   249  
   250  // CheckFilters checks if an integration test is filtered to be skipped, either
   251  // because the intended runner does not support it, or the test is sickbayed.
   252  // This method should be called at the beginning of any integration test. If
   253  // t.Run is used, CheckFilters should be called within the t.Run callback, so
   254  // that sub-tests can be skipped individually.
   255  func CheckFilters(t *testing.T) {
   256  	if !ptest.MainCalled() {
   257  		panic("ptest.Main() has not been called: please override TestMain to ensure that the integration test runs properly.")
   258  	}
   259  
   260  	// Check for sickbaying first.
   261  	n := t.Name()
   262  	for _, f := range sickbay {
   263  		// Add start and end of string regexp matchers so only a full match is
   264  		// counted.
   265  		f = fmt.Sprintf("^%v$", f)
   266  		match, err := regexp.MatchString(f, n)
   267  		if err != nil {
   268  			t.Errorf("Matching of regex '%v' with test '%v' failed: %v", f, n, err)
   269  		}
   270  		if match {
   271  			t.Skipf("Test %v is currently sickbayed on all runners", n)
   272  		}
   273  	}
   274  	s1 := rand.NewSource(time.Now().UnixNano())
   275  	r1 := rand.New(s1)
   276  	*jobopts.JobName = fmt.Sprintf("go-%v-%v", strings.ToLower(n), r1.Intn(1000))
   277  
   278  	// Test for runner-specific skipping second.
   279  	var filters []string
   280  	runner := *ptest.Runner
   281  	if runner == "" {
   282  		runner = ptest.DefaultRunner()
   283  	}
   284  	switch runner {
   285  	case "direct", "DirectRunner":
   286  		filters = directFilters
   287  	case "portable", "PortableRunner":
   288  		filters = portableFilters
   289  	case "flink", "FlinkRunner":
   290  		filters = flinkFilters
   291  	case "samza", "SamzaRunner":
   292  		filters = samzaFilters
   293  	case "spark", "SparkRunner":
   294  		filters = sparkFilters
   295  	case "dataflow", "DataflowRunner":
   296  		filters = dataflowFilters
   297  	default:
   298  		return
   299  	}
   300  
   301  	for _, f := range filters {
   302  		// Add start and end of string regexp matchers so only a full match is
   303  		// counted.
   304  		f = fmt.Sprintf("^%v$", f)
   305  		match, err := regexp.MatchString(f, n)
   306  		if err != nil {
   307  			t.Errorf("Matching of regex '%v' with test '%v' failed: %v", f, n, err)
   308  		}
   309  		if match {
   310  			t.Skipf("Test %v is currently filtered for runner %v", n, runner)
   311  		}
   312  	}
   313  }