github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/integration.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // Package integration provides functionality that needs to be shared between all 17 // integration tests. 18 // 19 // Integration tests are implemented through Go's test framework, as test 20 // functions that create and execute pipelines using the ptest package. Tests 21 // should be placed in smaller sub-packages for organizational purposes and 22 // parallelism (tests are only run in parallel across different packages). 23 // Integration tests should always begin with a call to CheckFilters to ensure 24 // test filters can be applied, and each package containing integration tests 25 // should call ptest.Main in a TestMain function if it uses ptest. 26 // 27 // Running integration tests can be done with a go test call with any flags that 28 // are required by the test pipelines, such as --runner or --endpoint. 29 // Example: 30 // 31 // go test -v ./sdks/go/test/integration/... --runner=portable --endpoint=localhost:8099 32 // 33 // Alternatively, tests can be executed by running the 34 // run_validatesrunner_tests.sh script, which also performs much of the 35 // environment setup, or by calling gradle commands in :sdks:go:test. 36 package integration 37 38 import ( 39 "fmt" 40 "math/rand" 41 "regexp" 42 "strings" 43 "testing" 44 "time" 45 46 // common runner flag. 47 "github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts" 48 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" 49 ) 50 51 // Filters for temporarily skipping integration tests. All filters are regex 52 // matchers that must match the full name of a test at the point where 53 // CheckFilters is called. Multiple tests can be skipped by using regex 54 // wildcards. (ex. "TestXLang_.*" filters all tests starting with TestXLang_) 55 // 56 // It is strongly recommended to include, TODOs, GitHub issues, or just comments 57 // describing why tests are being skipped. 58 59 // sickbay filters tests that fail due to Go SDK errors. These tests will not 60 // execute on any runners. 61 var sickbay = []string{} 62 63 // Runner-specific test filters, for features that are not yet supported on 64 // specific runners. 65 66 var directFilters = []string{ 67 // The direct runner does not yet support cross-language. 68 "TestXLang.*", 69 "TestKafkaIO.*", 70 "TestBigQueryIO.*", 71 "TestDebeziumIO_BasicRead", 72 "TestJDBCIO_BasicReadWrite", 73 "TestJDBCIO_PostgresReadWrite", 74 "TestDataframe", 75 // Triggers, Panes are not yet supported 76 "TestTrigger.*", 77 "TestPanes", 78 // The direct runner does not support the TestStream primitive 79 "TestTestStream.*", 80 // (https://github.com/apache/beam/issues/21130): The direct runner does not support windowed side inputs 81 "TestValidateWindowedSideInputs", 82 // (https://github.com/apache/beam/issues/21130): The direct runner does not currently support multimap side inputs 83 "TestParDoMultiMapSideInput", 84 "TestLargeWordcount_Loopback", 85 // The direct runner does not support self-checkpointing 86 "TestCheckpointing", 87 // The direct runner does not support pipeline drain for SDF. 88 "TestDrain", 89 // FhirIO currently only supports Dataflow runner 90 "TestFhirIO.*", 91 // OOMs currently only lead to heap dumps on Dataflow runner 92 "TestOomParDo", 93 // The direct runner does not support user state. 94 "TestValueState", 95 "TestValueStateWindowed", 96 "TestValueStateClear", 97 "TestBagState", 98 "TestBagStateClear", 99 "TestCombiningState", 100 "TestMapState", 101 "TestMapStateClear", 102 "TestSetState", 103 "TestSetStateClear", 104 } 105 106 var portableFilters = []string{ 107 // The portable runner does not support the TestStream primitive 108 "TestTestStream.*", 109 // The trigger and pane tests uses TestStream 110 "TestTrigger.*", 111 "TestPanes", 112 // TODO(https://github.com/apache/beam/issues/21058): Python portable runner times out on Kafka reads. 113 "TestKafkaIO.*", 114 // TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners. 115 "TestBigQueryIO.*", 116 // The portable runner does not support self-checkpointing 117 "TestCheckpointing", 118 // The portable runner does not support pipeline drain for SDF. 119 "TestDrain", 120 // FhirIO currently only supports Dataflow runner 121 "TestFhirIO.*", 122 // OOMs currently only lead to heap dumps on Dataflow runner 123 "TestOomParDo", 124 // The portable runner does not support user state. 125 "TestValueState", 126 "TestValueStateWindowed", 127 "TestValueStateClear", 128 "TestBagState", 129 "TestBagStateClear", 130 "TestCombiningState", 131 "TestMapState", 132 "TestMapStateClear", 133 "TestSetState", 134 "TestSetStateClear", 135 } 136 137 var flinkFilters = []string{ 138 // TODO(https://github.com/apache/beam/issues/20723): Flink tests timing out on reads. 139 "TestXLang_Combine.*", 140 "TestDebeziumIO_BasicRead", 141 // TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners. 142 "TestBigQueryIO.*", 143 // The number of produced outputs in AfterSynchronizedProcessingTime varies in different runs. 144 "TestTriggerAfterSynchronizedProcessingTime", 145 // The flink runner does not support pipeline drain for SDF. 146 "TestDrain", 147 // FhirIO currently only supports Dataflow runner 148 "TestFhirIO.*", 149 // OOMs currently only lead to heap dumps on Dataflow runner 150 "TestOomParDo", 151 // Flink does not support map based state types. 152 "TestMapState", 153 "TestMapStateClear", 154 "TestSetStateClear", 155 "TestSetState", 156 } 157 158 var samzaFilters = []string{ 159 // TODO(https://github.com/apache/beam/issues/20987): Samza tests invalid encoding. 160 "TestReshuffle", 161 "TestReshuffleKV", 162 // The Samza runner does not support the TestStream primitive 163 "TestTestStream.*", 164 // The trigger and pane tests uses TestStream 165 "TestTrigger.*", 166 "TestPanes", 167 // TODO(https://github.com/apache/beam/issues/21244): Samza doesn't yet support post job metrics, used by WordCount 168 "TestWordCount.*", 169 // TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners. 170 "TestBigQueryIO.*", 171 // The Samza runner does not support self-checkpointing 172 "TestCheckpointing", 173 // The samza runner does not support pipeline drain for SDF. 174 "TestDrain", 175 // FhirIO currently only supports Dataflow runner 176 "TestFhirIO.*", 177 // OOMs currently only lead to heap dumps on Dataflow runner 178 "TestOomParDo", 179 // The samza runner does not support user state. 180 "TestValueState", 181 "TestValueStateWindowed", 182 "TestValueStateClear", 183 "TestBagState", 184 "TestBagStateClear", 185 "TestCombiningState", 186 "TestMapState", 187 "TestMapStateClear", 188 "TestSetState", 189 "TestSetStateClear", 190 // TODO(https://github.com/apache/beam/issues/26126): Java runner issue (AcitveBundle has no regsitered handler) 191 "TestDebeziumIO_BasicRead", 192 } 193 194 var sparkFilters = []string{ 195 // TODO(BEAM-11498): XLang tests broken with Spark runner. 196 "TestXLang.*", 197 "TestParDoSideInput", 198 "TestParDoKVSideInput", 199 // The Spark runner does not support the TestStream primitive 200 "TestTestStream.*", 201 // The trigger and pane tests uses TestStream 202 "TestTrigger.*", 203 "TestPanes", 204 // [BEAM-13921]: Spark doesn't support side inputs to executable stages 205 "TestDebeziumIO_BasicRead", 206 // TODO(BEAM-13215): GCP IOs currently do not work in non-Dataflow portable runners. 207 "TestBigQueryIO.*", 208 // The spark runner does not support self-checkpointing 209 "TestCheckpointing", 210 // The spark runner does not support pipeline drain for SDF. 211 "TestDrain", 212 // FhirIO currently only supports Dataflow runner 213 "TestFhirIO.*", 214 // OOMs currently only lead to heap dumps on Dataflow runner 215 "TestOomParDo", 216 // Spark does not support map based state types. 217 "TestMapState", 218 "TestMapStateClear", 219 "TestSetStateClear", 220 "TestSetState", 221 } 222 223 var dataflowFilters = []string{ 224 // The Dataflow runner doesn't work with tests using testcontainers locally. 225 "TestJDBCIO_BasicReadWrite", 226 "TestJDBCIO_PostgresReadWrite", 227 "TestDebeziumIO_BasicRead", 228 "TestMongoDBIO.*", 229 // TODO(BEAM-11576): TestFlattenDup failing on this runner. 230 "TestFlattenDup", 231 // The Dataflow runner does not support the TestStream primitive 232 "TestTestStream.*", 233 // The trigger and pane tests uses TestStream 234 "TestTrigger.*", 235 "TestPanes", 236 // There is no infrastructure for running KafkaIO tests with Dataflow. 237 "TestKafkaIO.*", 238 // Dataflow doesn't support any test that requires loopback. 239 // Eg. For FileIO examples. 240 ".*Loopback.*", 241 // Dataflow does not automatically terminate the TestCheckpointing pipeline when 242 // complete. 243 "TestCheckpointing", 244 // TODO(21761): This test needs to provide GCP project to expansion service. 245 "TestBigQueryIO_BasicWriteQueryRead", 246 // Dataflow does not drain jobs by itself. 247 "TestDrain", 248 } 249 250 // CheckFilters checks if an integration test is filtered to be skipped, either 251 // because the intended runner does not support it, or the test is sickbayed. 252 // This method should be called at the beginning of any integration test. If 253 // t.Run is used, CheckFilters should be called within the t.Run callback, so 254 // that sub-tests can be skipped individually. 255 func CheckFilters(t *testing.T) { 256 if !ptest.MainCalled() { 257 panic("ptest.Main() has not been called: please override TestMain to ensure that the integration test runs properly.") 258 } 259 260 // Check for sickbaying first. 261 n := t.Name() 262 for _, f := range sickbay { 263 // Add start and end of string regexp matchers so only a full match is 264 // counted. 265 f = fmt.Sprintf("^%v$", f) 266 match, err := regexp.MatchString(f, n) 267 if err != nil { 268 t.Errorf("Matching of regex '%v' with test '%v' failed: %v", f, n, err) 269 } 270 if match { 271 t.Skipf("Test %v is currently sickbayed on all runners", n) 272 } 273 } 274 s1 := rand.NewSource(time.Now().UnixNano()) 275 r1 := rand.New(s1) 276 *jobopts.JobName = fmt.Sprintf("go-%v-%v", strings.ToLower(n), r1.Intn(1000)) 277 278 // Test for runner-specific skipping second. 279 var filters []string 280 runner := *ptest.Runner 281 if runner == "" { 282 runner = ptest.DefaultRunner() 283 } 284 switch runner { 285 case "direct", "DirectRunner": 286 filters = directFilters 287 case "portable", "PortableRunner": 288 filters = portableFilters 289 case "flink", "FlinkRunner": 290 filters = flinkFilters 291 case "samza", "SamzaRunner": 292 filters = samzaFilters 293 case "spark", "SparkRunner": 294 filters = sparkFilters 295 case "dataflow", "DataflowRunner": 296 filters = dataflowFilters 297 default: 298 return 299 } 300 301 for _, f := range filters { 302 // Add start and end of string regexp matchers so only a full match is 303 // counted. 304 f = fmt.Sprintf("^%v$", f) 305 match, err := regexp.MatchString(f, n) 306 if err != nil { 307 t.Errorf("Matching of regex '%v' with test '%v' failed: %v", f, n, err) 308 } 309 if match { 310 t.Skipf("Test %v is currently filtered for runner %v", n, runner) 311 } 312 } 313 }