github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/io/bigqueryio/bigqueryio_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package bigqueryio 17 18 import ( 19 "context" 20 "flag" 21 "fmt" 22 "math/rand" 23 "strings" 24 "testing" 25 "time" 26 27 "cloud.google.com/go/bigquery" 28 "github.com/apache/beam/sdks/v2/go/pkg/beam" 29 "github.com/apache/beam/sdks/v2/go/pkg/beam/io/bigqueryio" 30 "github.com/apache/beam/sdks/v2/go/pkg/beam/options/gcpopts" 31 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 32 _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" 33 "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" 34 "github.com/apache/beam/sdks/v2/go/test/integration" 35 ) 36 37 func init() { 38 register.DoFn2x0[[]byte, func(TestRow)](&CreateTestRowsFn{}) 39 register.Emitter1[TestRow]() 40 } 41 42 func checkFlags(t *testing.T) { 43 gcpProjectIsNotSet := gcpopts.Project == nil || *gcpopts.Project == "" 44 if gcpProjectIsNotSet { 45 t.Skip("GCP project flag is not set.") 46 } 47 if *integration.BigQueryDataset == "" { 48 t.Skip("No BigQuery dataset provided.") 49 } 50 } 51 52 const ( 53 // A text to shuffle to get random words. 54 text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas eget nulla nec " + 55 "velit hendrerit placerat. Donec eu odio ultricies, fermentum arcu at, mollis lectus. " + 56 "Vestibulum porttitor pharetra sem vitae feugiat. Mauris facilisis neque in mauris " + 57 "feugiat rhoncus. Donec eu ipsum at nibh lobortis euismod. Nam at hendrerit felis. " + 58 "Vivamus et orci ex. Nam dui nisl, rutrum ac pretium eget, vehicula in tortor. Class " + 59 "aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. " + 60 "Phasellus ante lorem, pharetra blandit dapibus et, tempus nec purus. Maecenas in " + 61 "posuere sem, vel pharetra nisl. Pellentesque habitant morbi tristique senectus et netus " + 62 "et malesuada fames ac turpis egestas. Donec nec facilisis ex. Praesent euismod commodo " + 63 "efficitur. Fusce in nisi nunc." 64 // Number of random elements to create for test. Must be less than number of words in text. 65 inputSize = 50 66 ) 67 68 // TestRow is a sample row to write and read from that is expected to contain enough deterministic 69 // and random data in different data types to provide a reasonable signal that reading and writing 70 // works at a basic level. 71 type TestRow struct { 72 Counter int64 `bigquery:"counter"` // A deterministic counter, increments for each row generated. 73 RandData RandData `bigquery:"rand_data"` // An inner struct containing randomized data. 74 } 75 76 func shuffleText() []string { 77 words := strings.Fields(text) 78 rand.Shuffle(len(words), func(i, j int) { words[i], words[j] = words[j], words[i] }) 79 return words 80 } 81 82 // RandData is a struct of various types of random data. 83 type RandData struct { 84 Flip bool `bigquery:"flip"` // Flip is a bool with a random chance of either result (a coin flip). 85 Num int64 `bigquery:"num"` // Num is a random int64. 86 Word string `bigquery:"word"` // Word is a randomly selected word from a sample text. 87 } 88 89 // ddlSchema is a string for BigQuery data definition language that corresponds to TestRow. 90 const ddlTestRowSchema = "counter INT64 NOT NULL, " + 91 "rand_data STRUCT<" + 92 "flip BOOL NOT NULL," + 93 "num INT64 NOT NULL," + 94 "word STRING NOT NULL" + 95 "> NOT NULL" 96 97 // CreateTestRowsFn is a DoFn that creates randomized TestRows based on a seed. 98 type CreateTestRowsFn struct { 99 seed int64 100 } 101 102 // ProcessElement creates a number of TestRows, populating the randomized data. 103 func (fn *CreateTestRowsFn) ProcessElement(_ []byte, emit func(TestRow)) { 104 rand.Seed(fn.seed) 105 words := shuffleText() 106 for i := 0; i < inputSize; i++ { 107 emit(TestRow{ 108 Counter: int64(i), 109 RandData: RandData{ 110 Flip: rand.Int63n(2) != 0, 111 Num: rand.Int63(), 112 Word: words[i], 113 }, 114 }) 115 } 116 } 117 118 func TestBigQueryIO_Write(t *testing.T) { 119 integration.CheckFilters(t) 120 checkFlags(t) 121 122 ctx := context.Background() 123 124 tests := []struct { 125 name string 126 preCreate bool 127 createDisposition bigquery.TableCreateDisposition 128 wantErr bool 129 }{ 130 { 131 name: "CreateNever table create disposition with preexisting table", 132 preCreate: true, 133 createDisposition: bigquery.CreateNever, 134 wantErr: false, 135 }, 136 { 137 name: "CreateIfNeeded table create disposition with preexisting table", 138 preCreate: true, 139 createDisposition: bigquery.CreateIfNeeded, 140 wantErr: false, 141 }, 142 { 143 name: "CreateNever table create disposition with no preexisting table", 144 preCreate: false, 145 createDisposition: bigquery.CreateNever, 146 wantErr: true, 147 }, 148 { 149 name: "CreateIfNeeded table create disposition with no preexisting table", 150 preCreate: false, 151 createDisposition: bigquery.CreateIfNeeded, 152 wantErr: false, 153 }, 154 } 155 for _, tt := range tests { 156 t.Run(tt.name, func(t *testing.T) { 157 // Get the GCP project 158 // this assumes dataflow is running in the same project as the project in which the bigquery dataset 159 // is located 160 project := gcpopts.GetProject(ctx) 161 tableID := fmt.Sprintf("%s_temp_%v", "go_bqio_it", time.Now().UnixNano()) 162 tableName := fmt.Sprintf("%s.%s", *integration.BigQueryDataset, tableID) 163 if tt.preCreate { 164 newTempTable(t, tableName, ddlTestRowSchema) 165 } 166 t.Cleanup(func() { 167 deleteTempTable(t, tableName) 168 }) 169 createTestRows := &CreateTestRowsFn{seed: time.Now().UnixNano()} 170 p, s := beam.NewPipelineWithRoot() 171 172 // Generate elements and write to table. 173 rows := beam.ParDo(s, createTestRows, beam.Impulse(s)) 174 bigqueryio.Write(s, project, fmt.Sprintf("%s:%s", project, tableName), rows, 175 bigqueryio.WithCreateDisposition(tt.createDisposition)) 176 177 if err := ptest.Run(p); (err != nil) != tt.wantErr { 178 t.Fatalf("ptest.Run() err = %v, wantErr %v", err, tt.wantErr) 179 } else if err != nil { 180 // Pipeline failed as expected, return early 181 return 182 } 183 checkTableExistsAndNonEmpty(ctx, t, project, tableID) 184 }) 185 } 186 } 187 188 func TestMain(m *testing.M) { 189 flag.Parse() 190 beam.Init() 191 192 ptest.MainRet(m) 193 }