github.com/apache/beam/sdks/v2@v2.48.2/go/test/integration/io/bigqueryio/bigqueryio_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package bigqueryio
    17  
    18  import (
    19  	"context"
    20  	"flag"
    21  	"fmt"
    22  	"math/rand"
    23  	"strings"
    24  	"testing"
    25  	"time"
    26  
    27  	"cloud.google.com/go/bigquery"
    28  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    29  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/bigqueryio"
    30  	"github.com/apache/beam/sdks/v2/go/pkg/beam/options/gcpopts"
    31  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    32  	_ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow"
    33  	"github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
    34  	"github.com/apache/beam/sdks/v2/go/test/integration"
    35  )
    36  
    37  func init() {
    38  	register.DoFn2x0[[]byte, func(TestRow)](&CreateTestRowsFn{})
    39  	register.Emitter1[TestRow]()
    40  }
    41  
    42  func checkFlags(t *testing.T) {
    43  	gcpProjectIsNotSet := gcpopts.Project == nil || *gcpopts.Project == ""
    44  	if gcpProjectIsNotSet {
    45  		t.Skip("GCP project flag is not set.")
    46  	}
    47  	if *integration.BigQueryDataset == "" {
    48  		t.Skip("No BigQuery dataset provided.")
    49  	}
    50  }
    51  
    52  const (
    53  	// A text to shuffle to get random words.
    54  	text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas eget nulla nec " +
    55  		"velit hendrerit placerat. Donec eu odio ultricies, fermentum arcu at, mollis lectus. " +
    56  		"Vestibulum porttitor pharetra sem vitae feugiat. Mauris facilisis neque in mauris " +
    57  		"feugiat rhoncus. Donec eu ipsum at nibh lobortis euismod. Nam at hendrerit felis. " +
    58  		"Vivamus et orci ex. Nam dui nisl, rutrum ac pretium eget, vehicula in tortor. Class " +
    59  		"aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. " +
    60  		"Phasellus ante lorem, pharetra blandit dapibus et, tempus nec purus. Maecenas in " +
    61  		"posuere sem, vel pharetra nisl. Pellentesque habitant morbi tristique senectus et netus " +
    62  		"et malesuada fames ac turpis egestas. Donec nec facilisis ex. Praesent euismod commodo " +
    63  		"efficitur. Fusce in nisi nunc."
    64  	// Number of random elements to create for test. Must be less than number of words in text.
    65  	inputSize = 50
    66  )
    67  
    68  // TestRow is a sample row to write and read from that is expected to contain enough deterministic
    69  // and random data in different data types to provide a reasonable signal that reading and writing
    70  // works at a basic level.
    71  type TestRow struct {
    72  	Counter  int64    `bigquery:"counter"`   // A deterministic counter, increments for each row generated.
    73  	RandData RandData `bigquery:"rand_data"` // An inner struct containing randomized data.
    74  }
    75  
    76  func shuffleText() []string {
    77  	words := strings.Fields(text)
    78  	rand.Shuffle(len(words), func(i, j int) { words[i], words[j] = words[j], words[i] })
    79  	return words
    80  }
    81  
    82  // RandData is a struct of various types of random data.
    83  type RandData struct {
    84  	Flip bool   `bigquery:"flip"` // Flip is a bool with a random chance of either result (a coin flip).
    85  	Num  int64  `bigquery:"num"`  // Num is a random int64.
    86  	Word string `bigquery:"word"` // Word is a randomly selected word from a sample text.
    87  }
    88  
    89  // ddlSchema is a string for BigQuery data definition language that corresponds to TestRow.
    90  const ddlTestRowSchema = "counter INT64 NOT NULL, " +
    91  	"rand_data STRUCT<" +
    92  	"flip BOOL NOT NULL," +
    93  	"num INT64 NOT NULL," +
    94  	"word STRING NOT NULL" +
    95  	"> NOT NULL"
    96  
    97  // CreateTestRowsFn is a DoFn that creates randomized TestRows based on a seed.
    98  type CreateTestRowsFn struct {
    99  	seed int64
   100  }
   101  
   102  // ProcessElement creates a number of TestRows, populating the randomized data.
   103  func (fn *CreateTestRowsFn) ProcessElement(_ []byte, emit func(TestRow)) {
   104  	rand.Seed(fn.seed)
   105  	words := shuffleText()
   106  	for i := 0; i < inputSize; i++ {
   107  		emit(TestRow{
   108  			Counter: int64(i),
   109  			RandData: RandData{
   110  				Flip: rand.Int63n(2) != 0,
   111  				Num:  rand.Int63(),
   112  				Word: words[i],
   113  			},
   114  		})
   115  	}
   116  }
   117  
   118  func TestBigQueryIO_Write(t *testing.T) {
   119  	integration.CheckFilters(t)
   120  	checkFlags(t)
   121  
   122  	ctx := context.Background()
   123  
   124  	tests := []struct {
   125  		name              string
   126  		preCreate         bool
   127  		createDisposition bigquery.TableCreateDisposition
   128  		wantErr           bool
   129  	}{
   130  		{
   131  			name:              "CreateNever table create disposition with preexisting table",
   132  			preCreate:         true,
   133  			createDisposition: bigquery.CreateNever,
   134  			wantErr:           false,
   135  		},
   136  		{
   137  			name:              "CreateIfNeeded table create disposition with preexisting table",
   138  			preCreate:         true,
   139  			createDisposition: bigquery.CreateIfNeeded,
   140  			wantErr:           false,
   141  		},
   142  		{
   143  			name:              "CreateNever table create disposition with no preexisting table",
   144  			preCreate:         false,
   145  			createDisposition: bigquery.CreateNever,
   146  			wantErr:           true,
   147  		},
   148  		{
   149  			name:              "CreateIfNeeded table create disposition with no preexisting table",
   150  			preCreate:         false,
   151  			createDisposition: bigquery.CreateIfNeeded,
   152  			wantErr:           false,
   153  		},
   154  	}
   155  	for _, tt := range tests {
   156  		t.Run(tt.name, func(t *testing.T) {
   157  			// Get the GCP project
   158  			// this assumes dataflow is running in the same project as the project in which the bigquery dataset
   159  			// is located
   160  			project := gcpopts.GetProject(ctx)
   161  			tableID := fmt.Sprintf("%s_temp_%v", "go_bqio_it", time.Now().UnixNano())
   162  			tableName := fmt.Sprintf("%s.%s", *integration.BigQueryDataset, tableID)
   163  			if tt.preCreate {
   164  				newTempTable(t, tableName, ddlTestRowSchema)
   165  			}
   166  			t.Cleanup(func() {
   167  				deleteTempTable(t, tableName)
   168  			})
   169  			createTestRows := &CreateTestRowsFn{seed: time.Now().UnixNano()}
   170  			p, s := beam.NewPipelineWithRoot()
   171  
   172  			// Generate elements and write to table.
   173  			rows := beam.ParDo(s, createTestRows, beam.Impulse(s))
   174  			bigqueryio.Write(s, project, fmt.Sprintf("%s:%s", project, tableName), rows,
   175  				bigqueryio.WithCreateDisposition(tt.createDisposition))
   176  
   177  			if err := ptest.Run(p); (err != nil) != tt.wantErr {
   178  				t.Fatalf("ptest.Run() err = %v, wantErr %v", err, tt.wantErr)
   179  			} else if err != nil {
   180  				// Pipeline failed as expected, return early
   181  				return
   182  			}
   183  			checkTableExistsAndNonEmpty(ctx, t, project, tableID)
   184  		})
   185  	}
   186  }
   187  
   188  func TestMain(m *testing.M) {
   189  	flag.Parse()
   190  	beam.Init()
   191  
   192  	ptest.MainRet(m)
   193  }