go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/appengine/bqlog/bqlog_test.go

// Copyright 2016 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bqlog

import (
	"bytes"
	"context"
	"encoding/gob"
	"encoding/json"
	"fmt"
	"math/rand"
	"sync"
	"testing"
	"time"

	"cloud.google.com/go/bigquery"
	bqapi "google.golang.org/api/bigquery/v2"

	"go.chromium.org/luci/appengine/gaetesting"
	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/clock/testclock"
	"go.chromium.org/luci/common/data/rand/mathrand"
	"go.chromium.org/luci/common/data/stringset"
	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/gae/filter/featureBreaker"
	"go.chromium.org/luci/gae/service/taskqueue"

	. "github.com/smartystreets/goconvey/convey"
)

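// testingLog is the Log instance exercised by these tests. It is bound to the
// "pull-queue" pull queue and uses placeholder BigQuery project, dataset and
// table IDs.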
var testingLog = Log{
	QueueName:           "pull-queue",
	ProjectID:           "projectID",
	DatasetID:           "datasetID",
	TableID:             "tableID",
	DumpEntriesToLogger: true,
}

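// testEntry is a minimal bigquery.ValueSaver: Save returns the entry's row
// data and insert ID verbatim.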
type testEntry struct {
	InsertID string
	Data     map[string]bigquery.Value
}

func (e testEntry) Save() (map[string]bigquery.Value, string, error) {
	return e.Data, e.InsertID, nil
}

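// TestInsert checks that Insert gob-encodes all entries into a single pull
// queue task, and that inserting no entries enqueues nothing.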
func TestInsert(t *testing.T) {
	Convey("With mock context", t, func() {
		ctx := gaetesting.TestingContext()
		ctx = mathrand.Set(ctx, rand.New(rand.NewSource(12345)))
		tq := taskqueue.GetTestable(ctx)

		tq.CreatePullQueue("pull-queue")

		Convey("simple insert works", func() {
			err := testingLog.Insert(ctx,
				testEntry{
					InsertID: "abc",
					Data: map[string]bigquery.Value{
						"a": map[string]bigquery.Value{"b": "c"},
					},
				},
				testEntry{
					InsertID: "def",
				})
			So(err, ShouldBeNil)

			tasks := tq.GetScheduledTasks()["pull-queue"]
			So(len(tasks), ShouldEqual, 1)
			var task *taskqueue.Task
			for _, t := range tasks {
				task = t
				break
			}

			decoded := []rawEntry{}
			So(gob.NewDecoder(bytes.NewReader(task.Payload)).Decode(&decoded), ShouldBeNil)
			So(decoded, ShouldResemble, []rawEntry{
				{
					InsertID: "abc",
					// Note that the outer bigquery.Value deserializes into bqapi.JsonValue,
					// but the inner one doesn't. This is fine: in the end bqapi just encodes
					// the whole thing to JSON, and it doesn't matter which alias of `any` is
					// used for that.
					Data: map[string]bqapi.JsonValue{
						"a": json.RawMessage(`{"b":"c"}`),
					},
				},
				{
					InsertID: "def",
				},
			})
		})

		Convey("null insert works", func() {
			err := testingLog.Insert(ctx)
			So(err, ShouldBeNil)
			tasks := tq.GetScheduledTasks()["pull-queue"]
			So(len(tasks), ShouldEqual, 0)
		})
	})
}

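// TestFlush exercises Flush: batching and parallel uploads, the flush
// timeout, and fatal BigQuery, transient BigQuery and task queue Lease
// failures.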
func TestFlush(t *testing.T) {
	Convey("With mock context", t, func() {
		ctx := gaetesting.TestingContext()
		ctx = mathrand.Set(ctx, rand.New(rand.NewSource(12345)))
		ctx, tc := testclock.UseTime(ctx, time.Time{})
		tq := taskqueue.GetTestable(ctx)

		tq.CreatePullQueue("pull-queue")

		Convey("No concurrency, no batches", func() {
			testingLog := testingLog
			testingLog.MaxParallelUploads = 1
			testingLog.BatchesPerRequest = 20

			for i := 0; i < 3; i++ {
				err := testingLog.Insert(ctx, testEntry{
					Data: map[string]bigquery.Value{"i": i},
				})
				So(err, ShouldBeNil)
				tc.Add(time.Millisecond) // emulate passage of time to sort entries
			}

			reqs := []*bqapi.TableDataInsertAllRequest{}
			mockInsertAll(&testingLog, &reqs)

			count, err := testingLog.Flush(ctx)
			So(err, ShouldBeNil)
			So(count, ShouldEqual, 3)

			So(len(reqs), ShouldEqual, 1)

			blob, _ := json.MarshalIndent(reqs[0], "", "\t")
			So(string(blob), ShouldEqual, `{
	"rows": [
		{
			"insertId": "bqlog:5119905750835961307:0",
			"json": {
				"i": 0
			}
		},
		{
			"insertId": "bqlog:5119905750835961308:0",
			"json": {
				"i": 1
			}
		},
		{
			"insertId": "bqlog:5119905750835961309:0",
			"json": {
				"i": 2
			}
		}
	],
	"skipInvalidRows": true
}`)

			// Bump time to make sure all pull queue leases (if any) expire.
			tc.Add(time.Hour)
			So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0)

			// Nothing to flush.
			count, err = testingLog.Flush(ctx)
			So(err, ShouldBeNil)
			So(count, ShouldEqual, 0)
			So(len(reqs), ShouldEqual, 1)
		})

		Convey("Concurrency and batches", func() {
			testingLog := testingLog
			testingLog.MaxParallelUploads = 5
			testingLog.BatchesPerRequest = 2

			for i := 0; i < 20; i++ {
				err := testingLog.Insert(ctx, testEntry{
					Data: map[string]bigquery.Value{"i": i},
				})
				So(err, ShouldBeNil)
				tc.Add(time.Millisecond) // emulate passage of time to sort entries
			}

			reqs := []*bqapi.TableDataInsertAllRequest{}
			mockInsertAll(&testingLog, &reqs)

			count, err := testingLog.Flush(ctx)
			So(err, ShouldBeNil)
			So(count, ShouldEqual, 20)

			So(len(reqs), ShouldEqual, 10)

			// Make sure all data has been sent and insertIDs are all different.
			ints := stringset.New(0)
			ids := stringset.New(0)
			for _, req := range reqs {
				for _, row := range req.Rows {
					ids.Add(row.InsertId)
					ints.Add(string(row.Json["i"].(json.RawMessage)))
				}
			}
			So(ints.Len(), ShouldEqual, 20)
			So(ids.Len(), ShouldEqual, 20)

			// Bump time to make sure all pull queue leases (if any) expire.
			tc.Add(time.Hour)
			So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0)
		})

		Convey("Stops enumerating by timeout", func() {
			testingLog := testingLog
			testingLog.MaxParallelUploads = 1
			testingLog.BatchesPerRequest = 1
			testingLog.FlushTimeout = 5 * time.Second

			for i := 0; i < 10; i++ {
				err := testingLog.Insert(ctx, testEntry{
					Data: map[string]bigquery.Value{"i": i},
				})
				So(err, ShouldBeNil)
				tc.Add(time.Millisecond) // emulate passage of time to sort entries
			}

			// Roll time in the same goroutine that fetches tasks from the queue.
			testingLog.beforeSendChunk = func(context.Context, []*taskqueue.Task) {
				tc.Add(time.Second)
			}

			reqs := []*bqapi.TableDataInsertAllRequest{}
			mockInsertAll(&testingLog, &reqs)

			// First batch (until timeout).
			count, err := testingLog.Flush(ctx)
			So(err, ShouldBeNil)
			So(count, ShouldEqual, 5)

			// The rest.
			count, err = testingLog.Flush(ctx)
			So(err, ShouldBeNil)
			So(count, ShouldEqual, 5)

			// Total number of requests.
			So(len(reqs), ShouldEqual, 10)

			// Bump time to make sure all pull queue leases (if any) expire.
			tc.Add(time.Hour)
			So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0)
		})

		Convey("Handles fatal bq failure", func() {
			testingLog := testingLog
			testingLog.MaxParallelUploads = 5
			testingLog.BatchesPerRequest = 2

			for i := 0; i < 20; i++ {
				err := testingLog.Insert(ctx, testEntry{
					Data: map[string]bigquery.Value{"i": i},
				})
				So(err, ShouldBeNil)
				tc.Add(time.Millisecond) // emulate passage of time to sort entries
			}

			testingLog.insertMock = func(_ context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) {
				return nil, fmt.Errorf("omg, error")
			}

			count, err := testingLog.Flush(ctx)
			So(err.Error(), ShouldEqual, "omg, error (and 9 other errors)")
			So(count, ShouldEqual, 0)

			// Bump time to make sure all pull queue leases (if any) expire. On fatal
			// errors, we drop the data.
			tc.Add(time.Hour)
			So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0)
		})

		// TODO(vadimsh): This test is flaky.
		SkipConvey("Handles transient bq failure", func() {
			testingLog := testingLog
			testingLog.MaxParallelUploads = 1
			testingLog.BatchesPerRequest = 2

			for i := 0; i < 20; i++ {
				err := testingLog.Insert(ctx, testEntry{
					Data: map[string]bigquery.Value{"i": i},
				})
				So(err, ShouldBeNil)
				tc.Add(time.Millisecond) // emulate passage of time to sort entries
			}

			testingLog.insertMock = func(_ context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) {
				return nil, errors.New("omg, transient error", transient.Tag)
			}

			tc.SetTimerCallback(func(d time.Duration, t clock.Timer) {
				if testclock.HasTags(t, "insert-retry") {
					tc.Add(d)
				}
			})

			count, err := testingLog.Flush(ctx)
			So(err.Error(), ShouldEqual, "omg, transient error (and 2 other errors)")
			So(count, ShouldEqual, 0)

			// Bump time to make sure all pull queue leases (if any) expire. On
			// transient errors, we keep the data.
			tc.Add(time.Hour)
			So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 20)
		})

		Convey("Handles Lease failure", func() {
			testingLog := testingLog
			testingLog.MaxParallelUploads = 5
			testingLog.BatchesPerRequest = 2

			for i := 0; i < 20; i++ {
				err := testingLog.Insert(ctx, testEntry{
					Data: map[string]bigquery.Value{"i": i},
				})
				So(err, ShouldBeNil)
				tc.Add(time.Millisecond) // emulate passage of time to sort entries
			}

			testingLog.insertMock = func(_ context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) {
				panic("must not be called")
			}

			ctx, fb := featureBreaker.FilterTQ(ctx, nil)
			fb.BreakFeatures(fmt.Errorf("lease error"), "Lease")
			tc.SetTimerCallback(func(d time.Duration, t clock.Timer) {
				if testclock.HasTags(t, "lease-retry") {
					tc.Add(d)
				}
			})

			count, err := testingLog.Flush(ctx)
			So(err.Error(), ShouldEqual, "lease error")
			So(count, ShouldEqual, 0)

			// Bump time to make sure all pull queue leases (if any) expire.
			tc.Add(time.Hour)
			So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 20)
		})
	})
}

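// mockInsertAll points l.insertMock at a recorder that appends every
// TableDataInsertAllRequest to *reqs (under a mutex, since uploads may run in
// parallel) and reports success.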
func mockInsertAll(l *Log, reqs *[]*bqapi.TableDataInsertAllRequest) {
	lock := sync.Mutex{}
	l.insertMock = func(ctx context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) {
		lock.Lock()
		defer lock.Unlock()
		*reqs = append(*reqs, r)
		return &bqapi.TableDataInsertAllResponse{}, nil
	}
}