go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/appengine/bqlog/bqlog_test.go (about) 1 // Copyright 2016 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bqlog 16 17 import ( 18 "bytes" 19 "context" 20 "encoding/gob" 21 "encoding/json" 22 "fmt" 23 "math/rand" 24 "sync" 25 "testing" 26 "time" 27 28 "cloud.google.com/go/bigquery" 29 bqapi "google.golang.org/api/bigquery/v2" 30 31 "go.chromium.org/luci/appengine/gaetesting" 32 "go.chromium.org/luci/common/clock" 33 "go.chromium.org/luci/common/clock/testclock" 34 "go.chromium.org/luci/common/data/rand/mathrand" 35 "go.chromium.org/luci/common/data/stringset" 36 "go.chromium.org/luci/common/errors" 37 "go.chromium.org/luci/common/retry/transient" 38 "go.chromium.org/luci/gae/filter/featureBreaker" 39 "go.chromium.org/luci/gae/service/taskqueue" 40 41 . "github.com/smartystreets/goconvey/convey" 42 ) 43 44 var testingLog = Log{ 45 QueueName: "pull-queue", 46 ProjectID: "projectID", 47 DatasetID: "datasetID", 48 TableID: "tableID", 49 DumpEntriesToLogger: true, 50 } 51 52 type testEntry struct { 53 InsertID string 54 Data map[string]bigquery.Value 55 } 56 57 func (e testEntry) Save() (map[string]bigquery.Value, string, error) { 58 return e.Data, e.InsertID, nil 59 } 60 61 func TestInsert(t *testing.T) { 62 Convey("With mock context", t, func() { 63 ctx := gaetesting.TestingContext() 64 ctx = mathrand.Set(ctx, rand.New(rand.NewSource(12345))) 65 tq := taskqueue.GetTestable(ctx) 66 67 tq.CreatePullQueue("pull-queue") 68 69 Convey("simple insert works", func() { 70 err := testingLog.Insert(ctx, 71 testEntry{ 72 InsertID: "abc", 73 Data: map[string]bigquery.Value{ 74 "a": map[string]bigquery.Value{"b": "c"}, 75 }, 76 }, 77 testEntry{ 78 InsertID: "def", 79 }) 80 So(err, ShouldBeNil) 81 82 tasks := tq.GetScheduledTasks()["pull-queue"] 83 So(len(tasks), ShouldEqual, 1) 84 var task *taskqueue.Task 85 for _, t := range tasks { 86 task = t 87 break 88 } 89 90 decoded := []rawEntry{} 91 So(gob.NewDecoder(bytes.NewReader(task.Payload)).Decode(&decoded), ShouldBeNil) 92 So(decoded, ShouldResemble, []rawEntry{ 93 { 94 InsertID: "abc", 95 // Note that outer bigquery.Value deserializes into bqapi.JsonValue, 96 // but the inner one don't. This is fine, since at the end bqapi just 97 // encodes the whole thing to JSON and it doesn't matter what alias 98 // of any is used for that. 99 Data: map[string]bqapi.JsonValue{ 100 "a": json.RawMessage(`{"b":"c"}`), 101 }, 102 }, 103 { 104 InsertID: "def", 105 }, 106 }) 107 }) 108 109 Convey("null insert works", func() { 110 err := testingLog.Insert(ctx) 111 So(err, ShouldBeNil) 112 tasks := tq.GetScheduledTasks()["pull-queue"] 113 So(len(tasks), ShouldEqual, 0) 114 }) 115 }) 116 } 117 118 func TestFlush(t *testing.T) { 119 Convey("With mock context", t, func() { 120 ctx := gaetesting.TestingContext() 121 ctx = mathrand.Set(ctx, rand.New(rand.NewSource(12345))) 122 ctx, tc := testclock.UseTime(ctx, time.Time{}) 123 tq := taskqueue.GetTestable(ctx) 124 125 tq.CreatePullQueue("pull-queue") 126 127 Convey("No concurrency, no batches", func() { 128 testingLog := testingLog 129 testingLog.MaxParallelUploads = 1 130 testingLog.BatchesPerRequest = 20 131 132 for i := 0; i < 3; i++ { 133 err := testingLog.Insert(ctx, testEntry{ 134 Data: map[string]bigquery.Value{"i": i}, 135 }) 136 So(err, ShouldBeNil) 137 tc.Add(time.Millisecond) // emulate passage of time to sort entries 138 } 139 140 reqs := []*bqapi.TableDataInsertAllRequest{} 141 mockInsertAll(&testingLog, &reqs) 142 143 count, err := testingLog.Flush(ctx) 144 So(err, ShouldBeNil) 145 So(count, ShouldEqual, 3) 146 147 So(len(reqs), ShouldEqual, 1) 148 149 blob, _ := json.MarshalIndent(reqs[0], "", "\t") 150 So(string(blob), ShouldEqual, `{ 151 "rows": [ 152 { 153 "insertId": "bqlog:5119905750835961307:0", 154 "json": { 155 "i": 0 156 } 157 }, 158 { 159 "insertId": "bqlog:5119905750835961308:0", 160 "json": { 161 "i": 1 162 } 163 }, 164 { 165 "insertId": "bqlog:5119905750835961309:0", 166 "json": { 167 "i": 2 168 } 169 } 170 ], 171 "skipInvalidRows": true 172 }`) 173 174 // Bump time to make sure all pull queue leases (if any) expire. 175 tc.Add(time.Hour) 176 So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0) 177 178 // Nothing to flush. 179 count, err = testingLog.Flush(ctx) 180 So(err, ShouldBeNil) 181 So(count, ShouldEqual, 0) 182 So(len(reqs), ShouldEqual, 1) 183 }) 184 185 Convey("Concurrency and batches", func() { 186 testingLog := testingLog 187 testingLog.MaxParallelUploads = 5 188 testingLog.BatchesPerRequest = 2 189 190 for i := 0; i < 20; i++ { 191 err := testingLog.Insert(ctx, testEntry{ 192 Data: map[string]bigquery.Value{"i": i}, 193 }) 194 So(err, ShouldBeNil) 195 tc.Add(time.Millisecond) // emulate passage of time to sort entries 196 } 197 198 reqs := []*bqapi.TableDataInsertAllRequest{} 199 mockInsertAll(&testingLog, &reqs) 200 201 count, err := testingLog.Flush(ctx) 202 So(err, ShouldBeNil) 203 So(count, ShouldEqual, 20) 204 205 So(len(reqs), ShouldEqual, 10) 206 207 // Make sure all data has been sent and insertIDs are all different. 208 ints := stringset.New(0) 209 ids := stringset.New(0) 210 for _, req := range reqs { 211 for _, row := range req.Rows { 212 ids.Add(row.InsertId) 213 ints.Add(string(row.Json["i"].(json.RawMessage))) 214 } 215 } 216 So(ints.Len(), ShouldEqual, 20) 217 So(ids.Len(), ShouldEqual, 20) 218 219 // Bump time to make sure all pull queue leases (if any) expire. 220 tc.Add(time.Hour) 221 So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0) 222 }) 223 224 Convey("Stops enumerating by timeout", func() { 225 testingLog := testingLog 226 testingLog.MaxParallelUploads = 1 227 testingLog.BatchesPerRequest = 1 228 testingLog.FlushTimeout = 5 * time.Second 229 230 for i := 0; i < 10; i++ { 231 err := testingLog.Insert(ctx, testEntry{ 232 Data: map[string]bigquery.Value{"i": i}, 233 }) 234 So(err, ShouldBeNil) 235 tc.Add(time.Millisecond) // emulate passage of time to sort entries 236 } 237 238 // Roll time in same goroutine that fetches tasks from the queue. 239 testingLog.beforeSendChunk = func(context.Context, []*taskqueue.Task) { 240 tc.Add(time.Second) 241 } 242 243 reqs := []*bqapi.TableDataInsertAllRequest{} 244 mockInsertAll(&testingLog, &reqs) 245 246 // First batch (until timeout). 247 count, err := testingLog.Flush(ctx) 248 So(err, ShouldBeNil) 249 So(count, ShouldEqual, 5) 250 251 // The rest. 252 count, err = testingLog.Flush(ctx) 253 So(err, ShouldBeNil) 254 So(count, ShouldEqual, 5) 255 256 // Total number of requests. 257 So(len(reqs), ShouldEqual, 10) 258 259 // Bump time to make sure all pull queue leases (if any) expire. 260 tc.Add(time.Hour) 261 So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0) 262 }) 263 264 Convey("Handles fatal bq failure", func() { 265 testingLog := testingLog 266 testingLog.MaxParallelUploads = 5 267 testingLog.BatchesPerRequest = 2 268 269 for i := 0; i < 20; i++ { 270 err := testingLog.Insert(ctx, testEntry{ 271 Data: map[string]bigquery.Value{"i": i}, 272 }) 273 So(err, ShouldBeNil) 274 tc.Add(time.Millisecond) // emulate passage of time to sort entries 275 } 276 277 testingLog.insertMock = func(_ context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) { 278 return nil, fmt.Errorf("omg, error") 279 } 280 281 count, err := testingLog.Flush(ctx) 282 So(err.Error(), ShouldEqual, "omg, error (and 9 other errors)") 283 So(count, ShouldEqual, 0) 284 285 // Bump time to make sure all pull queue leases (if any) expire. On fatal 286 // errors, we drop the data. 287 tc.Add(time.Hour) 288 So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 0) 289 }) 290 291 // TODO(vadimsh): This test is flaky. 292 SkipConvey("Handles transient bq failure", func() { 293 testingLog := testingLog 294 testingLog.MaxParallelUploads = 1 295 testingLog.BatchesPerRequest = 2 296 297 for i := 0; i < 20; i++ { 298 err := testingLog.Insert(ctx, testEntry{ 299 Data: map[string]bigquery.Value{"i": i}, 300 }) 301 So(err, ShouldBeNil) 302 tc.Add(time.Millisecond) // emulate passage of time to sort entries 303 } 304 305 testingLog.insertMock = func(_ context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) { 306 return nil, errors.New("omg, transient error", transient.Tag) 307 } 308 309 tc.SetTimerCallback(func(d time.Duration, t clock.Timer) { 310 if testclock.HasTags(t, "insert-retry") { 311 tc.Add(d) 312 } 313 }) 314 315 count, err := testingLog.Flush(ctx) 316 So(err.Error(), ShouldEqual, "omg, transient error (and 2 other errors)") 317 So(count, ShouldEqual, 0) 318 319 // Bump time to make sure all pull queue leases (if any) expire. On 320 // transient error we keep the data. 321 tc.Add(time.Hour) 322 So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 20) 323 }) 324 325 Convey("Handles Lease failure", func() { 326 testingLog := testingLog 327 testingLog.MaxParallelUploads = 5 328 testingLog.BatchesPerRequest = 2 329 330 for i := 0; i < 20; i++ { 331 err := testingLog.Insert(ctx, testEntry{ 332 Data: map[string]bigquery.Value{"i": i}, 333 }) 334 So(err, ShouldBeNil) 335 tc.Add(time.Millisecond) // emulate passage of time to sort entries 336 } 337 338 testingLog.insertMock = func(_ context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) { 339 panic("must not be called") 340 } 341 342 ctx, fb := featureBreaker.FilterTQ(ctx, nil) 343 fb.BreakFeatures(fmt.Errorf("lease error"), "Lease") 344 tc.SetTimerCallback(func(d time.Duration, t clock.Timer) { 345 if testclock.HasTags(t, "lease-retry") { 346 tc.Add(d) 347 } 348 }) 349 350 count, err := testingLog.Flush(ctx) 351 So(err.Error(), ShouldEqual, "lease error") 352 So(count, ShouldEqual, 0) 353 354 // Bump time to make sure all pull queue leases (if any) expire. 355 tc.Add(time.Hour) 356 So(len(tq.GetScheduledTasks()["pull-queue"]), ShouldEqual, 20) 357 }) 358 }) 359 } 360 361 func mockInsertAll(l *Log, reqs *[]*bqapi.TableDataInsertAllRequest) { 362 lock := sync.Mutex{} 363 l.insertMock = func(ctx context.Context, r *bqapi.TableDataInsertAllRequest) (*bqapi.TableDataInsertAllResponse, error) { 364 lock.Lock() 365 defer lock.Unlock() 366 *reqs = append(*reqs, r) 367 return &bqapi.TableDataInsertAllResponse{}, nil 368 } 369 }