// Copyright 2023 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bq

import (
	"context"

	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"

	"go.chromium.org/luci/common/logging"
)

// resultWaiter is the callback returned by streamClient.appendRows.
//
// writeProtos schedules it on bqWriter.results, so the error it returns is
// what bqWriter.finalize observes when it waits on that group.
// NOTE(review): presumably it blocks until BigQuery acknowledges the append;
// confirm against the streamClient implementation.
type resultWaiter func(ctx context.Context) error

// streamClient is the set of write-stream operations bqWriter depends on.
type streamClient interface {
	// appendRows submits one batch of rows, each being a serialized proto
	// message, and returns a resultWaiter for the outcome of that batch.
	appendRows(ctx context.Context, rows [][]byte) (resultWaiter, error)
	// streamName returns the name of the write stream; bqWriter uses it in log
	// messages only.
	streamName() string
	// finalize is called by bqWriter.finalize once every pending append has
	// been waited on.
	finalize(ctx context.Context) error
}

// bqWriter is a stateful client which handles writes to bigquery.
// This object is intended to be used for single thread exports.
type bqWriter struct {
	// maxSizePerWrite is a configurable number of bytes per appendRows call.
	// The maximum number of bytes allowed is 10_000_000, but this is
	// configurable for unit test purposes. writeProtos uses it as the threshold
	// for splitting rows across several appendRows calls.
	maxSizePerWrite int
	// stream is the write stream that rows are appended to.
	stream streamClient
	// results runs the resultWaiter of every appendRows call issued by
	// writeProtos; finalize waits on it.
	results *errgroup.Group
}

// writeProtos will write msgs using stream. It will break up input into
// multiple AppendRows calls if it detects that the byte size of msgs will
// exceed the maximum number of bytes per AppendRows call.
// Each call to appendRows will add the managedwriter.AppendRowsResult to
// results of the bqWriter so that they can be remembered for later.
// As soon as the AppendRows is called on all of msgs this will exit.
51 // writeProtos is guaranteed to call AppendRows at least once so long as msgs is 52 // non-empty. 53 func (w *bqWriter) writeProtos(ctx context.Context, msgs []proto.Message) error { 54 l := len(msgs) 55 encoded := make([][]byte, l) 56 logging.Infof(ctx, "Starting write of %d protos to writestream %s", l, w.stream.streamName()) 57 for i, msg := range msgs { 58 b, err := proto.Marshal(msg) 59 if err != nil { 60 return err 61 } 62 encoded[i] = b 63 } 64 write := func(batch [][]byte) error { 65 logging.Infof(ctx, "AppendRows for %d rows to writestream %s", len(batch), w.stream.streamName()) 66 r, err := w.stream.appendRows(ctx, batch) 67 if err != nil { 68 return err 69 } 70 w.results.Go(func() error { 71 return r(ctx) 72 }) 73 return nil 74 } 75 for len(encoded) > 0 { 76 batchSize := 0 77 batchLen := 0 78 for batchLen < len(encoded) && batchSize < w.maxSizePerWrite { 79 batchSize += len(encoded[batchLen]) 80 batchLen += 1 81 } 82 err := write(encoded[:batchLen]) 83 if err != nil { 84 return err 85 } 86 encoded = encoded[batchLen:] 87 } 88 return nil 89 } 90 91 // finalize waits for all appendRows operations to complete, then calls finalize on the writeStream. 92 func (w *bqWriter) finalize(ctx context.Context) error { 93 logging.Infof(ctx, "Waiting on appendRow calls to writestream %s", w.stream.streamName()) 94 err := w.results.Wait() 95 if err != nil { 96 return err 97 } 98 logging.Infof(ctx, "Finalizing writestream %s", w.stream.streamName()) 99 return w.stream.finalize(ctx) 100 }