go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/bq/writer.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bq
    16  
    17  import (
    18  	"context"
    19  
    20  	"golang.org/x/sync/errgroup"
    21  	"google.golang.org/protobuf/proto"
    22  
    23  	"go.chromium.org/luci/common/logging"
    24  )
    25  
    26  type resultWaiter func(ctx context.Context) error
    27  
    28  type streamClient interface {
    29  	appendRows(ctx context.Context, rows [][]byte) (resultWaiter, error)
    30  	streamName() string
    31  	finalize(ctx context.Context) error
    32  }
    33  
    34  // bqWriter is a stateful client which handles writes to bigquery.
    35  // This object is intended to be used for single thread exports.
    36  type bqWriter struct {
    37  	// maxSizePerWrite is a configurable number of bytes per appendRows call
    38  	// the maxium number of bytes allowed is 10_000_000 but this is configurable
    39  	// for unit test purposes.
    40  	maxSizePerWrite int
    41  	stream          streamClient
    42  	results         *errgroup.Group
    43  }
    44  
    45  // writeProtos will write msgs using stream . It will break up input into
    46  // multiple AppendRows calls if it detects that the byte size of msgs will
    47  // exceed the maximum number of bytes per AppendRows call.
    48  // Each call to appendRows will add the managedwriter.AppendRowsResult to
    49  // results of the bqWriter so that they can be remembered for later.
    50  // As soon as the AppendRows is called on all of msgs this will exit.
    51  // writeProtos is guaranteed to call AppendRows at least once so long as msgs is
    52  // non-empty.
    53  func (w *bqWriter) writeProtos(ctx context.Context, msgs []proto.Message) error {
    54  	l := len(msgs)
    55  	encoded := make([][]byte, l)
    56  	logging.Infof(ctx, "Starting write of %d protos to writestream %s", l, w.stream.streamName())
    57  	for i, msg := range msgs {
    58  		b, err := proto.Marshal(msg)
    59  		if err != nil {
    60  			return err
    61  		}
    62  		encoded[i] = b
    63  	}
    64  	write := func(batch [][]byte) error {
    65  		logging.Infof(ctx, "AppendRows for %d rows to writestream %s", len(batch), w.stream.streamName())
    66  		r, err := w.stream.appendRows(ctx, batch)
    67  		if err != nil {
    68  			return err
    69  		}
    70  		w.results.Go(func() error {
    71  			return r(ctx)
    72  		})
    73  		return nil
    74  	}
    75  	for len(encoded) > 0 {
    76  		batchSize := 0
    77  		batchLen := 0
    78  		for batchLen < len(encoded) && batchSize < w.maxSizePerWrite {
    79  			batchSize += len(encoded[batchLen])
    80  			batchLen += 1
    81  		}
    82  		err := write(encoded[:batchLen])
    83  		if err != nil {
    84  			return err
    85  		}
    86  		encoded = encoded[batchLen:]
    87  	}
    88  	return nil
    89  }
    90  
    91  // finalize waits for all appendRows operations to complete, then calls finalize on the writeStream.
    92  func (w *bqWriter) finalize(ctx context.Context) error {
    93  	logging.Infof(ctx, "Waiting on appendRow calls to writestream %s", w.stream.streamName())
    94  	err := w.results.Wait()
    95  	if err != nil {
    96  		return err
    97  	}
    98  	logging.Infof(ctx, "Finalizing writestream %s", w.stream.streamName())
    99  	return w.stream.finalize(ctx)
   100  }