go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/bqutil/insert.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bqutil
    16  
import (
	"context"
	stderrors "errors"
	"net/http"

	"cloud.google.com/go/bigquery"
	"google.golang.org/api/googleapi"
	"google.golang.org/api/option"

	"go.chromium.org/luci/common/bq"
	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/retry"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/server/auth"
)
    31  
    32  // Client returns a new BigQuery client for use with the given GCP project,
    33  // that authenticates as LUCI Analysis itself. Only use this method if the
    34  // specification of the BigQuery dataset to access is not under the
    35  // control of the project (e.g. via configuration).
    36  func Client(ctx context.Context, gcpProject string) (*bigquery.Client, error) {
    37  	if gcpProject == "" {
    38  		return nil, errors.New("GCP Project must be specified")
    39  	}
    40  	tr, err := auth.GetRPCTransport(ctx, auth.AsSelf, auth.WithScopes(bigquery.Scope))
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  	return bigquery.NewClient(ctx, gcpProject, option.WithHTTPClient(&http.Client{
    45  		Transport: tr,
    46  	}))
    47  }
    48  
// Inserter provides methods to insert rows into a BigQuery table.
// Rows are sent in batches of at most batchSize rows per insert call.
type Inserter struct {
	// table is the BigQuery table that rows are inserted into.
	table *bigquery.Table
	// batchSize is the maximum number of rows placed in a single batch.
	batchSize int
}
    54  
    55  // NewInserter initialises a new inserter.
    56  func NewInserter(table *bigquery.Table, batchSize int) *Inserter {
    57  	return &Inserter{
    58  		table:     table,
    59  		batchSize: batchSize,
    60  	}
    61  }
    62  
    63  // Put inserts the given rows into BigQuery.
    64  func (i *Inserter) Put(ctx context.Context, rows []*bq.Row) error {
    65  	inserter := i.table.Inserter()
    66  	for i, batch := range i.batch(rows) {
    67  		if err := inserter.Put(ctx, batch); err != nil {
    68  			return errors.Annotate(err, "putting batch %v", i).Err()
    69  		}
    70  	}
    71  	return nil
    72  }
    73  
    74  // batch divides the rows to be inserted into batches of at most batchSize.
    75  func (i *Inserter) batch(rows []*bq.Row) [][]*bq.Row {
    76  	var result [][]*bq.Row
    77  	pages := (len(rows) + (i.batchSize - 1)) / i.batchSize
    78  	for p := 0; p < pages; p++ {
    79  		start := p * i.batchSize
    80  		end := start + i.batchSize
    81  		if end > len(rows) {
    82  			end = len(rows)
    83  		}
    84  		page := rows[start:end]
    85  		result = append(result, page)
    86  	}
    87  	return result
    88  }
    89  
    90  func hasReason(apiErr *googleapi.Error, reason string) bool {
    91  	for _, e := range apiErr.Errors {
    92  		if e.Reason == reason {
    93  			return true
    94  		}
    95  	}
    96  	return false
    97  }
    98  
    99  // PutWithRetries puts rows into BigQuery.
   100  // Retries on transient errors.
   101  func (i *Inserter) PutWithRetries(ctx context.Context, rows []*bq.Row) error {
   102  	return retry.Retry(ctx, transient.Only(retry.Default), func() error {
   103  		err := i.Put(ctx, rows)
   104  
   105  		switch e := err.(type) {
   106  		case *googleapi.Error:
   107  			if e.Code == http.StatusForbidden && hasReason(e, "quotaExceeded") {
   108  				err = transient.Tag.Apply(err)
   109  			}
   110  		}
   111  
   112  		return err
   113  	}, retry.LogCallback(ctx, "bigquery_put"))
   114  }
   115  
   116  // FatalError returns true if the error is a known fatal error.
   117  func FatalError(err error) bool {
   118  	if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusForbidden && hasReason(apiErr, "accessDenied") {
   119  		return true
   120  	}
   121  	return false
   122  }