go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/bqexporter/bqexporter.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bqexporter
    16  
    17  import (
    18  	"bufio"
    19  	"context"
    20  	"crypto/sha512"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"net/http"
    24  	"time"
    25  
    26  	"cloud.google.com/go/bigquery"
    27  	sppb "cloud.google.com/go/spanner/apiv1/spannerpb"
    28  	"golang.org/x/sync/errgroup"
    29  	"golang.org/x/sync/semaphore"
    30  	"golang.org/x/time/rate"
    31  	"google.golang.org/api/googleapi"
    32  	"google.golang.org/api/option"
    33  	"google.golang.org/genproto/googleapis/bytestream"
    34  	"google.golang.org/protobuf/proto"
    35  
    36  	"go.chromium.org/luci/common/bq"
    37  	"go.chromium.org/luci/common/errors"
    38  	"go.chromium.org/luci/common/retry"
    39  	"go.chromium.org/luci/common/retry/transient"
    40  	"go.chromium.org/luci/server"
    41  	"go.chromium.org/luci/server/auth"
    42  	"go.chromium.org/luci/server/auth/realms"
    43  	"go.chromium.org/luci/server/span"
    44  	"go.chromium.org/luci/server/tq"
    45  
    46  	"go.chromium.org/luci/resultdb/internal/artifactcontent"
    47  	"go.chromium.org/luci/resultdb/internal/invocations"
    48  	"go.chromium.org/luci/resultdb/internal/tasks/taskspb"
    49  	pb "go.chromium.org/luci/resultdb/proto/v1"
    50  
    51  	// Add support for Spanner transactions in TQ.
    52  	_ "go.chromium.org/luci/server/tq/txn/spanner"
    53  )
    54  
    55  const partitionExpirationTime = 540 * 24 * time.Hour // ~1.5y
    56  
    57  // schemaApplyer ensures BQ schema matches the row proto definitons.
    58  var schemaApplyer = bq.NewSchemaApplyer(bq.RegisterSchemaApplyerCache(50))
    59  
    60  // Options is bqexporter configuration.
    61  type Options struct {
    62  	// Whether to use InsertIDs in BigQuery Streaming Inserts.
    63  	UseInsertIDs bool
    64  
    65  	// Maximum number of rows in a batch.
    66  	MaxBatchRowCount int
    67  
    68  	// Maximum size of a batch in bytes, approximate.
    69  	MaxBatchSizeApprox int
    70  
    71  	// Maximum size of all batches held in memory, approximate.
    72  	MaxBatchTotalSizeApprox int
    73  
    74  	// Maximum rate for BigQuery Streaming Inserts.
    75  	RateLimit rate.Limit
    76  
    77  	// ArtifactRBEInstance is the name of the RBE instance to use for artifact
    78  	// storage. Example: "projects/luci-resultdb/instances/artifacts".
    79  	ArtifactRBEInstance string
    80  }
    81  
    82  // DefaultOptions returns Options with default values.
    83  func DefaultOptions() Options {
    84  	return Options{
    85  		// 500 is recommended
    86  		// https://cloud.google.com/bigquery/quotas#streaming_inserts
    87  		MaxBatchRowCount: 500,
    88  		// HTTP request size limit is 10 MiB according to
    89  		// https://cloud.google.com/bigquery/quotas#streaming_inserts
    90  		// Use a smaller size as the limit since we are only using the size of
    91  		// test results to estimate the whole payload size.
    92  		MaxBatchSizeApprox:      6 * 1024 * 1024,        // 6 MiB
    93  		MaxBatchTotalSizeApprox: 2 * 1024 * 1024 * 1024, // 2 GiB
    94  		RateLimit:               100,
    95  	}
    96  }
    97  
    98  type bqExporter struct {
    99  	*Options
   100  
   101  	// putLimiter limits the rate of bigquery.Inserter.Put calls.
   102  	putLimiter *rate.Limiter
   103  
   104  	// batchSem limits the number of batches we hold in memory at a time.
   105  	//
   106  	// Strictly speaking, this is not the exact number of batches.
   107  	// The exact number is batchSemWeight + taskWorkers*2,
   108  	// but this is good enough.
   109  	batchSem *semaphore.Weighted
   110  
   111  	// Client to read from RBE-CAS.
   112  	rbecasClient bytestream.ByteStreamClient
   113  
   114  	// Max size of a token the scanner can buffer when reading artifact content.
   115  	maxTokenSize int
   116  }
   117  
   118  // TestResultTasks describes how to route bq test result export tasks.
   119  var TestResultTasks = tq.RegisterTaskClass(tq.TaskClass{
   120  	ID:            "bq-test-result-export",
   121  	Prototype:     &taskspb.ExportInvocationTestResultsToBQ{},
   122  	Kind:          tq.Transactional,
   123  	Queue:         "bqtestresultexports",
   124  	RoutingPrefix: "/internal/tasks/bqexporter",
   125  })
   126  
   127  // ArtifactTasks describes how to route bq artifact export tasks.
   128  var ArtifactTasks = tq.RegisterTaskClass(tq.TaskClass{
   129  	ID:            "bq-artifact-export",
   130  	Prototype:     &taskspb.ExportInvocationArtifactsToBQ{},
   131  	Kind:          tq.Transactional,
   132  	Queue:         "bqartifactexports",
   133  	RoutingPrefix: "/internal/tasks/bqexporter",
   134  })
   135  
   136  // InitServer initializes a bqexporter server.
   137  func InitServer(srv *server.Server, opts Options) error {
   138  	if opts.ArtifactRBEInstance == "" {
   139  		return errors.Reason("opts.ArtifactRBEInstance is required").Err()
   140  	}
   141  
   142  	conn, err := artifactcontent.RBEConn(srv.Context)
   143  	if err != nil {
   144  		return err
   145  	}
   146  	b := &bqExporter{
   147  		Options:      &opts,
   148  		putLimiter:   rate.NewLimiter(opts.RateLimit, 1),
   149  		batchSem:     semaphore.NewWeighted(int64(opts.MaxBatchTotalSizeApprox / opts.MaxBatchSizeApprox)),
   150  		rbecasClient: bytestream.NewByteStreamClient(conn),
   151  		maxTokenSize: bufio.MaxScanTokenSize,
   152  	}
   153  	TestResultTasks.AttachHandler(func(ctx context.Context, msg proto.Message) error {
   154  		task := msg.(*taskspb.ExportInvocationTestResultsToBQ)
   155  		return b.exportResultsToBigQuery(ctx, invocations.ID(task.InvocationId), task.BqExport)
   156  	})
   157  	ArtifactTasks.AttachHandler(func(ctx context.Context, msg proto.Message) error {
   158  		task := msg.(*taskspb.ExportInvocationArtifactsToBQ)
   159  		return b.exportResultsToBigQuery(ctx, invocations.ID(task.InvocationId), task.BqExport)
   160  	})
   161  	return nil
   162  }
   163  
   164  // inserter is implemented by bigquery.Inserter.
   165  type inserter interface {
   166  	// Put uploads one or more rows to the BigQuery service.
   167  	Put(ctx context.Context, src any) error
   168  }
   169  
   170  func getLUCIProject(ctx context.Context, invID invocations.ID) (string, error) {
   171  	realm, err := invocations.ReadRealm(span.Single(ctx), invID)
   172  	if err != nil {
   173  		return "", err
   174  	}
   175  
   176  	project, _ := realms.Split(realm)
   177  	return project, nil
   178  }
   179  
   180  func getBQClient(ctx context.Context, luciProject string, bqExport *pb.BigQueryExport) (*bigquery.Client, error) {
   181  	tr, err := auth.GetRPCTransport(ctx, auth.AsProject, auth.WithProject(luciProject), auth.WithScopes(bigquery.Scope))
   182  	if err != nil {
   183  		return nil, err
   184  	}
   185  
   186  	return bigquery.NewClient(ctx, bqExport.Project, option.WithHTTPClient(&http.Client{
   187  		Transport: tr,
   188  	}))
   189  }
   190  
   191  func hasReason(apiErr *googleapi.Error, reason string) bool {
   192  	for _, e := range apiErr.Errors {
   193  		if e.Reason == reason {
   194  			return true
   195  		}
   196  	}
   197  	return false
   198  }
   199  
   200  // rowInput is information required to generate a BigQuery row.
   201  type rowInput interface {
   202  	// row returns a BigQuery row.
   203  	row() proto.Message
   204  
   205  	// id returns an identifier for the row.
   206  	id() []byte
   207  }
   208  
   209  func (b *bqExporter) batchExportRows(ctx context.Context, ins inserter, batchC chan []rowInput, errorLogger func(ctx context.Context, err bigquery.PutMultiError, rows []*bq.Row)) error {
   210  	eg, ctx := errgroup.WithContext(ctx)
   211  	defer eg.Wait()
   212  
   213  	for rows := range batchC {
   214  		rows := rows
   215  		if err := b.batchSem.Acquire(ctx, 1); err != nil {
   216  			return err
   217  		}
   218  
   219  		eg.Go(func() error {
   220  			defer b.batchSem.Release(1)
   221  			err := b.insertRowsWithRetries(ctx, ins, rows, errorLogger)
   222  			if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusForbidden && hasReason(apiErr, "accessDenied") {
   223  				err = tq.Fatal.Apply(err)
   224  			}
   225  			return err
   226  		})
   227  	}
   228  
   229  	return eg.Wait()
   230  }
   231  
   232  // insertRowsWithRetries inserts rows into BigQuery.
   233  // Retries on quotaExceeded errors.
   234  func (b *bqExporter) insertRowsWithRetries(ctx context.Context, ins inserter, rowInputs []rowInput, errorLogger func(ctx context.Context, err bigquery.PutMultiError, rows []*bq.Row)) error {
   235  	if err := b.putLimiter.Wait(ctx); err != nil {
   236  		return err
   237  	}
   238  
   239  	rows := make([]*bq.Row, 0, len(rowInputs))
   240  	for _, ri := range rowInputs {
   241  		row := &bq.Row{Message: ri.row()}
   242  
   243  		if b.UseInsertIDs {
   244  			// InsertID cannot exceed 128 bytes.
   245  			// https://cloud.google.com/bigquery/quotas#streaming_inserts
   246  			// Use SHA512 which is exactly 128 bytes in hex.
   247  			hash := sha512.Sum512(ri.id())
   248  			row.InsertID = hex.EncodeToString(hash[:])
   249  		} else {
   250  			row.InsertID = bigquery.NoDedupeID
   251  		}
   252  		rows = append(rows, row)
   253  	}
   254  
   255  	return retry.Retry(ctx, quotaErrorIteratorFactory(), func() error {
   256  		err := ins.Put(ctx, rows)
   257  
   258  		if bqErr, ok := err.(bigquery.PutMultiError); ok {
   259  			// TODO(nodir): increment a counter.
   260  			errorLogger(ctx, bqErr, rows)
   261  		}
   262  
   263  		return err
   264  	}, retry.LogCallback(ctx, "bigquery_put"))
   265  }
   266  
   267  // exportResultsToBigQuery exports results of an invocation to a BigQuery table.
   268  func (b *bqExporter) exportResultsToBigQuery(ctx context.Context, invID invocations.ID, bqExport *pb.BigQueryExport) error {
   269  	ctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
   270  	defer cancel()
   271  	ctx = span.ModifyRequestOptions(ctx, func(opts *span.RequestOptions) {
   272  		opts.Priority = sppb.RequestOptions_PRIORITY_MEDIUM
   273  		opts.Tag = "bqexporter"
   274  	})
   275  
   276  	luciProject, err := getLUCIProject(ctx, invID)
   277  	if err != nil {
   278  		return err
   279  	}
   280  	ctx = span.ModifyRequestOptions(ctx, func(opts *span.RequestOptions) {
   281  		opts.Tag = "bqexporter,proj=" + luciProject
   282  	})
   283  
   284  	client, err := getBQClient(ctx, luciProject, bqExport)
   285  	if err != nil {
   286  		return errors.Annotate(err, "new bq client").Err()
   287  	}
   288  	defer client.Close()
   289  
   290  	table := client.Dataset(bqExport.Dataset).Table(bqExport.Table)
   291  	ins := table.Inserter()
   292  
   293  	// Both test results and test artifacts tables are partitioned by partition_time.
   294  	tableMetadata := &bigquery.TableMetadata{
   295  		TimePartitioning: &bigquery.TimePartitioning{
   296  			Field:      "partition_time",
   297  			Expiration: partitionExpirationTime,
   298  		},
   299  	}
   300  
   301  	switch bqExport.ResultType.(type) {
   302  	case *pb.BigQueryExport_TestResults_:
   303  		tableMetadata.Schema = testResultRowSchema.Relax()
   304  		if err := schemaApplyer.EnsureTable(ctx, table, tableMetadata); err != nil {
   305  			if !transient.Tag.In(err) {
   306  				err = tq.Fatal.Apply(err)
   307  			}
   308  			return errors.Annotate(err, "ensure test results bq table").Err()
   309  		}
   310  		return errors.Annotate(b.exportTestResultsToBigQuery(ctx, ins, invID, bqExport), "export test results").Err()
   311  	case *pb.BigQueryExport_TextArtifacts_:
   312  		tableMetadata.Schema = textArtifactRowSchema.Relax()
   313  		if err := schemaApplyer.EnsureTable(ctx, table, tableMetadata); err != nil {
   314  			if !transient.Tag.In(err) {
   315  				err = tq.Fatal.Apply(err)
   316  			}
   317  			return errors.Annotate(err, "ensure text artifacts bq table").Err()
   318  		}
   319  		return errors.Annotate(b.exportTextArtifactsToBigQuery(ctx, ins, invID, bqExport), "export text artifacts").Err()
   320  	case nil:
   321  		return fmt.Errorf("bqExport.ResultType is unspecified")
   322  	default:
   323  		panic("impossible")
   324  	}
   325  }
   326  
   327  // Schedule schedules tasks for all the given invocation's BigQuery Exports.
   328  func Schedule(ctx context.Context, invID invocations.ID) error {
   329  	var bqExports [][]byte
   330  	if err := invocations.ReadColumns(ctx, invID, map[string]any{"BigqueryExports": &bqExports}); err != nil {
   331  		return err
   332  	}
   333  	for i, buf := range bqExports {
   334  		bqx := &pb.BigQueryExport{}
   335  		if err := proto.Unmarshal(buf, bqx); err != nil {
   336  			return err
   337  		}
   338  		switch bqx.ResultType.(type) {
   339  		case *pb.BigQueryExport_TestResults_:
   340  			tq.MustAddTask(ctx, &tq.Task{
   341  				Payload: &taskspb.ExportInvocationTestResultsToBQ{
   342  					BqExport:     bqx,
   343  					InvocationId: string(invID),
   344  				},
   345  				Title: fmt.Sprintf("%s:%d", invID, i),
   346  			})
   347  		case *pb.BigQueryExport_TextArtifacts_:
   348  			tq.MustAddTask(ctx, &tq.Task{
   349  				Payload: &taskspb.ExportInvocationArtifactsToBQ{
   350  					BqExport:     bqx,
   351  					InvocationId: string(invID),
   352  				},
   353  				Title: fmt.Sprintf("%s:%d", invID, i),
   354  			})
   355  		default:
   356  			return errors.Reason("bqexport.ResultType is required").Err()
   357  		}
   358  
   359  	}
   360  	return nil
   361  }