go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/bqexporter/test_result_row.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/bqexporter/test_result_row.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bqexporter
    16  
    17  import (
    18  	"context"
    19  
    20  	"cloud.google.com/go/bigquery"
    21  	"cloud.google.com/go/spanner"
    22  	"github.com/golang/protobuf/descriptor"
    23  	desc "github.com/golang/protobuf/protoc-gen-go/descriptor"
    24  	"golang.org/x/sync/errgroup"
    25  	"google.golang.org/protobuf/proto"
    26  
    27  	"go.chromium.org/luci/common/bq"
    28  	"go.chromium.org/luci/common/errors"
    29  	"go.chromium.org/luci/common/logging"
    30  	"go.chromium.org/luci/server/span"
    31  
    32  	"go.chromium.org/luci/resultdb/bqutil"
    33  	"go.chromium.org/luci/resultdb/internal/invocations"
    34  	"go.chromium.org/luci/resultdb/internal/invocations/graph"
    35  	"go.chromium.org/luci/resultdb/internal/spanutil"
    36  	"go.chromium.org/luci/resultdb/internal/testresults"
    37  	"go.chromium.org/luci/resultdb/pbutil"
    38  	bqpb "go.chromium.org/luci/resultdb/proto/bq"
    39  	pb "go.chromium.org/luci/resultdb/proto/v1"
    40  )
    41  
// testResultRowSchema is the BigQuery schema for test result rows. It is
// assigned once, in init, from generateTestResultRowSchema.
var testResultRowSchema bigquery.Schema

// testResultRowMessage is the fully-qualified proto message name handed to
// bqutil.GenerateSchema when generating testResultRowSchema.
const testResultRowMessage = "luci.resultdb.bq.TestResultRow"
    45  
    46  func init() {
    47  	var err error
    48  	if testResultRowSchema, err = generateTestResultRowSchema(); err != nil {
    49  		panic(err)
    50  	}
    51  }
    52  
    53  func generateTestResultRowSchema() (schema bigquery.Schema, err error) {
    54  	fd, _ := descriptor.MessageDescriptorProto(&bqpb.TestResultRow{})
    55  	// We also need to get FileDescriptorProto for StringPair, TestMetadata, Sources and FailureReason
    56  	// because they are defined in different files.
    57  	fdsp, _ := descriptor.MessageDescriptorProto(&pb.StringPair{})
    58  	fdtmd, _ := descriptor.MessageDescriptorProto(&pb.TestMetadata{})
    59  	fds, _ := descriptor.MessageDescriptorProto(&pb.Sources{})
    60  	fdfr, _ := descriptor.MessageDescriptorProto(&pb.FailureReason{})
    61  	fdinv, _ := descriptor.MessageDescriptorProto(&bqpb.InvocationRecord{})
    62  	fdset := &desc.FileDescriptorSet{File: []*desc.FileDescriptorProto{fd, fdsp, fdtmd, fds, fdfr, fdinv}}
    63  	return bqutil.GenerateSchema(fdset, testResultRowMessage)
    64  }
    65  
// maxSummaryLength is the cap applied to the length of a row's summary HTML.
//
// Row size limit is 5MB according to
// https://cloud.google.com/bigquery/quotas#streaming_inserts
// The summary HTML is capped at 4MB (4e6) so that the row size stays under
// that limit.
const maxSummaryLength = 4e6
    71  
    72  func invocationProtoToRecord(inv *pb.Invocation) *bqpb.InvocationRecord {
    73  	return &bqpb.InvocationRecord{
    74  		Id:         string(invocations.MustParseName(inv.Name)),
    75  		Tags:       inv.Tags,
    76  		Properties: inv.Properties,
    77  		Realm:      inv.Realm,
    78  	}
    79  }
    80  
// testResultRowInput is information required to generate a TestResult BigQuery row.
type testResultRowInput struct {
	// exported is the invocation recorded in the row's Exported field; its
	// CreateTime is used as the row's PartitionTime.
	exported   *pb.Invocation
	// parent is the invocation recorded in the row's Parent field.
	parent     *pb.Invocation
	// tr is the test result the row is generated from.
	tr         *pb.TestResult
	// sources is copied to the row's Sources field; it may be nil.
	sources    *pb.Sources
	// exonerated is copied to the row's Exonerated field.
	exonerated bool
}
    89  
    90  func (i *testResultRowInput) row() proto.Message {
    91  	tr := i.tr
    92  
    93  	ret := &bqpb.TestResultRow{
    94  		Exported:      invocationProtoToRecord(i.exported),
    95  		Parent:        invocationProtoToRecord(i.parent),
    96  		Name:          tr.Name,
    97  		TestId:        tr.TestId,
    98  		ResultId:      tr.ResultId,
    99  		Variant:       pbutil.VariantToStringPairs(tr.Variant),
   100  		VariantHash:   tr.VariantHash,
   101  		Expected:      tr.Expected,
   102  		Status:        tr.Status.String(),
   103  		SummaryHtml:   tr.SummaryHtml,
   104  		StartTime:     tr.StartTime,
   105  		Duration:      tr.Duration,
   106  		Tags:          tr.Tags,
   107  		Exonerated:    i.exonerated,
   108  		Sources:       i.sources,
   109  		PartitionTime: i.exported.CreateTime,
   110  		TestMetadata:  tr.TestMetadata,
   111  		FailureReason: tr.FailureReason,
   112  		Properties:    tr.Properties,
   113  	}
   114  
   115  	if tr.Status == pb.TestStatus_SKIP {
   116  		ret.SkipReason = tr.SkipReason.String()
   117  	}
   118  
   119  	if len(ret.SummaryHtml) > maxSummaryLength {
   120  		ret.SummaryHtml = "[Trimmed] " + ret.SummaryHtml[:maxSummaryLength]
   121  	}
   122  
   123  	return ret
   124  }
   125  
   126  func (i *testResultRowInput) id() []byte {
   127  	return []byte(i.tr.Name)
   128  }
   129  
// testVariantKey identifies a test variant by test ID and variant hash. It is
// used as the key of the exonerated test variant set.
type testVariantKey struct {
	// testID is the test's ID (the TestId column / TestResult.TestId).
	testID      string
	// variantHash is the variant's hash (the VariantHash column /
	// TestResult.VariantHash).
	variantHash string
}
   134  
   135  // queryExoneratedTestVariants reads exonerated test variants matching the predicate.
   136  func queryExoneratedTestVariants(ctx context.Context, invs invocations.IDSet) (map[testVariantKey]struct{}, error) {
   137  	st := spanner.NewStatement(`
   138  		SELECT DISTINCT TestId, VariantHash,
   139  		FROM TestExonerations
   140  		WHERE InvocationId IN UNNEST(@invIDs)
   141  	`)
   142  	st.Params["invIDs"] = invs
   143  	tvs := map[testVariantKey]struct{}{}
   144  	var b spanutil.Buffer
   145  	err := spanutil.Query(ctx, st, func(row *spanner.Row) error {
   146  		var key testVariantKey
   147  		if err := b.FromSpanner(row, &key.testID, &key.variantHash); err != nil {
   148  			return err
   149  		}
   150  		tvs[key] = struct{}{}
   151  		return nil
   152  	})
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  	return tvs, nil
   157  }
   158  
   159  func (b *bqExporter) queryTestResults(
   160  	ctx context.Context,
   161  	reachableInvs graph.ReachableInvocations,
   162  	exported *pb.Invocation,
   163  	predicate *pb.TestResultPredicate,
   164  	exoneratedTestVariants map[testVariantKey]struct{},
   165  	batchC chan []rowInput) error {
   166  	invocationIds, err := reachableInvs.WithTestResultsIDSet()
   167  	if err != nil {
   168  		return err
   169  	}
   170  	q := testresults.Query{
   171  		Predicate:     predicate,
   172  		InvocationIDs: invocationIds,
   173  		Mask:          testresults.AllFields,
   174  	}
   175  
   176  	invs, err := invocations.ReadBatch(ctx, invocationIds)
   177  	if err != nil {
   178  		return err
   179  	}
   180  
   181  	rows := make([]rowInput, 0, b.MaxBatchRowCount)
   182  	batchSize := 0 // Estimated size of rows in bytes.
   183  	rowCount := 0
   184  	err = q.Run(ctx, func(tr *pb.TestResult) error {
   185  		_, exonerated := exoneratedTestVariants[testVariantKey{testID: tr.TestId, variantHash: tr.VariantHash}]
   186  		parentID, _, _ := testresults.MustParseName(tr.Name)
   187  		sourceHash := reachableInvs.Invocations[parentID].SourceHash
   188  		var sources *pb.Sources
   189  		if sourceHash != graph.EmptySourceHash {
   190  			sources = reachableInvs.Sources[sourceHash]
   191  		}
   192  
   193  		rows = append(rows, &testResultRowInput{
   194  			exported:   exported,
   195  			parent:     invs[parentID],
   196  			tr:         tr,
   197  			sources:    sources,
   198  			exonerated: exonerated,
   199  		})
   200  		batchSize += proto.Size(tr)
   201  		rowCount++
   202  		if len(rows) >= b.MaxBatchRowCount || batchSize >= b.MaxBatchSizeApprox {
   203  			select {
   204  			case <-ctx.Done():
   205  				return ctx.Err()
   206  			case batchC <- rows:
   207  			}
   208  			rows = make([]rowInput, 0, b.MaxBatchRowCount)
   209  			batchSize = 0
   210  		}
   211  		return nil
   212  	})
   213  
   214  	if err != nil {
   215  		return err
   216  	}
   217  
   218  	if len(rows) > 0 {
   219  		select {
   220  		case <-ctx.Done():
   221  			return ctx.Err()
   222  		case batchC <- rows:
   223  		}
   224  	}
   225  
   226  	// Log the number of fetched rows so that later we can compare it to
   227  	// the value in QueryTestResultsStatistics. This is to help debugging
   228  	// crbug.com/1090671.
   229  	logging.Debugf(ctx, "fetched %d rows for invocations %q", rowCount, q.InvocationIDs)
   230  	return nil
   231  }
   232  
   233  // exportTestResultsToBigQuery queries test results in Spanner then exports them to BigQuery.
   234  func (b *bqExporter) exportTestResultsToBigQuery(ctx context.Context, ins inserter, invID invocations.ID, bqExport *pb.BigQueryExport) error {
   235  	ctx, cancel := span.ReadOnlyTransaction(ctx)
   236  	defer cancel()
   237  
   238  	exported, err := invocations.Read(ctx, invID)
   239  	if err != nil {
   240  		return err
   241  	}
   242  	if exported.State != pb.Invocation_FINALIZED {
   243  		return errors.Reason("%s is not finalized yet", invID.Name()).Err()
   244  	}
   245  
   246  	invs, err := graph.Reachable(ctx, invocations.NewIDSet(invID))
   247  	if err != nil {
   248  		return errors.Annotate(err, "querying reachable invocations").Err()
   249  	}
   250  
   251  	exonerationInvocationIds, err := invs.WithExonerationsIDSet()
   252  	if err != nil {
   253  		return err
   254  	}
   255  	exoneratedTestVariants, err := queryExoneratedTestVariants(ctx, exonerationInvocationIds)
   256  	if err != nil {
   257  		return errors.Annotate(err, "query exoneration").Err()
   258  	}
   259  
   260  	// Query test results in batches of invocations.
   261  	for _, batch := range invs.Batches() {
   262  		// Within each batch of invocations, batch the querying of
   263  		// test results and export to BigQuery.
   264  		batchC := make(chan []rowInput)
   265  
   266  		// Batch exports rows to BigQuery.
   267  		eg, ctx := errgroup.WithContext(ctx)
   268  
   269  		eg.Go(func() error {
   270  			return b.batchExportRows(ctx, ins, batchC, func(ctx context.Context, err bigquery.PutMultiError, rows []*bq.Row) {
   271  				// Print up to 10 errors.
   272  				for i := 0; i < 10 && i < len(err); i++ {
   273  					tr := rows[err[i].RowIndex].Message.(*bqpb.TestResultRow)
   274  					logging.Errorf(ctx, "failed to insert row for %s: %s", pbutil.TestResultName(tr.Parent.Id, tr.TestId, tr.ResultId), err[i].Error())
   275  				}
   276  				if len(err) > 10 {
   277  					logging.Errorf(ctx, "%d more row insertions failed", len(err)-10)
   278  				}
   279  			})
   280  		})
   281  
   282  		eg.Go(func() error {
   283  			defer close(batchC)
   284  			predicate := bqExport.GetTestResults().GetPredicate()
   285  			return b.queryTestResults(ctx, batch, exported, predicate, exoneratedTestVariants, batchC)
   286  		})
   287  
   288  		if err := eg.Wait(); err != nil {
   289  			return errors.Annotate(err, "exporting batch").Err()
   290  		}
   291  	}
   292  	return nil
   293  }