go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/bqexporter/test_result_row.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bqexporter 16 17 import ( 18 "context" 19 20 "cloud.google.com/go/bigquery" 21 "cloud.google.com/go/spanner" 22 "github.com/golang/protobuf/descriptor" 23 desc "github.com/golang/protobuf/protoc-gen-go/descriptor" 24 "golang.org/x/sync/errgroup" 25 "google.golang.org/protobuf/proto" 26 27 "go.chromium.org/luci/common/bq" 28 "go.chromium.org/luci/common/errors" 29 "go.chromium.org/luci/common/logging" 30 "go.chromium.org/luci/server/span" 31 32 "go.chromium.org/luci/resultdb/bqutil" 33 "go.chromium.org/luci/resultdb/internal/invocations" 34 "go.chromium.org/luci/resultdb/internal/invocations/graph" 35 "go.chromium.org/luci/resultdb/internal/spanutil" 36 "go.chromium.org/luci/resultdb/internal/testresults" 37 "go.chromium.org/luci/resultdb/pbutil" 38 bqpb "go.chromium.org/luci/resultdb/proto/bq" 39 pb "go.chromium.org/luci/resultdb/proto/v1" 40 ) 41 42 var testResultRowSchema bigquery.Schema 43 44 const testResultRowMessage = "luci.resultdb.bq.TestResultRow" 45 46 func init() { 47 var err error 48 if testResultRowSchema, err = generateTestResultRowSchema(); err != nil { 49 panic(err) 50 } 51 } 52 53 func generateTestResultRowSchema() (schema bigquery.Schema, err error) { 54 fd, _ := descriptor.MessageDescriptorProto(&bqpb.TestResultRow{}) 55 // We also need to get FileDescriptorProto for StringPair, TestMetadata, Sources and FailureReason 56 // because they are defined in different files. 57 fdsp, _ := descriptor.MessageDescriptorProto(&pb.StringPair{}) 58 fdtmd, _ := descriptor.MessageDescriptorProto(&pb.TestMetadata{}) 59 fds, _ := descriptor.MessageDescriptorProto(&pb.Sources{}) 60 fdfr, _ := descriptor.MessageDescriptorProto(&pb.FailureReason{}) 61 fdinv, _ := descriptor.MessageDescriptorProto(&bqpb.InvocationRecord{}) 62 fdset := &desc.FileDescriptorSet{File: []*desc.FileDescriptorProto{fd, fdsp, fdtmd, fds, fdfr, fdinv}} 63 return bqutil.GenerateSchema(fdset, testResultRowMessage) 64 } 65 66 // Row size limit is 5MB according to 67 // https://cloud.google.com/bigquery/quotas#streaming_inserts 68 // Cap the summaryHTML's length to 4MB to ensure the row size is under 69 // limit. 70 const maxSummaryLength = 4e6 71 72 func invocationProtoToRecord(inv *pb.Invocation) *bqpb.InvocationRecord { 73 return &bqpb.InvocationRecord{ 74 Id: string(invocations.MustParseName(inv.Name)), 75 Tags: inv.Tags, 76 Properties: inv.Properties, 77 Realm: inv.Realm, 78 } 79 } 80 81 // testResultRowInput is information required to generate a TestResult BigQuery row. 82 type testResultRowInput struct { 83 exported *pb.Invocation 84 parent *pb.Invocation 85 tr *pb.TestResult 86 sources *pb.Sources 87 exonerated bool 88 } 89 90 func (i *testResultRowInput) row() proto.Message { 91 tr := i.tr 92 93 ret := &bqpb.TestResultRow{ 94 Exported: invocationProtoToRecord(i.exported), 95 Parent: invocationProtoToRecord(i.parent), 96 Name: tr.Name, 97 TestId: tr.TestId, 98 ResultId: tr.ResultId, 99 Variant: pbutil.VariantToStringPairs(tr.Variant), 100 VariantHash: tr.VariantHash, 101 Expected: tr.Expected, 102 Status: tr.Status.String(), 103 SummaryHtml: tr.SummaryHtml, 104 StartTime: tr.StartTime, 105 Duration: tr.Duration, 106 Tags: tr.Tags, 107 Exonerated: i.exonerated, 108 Sources: i.sources, 109 PartitionTime: i.exported.CreateTime, 110 TestMetadata: tr.TestMetadata, 111 FailureReason: tr.FailureReason, 112 Properties: tr.Properties, 113 } 114 115 if tr.Status == pb.TestStatus_SKIP { 116 ret.SkipReason = tr.SkipReason.String() 117 } 118 119 if len(ret.SummaryHtml) > maxSummaryLength { 120 ret.SummaryHtml = "[Trimmed] " + ret.SummaryHtml[:maxSummaryLength] 121 } 122 123 return ret 124 } 125 126 func (i *testResultRowInput) id() []byte { 127 return []byte(i.tr.Name) 128 } 129 130 type testVariantKey struct { 131 testID string 132 variantHash string 133 } 134 135 // queryExoneratedTestVariants reads exonerated test variants matching the predicate. 136 func queryExoneratedTestVariants(ctx context.Context, invs invocations.IDSet) (map[testVariantKey]struct{}, error) { 137 st := spanner.NewStatement(` 138 SELECT DISTINCT TestId, VariantHash, 139 FROM TestExonerations 140 WHERE InvocationId IN UNNEST(@invIDs) 141 `) 142 st.Params["invIDs"] = invs 143 tvs := map[testVariantKey]struct{}{} 144 var b spanutil.Buffer 145 err := spanutil.Query(ctx, st, func(row *spanner.Row) error { 146 var key testVariantKey 147 if err := b.FromSpanner(row, &key.testID, &key.variantHash); err != nil { 148 return err 149 } 150 tvs[key] = struct{}{} 151 return nil 152 }) 153 if err != nil { 154 return nil, err 155 } 156 return tvs, nil 157 } 158 159 func (b *bqExporter) queryTestResults( 160 ctx context.Context, 161 reachableInvs graph.ReachableInvocations, 162 exported *pb.Invocation, 163 predicate *pb.TestResultPredicate, 164 exoneratedTestVariants map[testVariantKey]struct{}, 165 batchC chan []rowInput) error { 166 invocationIds, err := reachableInvs.WithTestResultsIDSet() 167 if err != nil { 168 return err 169 } 170 q := testresults.Query{ 171 Predicate: predicate, 172 InvocationIDs: invocationIds, 173 Mask: testresults.AllFields, 174 } 175 176 invs, err := invocations.ReadBatch(ctx, invocationIds) 177 if err != nil { 178 return err 179 } 180 181 rows := make([]rowInput, 0, b.MaxBatchRowCount) 182 batchSize := 0 // Estimated size of rows in bytes. 183 rowCount := 0 184 err = q.Run(ctx, func(tr *pb.TestResult) error { 185 _, exonerated := exoneratedTestVariants[testVariantKey{testID: tr.TestId, variantHash: tr.VariantHash}] 186 parentID, _, _ := testresults.MustParseName(tr.Name) 187 sourceHash := reachableInvs.Invocations[parentID].SourceHash 188 var sources *pb.Sources 189 if sourceHash != graph.EmptySourceHash { 190 sources = reachableInvs.Sources[sourceHash] 191 } 192 193 rows = append(rows, &testResultRowInput{ 194 exported: exported, 195 parent: invs[parentID], 196 tr: tr, 197 sources: sources, 198 exonerated: exonerated, 199 }) 200 batchSize += proto.Size(tr) 201 rowCount++ 202 if len(rows) >= b.MaxBatchRowCount || batchSize >= b.MaxBatchSizeApprox { 203 select { 204 case <-ctx.Done(): 205 return ctx.Err() 206 case batchC <- rows: 207 } 208 rows = make([]rowInput, 0, b.MaxBatchRowCount) 209 batchSize = 0 210 } 211 return nil 212 }) 213 214 if err != nil { 215 return err 216 } 217 218 if len(rows) > 0 { 219 select { 220 case <-ctx.Done(): 221 return ctx.Err() 222 case batchC <- rows: 223 } 224 } 225 226 // Log the number of fetched rows so that later we can compare it to 227 // the value in QueryTestResultsStatistics. This is to help debugging 228 // crbug.com/1090671. 229 logging.Debugf(ctx, "fetched %d rows for invocations %q", rowCount, q.InvocationIDs) 230 return nil 231 } 232 233 // exportTestResultsToBigQuery queries test results in Spanner then exports them to BigQuery. 234 func (b *bqExporter) exportTestResultsToBigQuery(ctx context.Context, ins inserter, invID invocations.ID, bqExport *pb.BigQueryExport) error { 235 ctx, cancel := span.ReadOnlyTransaction(ctx) 236 defer cancel() 237 238 exported, err := invocations.Read(ctx, invID) 239 if err != nil { 240 return err 241 } 242 if exported.State != pb.Invocation_FINALIZED { 243 return errors.Reason("%s is not finalized yet", invID.Name()).Err() 244 } 245 246 invs, err := graph.Reachable(ctx, invocations.NewIDSet(invID)) 247 if err != nil { 248 return errors.Annotate(err, "querying reachable invocations").Err() 249 } 250 251 exonerationInvocationIds, err := invs.WithExonerationsIDSet() 252 if err != nil { 253 return err 254 } 255 exoneratedTestVariants, err := queryExoneratedTestVariants(ctx, exonerationInvocationIds) 256 if err != nil { 257 return errors.Annotate(err, "query exoneration").Err() 258 } 259 260 // Query test results in batches of invocations. 261 for _, batch := range invs.Batches() { 262 // Within each batch of invocations, batch the querying of 263 // test results and export to BigQuery. 264 batchC := make(chan []rowInput) 265 266 // Batch exports rows to BigQuery. 267 eg, ctx := errgroup.WithContext(ctx) 268 269 eg.Go(func() error { 270 return b.batchExportRows(ctx, ins, batchC, func(ctx context.Context, err bigquery.PutMultiError, rows []*bq.Row) { 271 // Print up to 10 errors. 272 for i := 0; i < 10 && i < len(err); i++ { 273 tr := rows[err[i].RowIndex].Message.(*bqpb.TestResultRow) 274 logging.Errorf(ctx, "failed to insert row for %s: %s", pbutil.TestResultName(tr.Parent.Id, tr.TestId, tr.ResultId), err[i].Error()) 275 } 276 if len(err) > 10 { 277 logging.Errorf(ctx, "%d more row insertions failed", len(err)-10) 278 } 279 }) 280 }) 281 282 eg.Go(func() error { 283 defer close(batchC) 284 predicate := bqExport.GetTestResults().GetPredicate() 285 return b.queryTestResults(ctx, batch, exported, predicate, exoneratedTestVariants, batchC) 286 }) 287 288 if err := eg.Wait(); err != nil { 289 return errors.Annotate(err, "exporting batch").Err() 290 } 291 } 292 return nil 293 }