go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/recorder/batch_create_artifacts.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package recorder 16 17 import ( 18 "context" 19 "crypto/sha256" 20 "encoding/hex" 21 "fmt" 22 "hash/fnv" 23 "mime" 24 "time" 25 26 "cloud.google.com/go/spanner" 27 repb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" 28 "golang.org/x/sync/errgroup" 29 "google.golang.org/grpc" 30 "google.golang.org/grpc/codes" 31 "google.golang.org/protobuf/types/known/timestamppb" 32 33 "go.chromium.org/luci/common/errors" 34 "go.chromium.org/luci/common/logging" 35 "go.chromium.org/luci/common/tsmon/field" 36 "go.chromium.org/luci/common/tsmon/metric" 37 "go.chromium.org/luci/grpc/appstatus" 38 "go.chromium.org/luci/server/auth" 39 "go.chromium.org/luci/server/auth/realms" 40 "go.chromium.org/luci/server/span" 41 42 "go.chromium.org/luci/resultdb/bqutil" 43 "go.chromium.org/luci/resultdb/internal/artifacts" 44 "go.chromium.org/luci/resultdb/internal/config" 45 "go.chromium.org/luci/resultdb/internal/gsutil" 46 "go.chromium.org/luci/resultdb/internal/invocations" 47 "go.chromium.org/luci/resultdb/internal/spanutil" 48 "go.chromium.org/luci/resultdb/pbutil" 49 bqpb "go.chromium.org/luci/resultdb/proto/bq" 50 pb "go.chromium.org/luci/resultdb/proto/v1" 51 "go.chromium.org/luci/resultdb/util" 52 ) 53 54 // TODO(crbug.com/1177213) - make this configurable. 55 const MaxBatchCreateArtifactSize = 10 * 1024 * 1024 56 57 // MaxShardContentSize is the maximum content size in BQ row. 58 // Artifacts content bigger than this size needs to be sharded. 59 // Leave 10 KB for other fields, the rest is content. 60 const MaxShardContentSize = bqutil.RowMaxBytes - 10*1024 61 62 // LookbackWindow is used when chunking. It specifies how many bytes we should 63 // look back to find new line/white space characters to split the chunks. 64 const LookbackWindow = 1024 65 66 var ( 67 artifactExportCounter = metric.NewCounter( 68 "resultdb/artifacts/bqexport", 69 "The number of artifacts rows to export to BigQuery, grouped by project and status.", 70 nil, 71 // The LUCI Project. 72 field.String("project"), 73 // The status of the export. 74 // Possible values: 75 // - "success": The export was successful. 76 // - "failure_input": There was an error with the input artifact 77 // (e.g. artifact contains invalid UTF-8 character). 78 // - "failure_bq": There was an error with BigQuery (e.g. throttling, load shedding), 79 // which made the artifact failed to export. 80 field.String("status"), 81 ) 82 83 artifactContentCounter = metric.NewCounter( 84 "resultdb/artifacts/content", 85 "The number of artifacts for a particular content type.", 86 nil, 87 // The LUCI Project. 88 field.String("project"), 89 // The status of the export. 90 // Possible values: "text", "nontext", "empty". 91 // We record the group instead of the actual value to prevent 92 // the explosion in cardinality. 93 field.String("content_type"), 94 ) 95 ) 96 97 type artifactCreationRequest struct { 98 testID string 99 resultID string 100 artifactID string 101 contentType string 102 103 // hash is a hash of the artifact data. It is not supplied or calculated for GCS artifacts. 104 hash string 105 // size is the size of the artifact data in bytes. In the case of a GCS artifact it is user-specified, optional and not verified. 106 size int64 107 // data is the artifact contents data that will be stored in RBE-CAS. If gcsURI is provided, this must be empty. 108 data []byte 109 // gcsURI is the location of the artifact content if it is stored in GCS. If this is provided, data must be empty. 110 gcsURI string 111 } 112 113 type invocationInfo struct { 114 id string 115 realm string 116 createTime time.Time 117 } 118 119 // BQExportClient is the interface for exporting artifacts. 120 type BQExportClient interface { 121 InsertArtifactRows(ctx context.Context, rows []*bqpb.TextArtifactRow) error 122 } 123 124 // name returns the artifact name. 125 func (a *artifactCreationRequest) name(invID invocations.ID) string { 126 if a.testID == "" { 127 return pbutil.InvocationArtifactName(string(invID), a.artifactID) 128 } 129 return pbutil.TestResultArtifactName(string(invID), a.testID, a.resultID, a.artifactID) 130 } 131 132 // parentID returns the local parent ID of the artifact. 133 func (a *artifactCreationRequest) parentID() string { 134 return artifacts.ParentID(a.testID, a.resultID) 135 } 136 137 func parseCreateArtifactRequest(req *pb.CreateArtifactRequest) (invocations.ID, *artifactCreationRequest, error) { 138 if req.GetArtifact() == nil { 139 return "", nil, errors.Reason("artifact: unspecified").Err() 140 } 141 if err := pbutil.ValidateArtifactID(req.Artifact.ArtifactId); err != nil { 142 return "", nil, errors.Annotate(err, "artifact_id").Err() 143 } 144 if req.Artifact.ContentType != "" { 145 if _, _, err := mime.ParseMediaType(req.Artifact.ContentType); err != nil { 146 return "", nil, errors.Annotate(err, "content_type").Err() 147 } 148 } 149 150 // parent 151 if req.Parent == "" { 152 return "", nil, errors.Reason("parent: unspecified").Err() 153 } 154 invIDStr, testID, resultID, err := pbutil.ParseTestResultName(req.Parent) 155 if err != nil { 156 if invIDStr, err = pbutil.ParseInvocationName(req.Parent); err != nil { 157 return "", nil, errors.Reason("parent: neither valid invocation name nor valid test result name").Err() 158 } 159 } 160 161 if len(req.Artifact.Contents) != 0 && req.Artifact.GcsUri != "" { 162 return "", nil, errors.Reason("only one of contents and gcs_uri can be given").Err() 163 } 164 165 sizeBytes := int64(len(req.Artifact.Contents)) 166 167 if sizeBytes != 0 && req.Artifact.SizeBytes != 0 && sizeBytes != req.Artifact.SizeBytes { 168 return "", nil, errors.Reason("sizeBytes and contents are specified but don't match").Err() 169 } 170 171 // If contents field is empty, try to set size from the request instead. 172 if sizeBytes == 0 { 173 if req.Artifact.SizeBytes != 0 { 174 sizeBytes = req.Artifact.SizeBytes 175 } 176 } 177 178 return invocations.ID(invIDStr), &artifactCreationRequest{ 179 artifactID: req.Artifact.ArtifactId, 180 contentType: req.Artifact.ContentType, 181 data: req.Artifact.Contents, 182 size: sizeBytes, 183 testID: testID, 184 resultID: resultID, 185 gcsURI: req.Artifact.GcsUri, 186 }, nil 187 } 188 189 // parseBatchCreateArtifactsRequest parses a batch request and returns 190 // artifactCreationRequests for each of the artifacts w/o hash computation. 191 // It returns an error, if 192 // - any of the artifact IDs or contentTypes are invalid, 193 // - the total size exceeds MaxBatchCreateArtifactSize, or 194 // - there are more than one invocations associated with the artifacts. 195 // - both data and a GCS URI are supplied 196 func parseBatchCreateArtifactsRequest(in *pb.BatchCreateArtifactsRequest) (invocations.ID, []*artifactCreationRequest, error) { 197 var tSize int64 198 var invID invocations.ID 199 200 if err := pbutil.ValidateBatchRequestCount(len(in.Requests)); err != nil { 201 return "", nil, err 202 } 203 arts := make([]*artifactCreationRequest, len(in.Requests)) 204 for i, req := range in.Requests { 205 inv, art, err := parseCreateArtifactRequest(req) 206 if err != nil { 207 return "", nil, errors.Annotate(err, "requests[%d]", i).Err() 208 } 209 switch { 210 case invID == "": 211 invID = inv 212 case invID != inv: 213 return "", nil, errors.Reason("requests[%d]: only one invocation is allowed: %q, %q", i, invID, inv).Err() 214 } 215 216 // TODO(ddoman): limit the max request body size in prpc level. 217 tSize += art.size 218 if tSize > MaxBatchCreateArtifactSize { 219 return "", nil, errors.Reason("the total size of artifact contents exceeded %d", MaxBatchCreateArtifactSize).Err() 220 } 221 arts[i] = art 222 } 223 return invID, arts, nil 224 } 225 226 // findNewArtifacts returns a list of the artifacts that don't have states yet. 227 // If one exists w/ different hash/size, this returns an error. 228 func findNewArtifacts(ctx context.Context, invID invocations.ID, arts []*artifactCreationRequest) ([]*artifactCreationRequest, error) { 229 // artifacts are not expected to exist in most cases, and this map would likely 230 // be empty. 231 type state struct { 232 hash string 233 size int64 234 gcsURI string 235 } 236 var states map[string]state 237 ks := spanner.KeySets() 238 for _, a := range arts { 239 ks = spanner.KeySets(invID.Key(a.parentID(), a.artifactID), ks) 240 } 241 var b spanutil.Buffer 242 err := span.Read(ctx, "Artifacts", ks, []string{"ParentId", "ArtifactId", "RBECASHash", "Size", "GcsURI"}).Do( 243 func(row *spanner.Row) (err error) { 244 var pid, aid string 245 var hash string 246 var size = new(int64) 247 var gcsURI string 248 if err = b.FromSpanner(row, &pid, &aid, &hash, &size, &gcsURI); err != nil { 249 return 250 } 251 if states == nil { 252 states = make(map[string]state) 253 } 254 // treat non-existing size as 0. 255 if size == nil { 256 size = new(int64) 257 } 258 // The artifact exists. 259 states[invID.Key(pid, aid).String()] = state{hash, *size, gcsURI} 260 return 261 }, 262 ) 263 if err != nil { 264 return nil, appstatus.Errorf(codes.Internal, "%s", err) 265 } 266 267 newArts := make([]*artifactCreationRequest, 0, len(arts)-len(states)) 268 for _, a := range arts { 269 // Save the hash, so that it can be reused in the post-verification 270 // after rbecase.UpdateBlob(). 271 if a.gcsURI == "" && a.hash == "" { 272 h := sha256.Sum256(a.data) 273 a.hash = artifacts.AddHashPrefix(hex.EncodeToString(h[:])) 274 } 275 st, ok := states[invID.Key(a.parentID(), a.artifactID).String()] 276 if !ok { 277 newArts = append(newArts, a) 278 continue 279 } 280 if (a.gcsURI == "") != (st.gcsURI == "") { 281 // Can't change from GCS to non-GCS and vice-versa 282 return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different storage scheme`, a.name(invID)) 283 } 284 if a.size != st.size { 285 return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different size: %d != %d`, a.name(invID), a.size, st.size) 286 } 287 if a.gcsURI != "" { 288 if a.gcsURI != st.gcsURI { 289 return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different GCS URI: %s != %s`, a.name(invID), a.gcsURI, st.gcsURI) 290 } 291 } else { 292 if a.hash != st.hash { 293 return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different hash`, a.name(invID)) 294 } 295 } 296 } 297 return newArts, nil 298 } 299 300 // checkArtStates checks if the states of the associated invocation and artifacts are 301 // compatible with creation of the artifacts. On success, it returns a list of 302 // the artifactCreationRequests of which artifact don't have states in Spanner yet. 303 func checkArtStates(ctx context.Context, invID invocations.ID, arts []*artifactCreationRequest) (reqs []*artifactCreationRequest, invInfo *invocationInfo, err error) { 304 var invState pb.Invocation_State 305 var createTime time.Time 306 var realm string 307 308 eg, ctx := errgroup.WithContext(ctx) 309 eg.Go(func() error { 310 return invocations.ReadColumns(ctx, invID, map[string]any{ 311 "State": &invState, "Realm": &realm, "CreateTime": &createTime, 312 }) 313 }) 314 315 eg.Go(func() (err error) { 316 reqs, err = findNewArtifacts(ctx, invID, arts) 317 return 318 }) 319 320 switch err := eg.Wait(); { 321 case err != nil: 322 return nil, nil, err 323 case invState != pb.Invocation_ACTIVE: 324 return nil, nil, appstatus.Errorf(codes.FailedPrecondition, "%s is not active", invID.Name()) 325 } 326 return reqs, &invocationInfo{ 327 id: string(invID), 328 realm: realm, 329 createTime: createTime, 330 }, nil 331 } 332 333 // createArtifactStates creates the states of given artifacts in Spanner. 334 func createArtifactStates(ctx context.Context, realm string, invID invocations.ID, arts []*artifactCreationRequest) error { 335 var noStateArts []*artifactCreationRequest 336 _, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) (err error) { 337 // Verify all the states again. 338 noStateArts, _, err = checkArtStates(ctx, invID, arts) 339 if err != nil { 340 return err 341 } 342 if len(noStateArts) == 0 { 343 logging.Warningf(ctx, "The states of all the artifacts already exist.") 344 } 345 for _, a := range noStateArts { 346 span.BufferWrite(ctx, spanutil.InsertMap("Artifacts", map[string]any{ 347 "InvocationId": invID, 348 "ParentId": a.parentID(), 349 "ArtifactId": a.artifactID, 350 "ContentType": a.contentType, 351 "Size": a.size, 352 "RBECASHash": a.hash, 353 "GcsURI": a.gcsURI, 354 })) 355 } 356 return nil 357 }) 358 if err != nil { 359 return errors.Annotate(err, "failed to write artifact to Spanner").Err() 360 } 361 spanutil.IncRowCount(ctx, len(noStateArts), spanutil.Artifacts, spanutil.Inserted, realm) 362 return nil 363 } 364 365 func uploadArtifactBlobs(ctx context.Context, rbeIns string, casClient repb.ContentAddressableStorageClient, invID invocations.ID, arts []*artifactCreationRequest) error { 366 casReq := &repb.BatchUpdateBlobsRequest{InstanceName: rbeIns} 367 for _, a := range arts { 368 casReq.Requests = append(casReq.Requests, &repb.BatchUpdateBlobsRequest_Request{ 369 Digest: &repb.Digest{Hash: artifacts.TrimHashPrefix(a.hash), SizeBytes: a.size}, 370 Data: a.data, 371 }) 372 } 373 resp, err := casClient.BatchUpdateBlobs(ctx, casReq, &grpc.MaxSendMsgSizeCallOption{MaxSendMsgSize: MaxBatchCreateArtifactSize}) 374 if err != nil { 375 // If BatchUpdateBlobs() returns INVALID_ARGUMENT, it means that 376 // the total size of the artifact contents was bigger than the max size that 377 // BatchUpdateBlobs() can accept. 378 return errors.Annotate(err, "cas.BatchUpdateBlobs failed").Err() 379 } 380 for i, r := range resp.GetResponses() { 381 cd := codes.Code(r.Status.Code) 382 if cd != codes.OK { 383 // Each individual error can be due to resource exhausted or unmatched digest. 384 // If unmatched digest, this RPC has a bug and needs to be fixed. 385 // If resource exhausted, the RBE server quota needs to be adjusted. 386 // 387 // Either case, it's a server-error, and an internal error will be returned. 388 return errors.Reason("artifact %q: cas.BatchUpdateBlobs failed", arts[i].name(invID)).Err() 389 } 390 } 391 return nil 392 } 393 394 // allowedBucketsForUser returns the GCS buckets a user is allowed to reference by reading 395 // the project config. 396 // If no config exists for the user, an empty map will be returned, rather than an error. 397 func allowedBucketsForUser(ctx context.Context, project, user string) (allowedBuckets map[string]bool, err error) { 398 allowedBuckets = map[string]bool{} 399 // This is cached for 1 minute, so no need to re-optimize here. 400 cfg, err := config.Project(ctx, project) 401 if err != nil { 402 if errors.Is(err, config.ErrNotFoundProjectConfig) { 403 return allowedBuckets, nil 404 } 405 return nil, err 406 } 407 408 for _, list := range cfg.GcsAllowList { 409 for _, listUser := range list.Users { 410 if listUser == user { 411 for _, bucket := range list.Buckets { 412 allowedBuckets[bucket] = true 413 } 414 return allowedBuckets, nil 415 } 416 } 417 } 418 return allowedBuckets, nil 419 } 420 421 // BatchCreateArtifacts implements pb.RecorderServer. 422 // This functions uploads the artifacts to RBE-CAS. 423 // If the artifact is a text-based artifact, it will also get uploaded to BigQuery. 424 // We have a percentage control to determine how many percent of artifacts got 425 // uploaded to BigQuery. 426 func (s *recorderServer) BatchCreateArtifacts(ctx context.Context, in *pb.BatchCreateArtifactsRequest) (*pb.BatchCreateArtifactsResponse, error) { 427 token, err := extractUpdateToken(ctx) 428 if err != nil { 429 return nil, err 430 } 431 if len(in.Requests) == 0 { 432 logging.Debugf(ctx, "Received a BatchCreateArtifactsRequest with 0 requests; returning") 433 return &pb.BatchCreateArtifactsResponse{}, nil 434 } 435 invID, arts, err := parseBatchCreateArtifactsRequest(in) 436 if err != nil { 437 return nil, appstatus.BadRequest(err) 438 } 439 if err := validateInvocationToken(ctx, token, invID); err != nil { 440 return nil, appstatus.Errorf(codes.PermissionDenied, "invalid update token") 441 } 442 443 var artsToCreate []*artifactCreationRequest 444 var invInfo *invocationInfo 445 func() { 446 ctx, cancel := span.ReadOnlyTransaction(ctx) 447 defer cancel() 448 artsToCreate, invInfo, err = checkArtStates(ctx, invID, arts) 449 }() 450 if err != nil { 451 return nil, err 452 } 453 if len(artsToCreate) == 0 { 454 logging.Debugf(ctx, "Found no artifacts to create") 455 return &pb.BatchCreateArtifactsResponse{}, nil 456 } 457 realm := invInfo.realm 458 project, _ := realms.Split(realm) 459 user := auth.CurrentUser(ctx).Identity 460 461 var allowedBuckets map[string]bool = nil 462 artsToUpload := make([]*artifactCreationRequest, 0, len(artsToCreate)) 463 for _, a := range artsToCreate { 464 // Only upload to RBE CAS the ones that are not in GCS 465 if a.gcsURI == "" { 466 artsToUpload = append(artsToUpload, a) 467 } else { 468 // Check this GCS reference is allowed by the project config. 469 // Delay construction of the checker (which may occasionally involve an RPC) until we know we 470 // actually need it. 471 if allowedBuckets == nil { 472 allowedBuckets, err = allowedBucketsForUser(ctx, project, string(user)) 473 if err != nil { 474 return nil, errors.Annotate(err, "fetch allowed buckets for user %s", string(user)).Err() 475 } 476 } 477 bucket, _ := gsutil.Split(a.gcsURI) 478 if _, ok := allowedBuckets[bucket]; !ok { 479 return nil, errors.New(fmt.Sprintf("the user %s does not have permission to reference GCS objects in bucket %s in project %s", string(user), bucket, project)) 480 } 481 } 482 } 483 484 if err := uploadArtifactBlobs(ctx, s.ArtifactRBEInstance, s.casClient, invID, artsToUpload); err != nil { 485 return nil, err 486 } 487 if err := createArtifactStates(ctx, realm, invID, artsToCreate); err != nil { 488 return nil, err 489 } 490 491 // Upload text artifact to BQ. 492 shouldUpload, err := shouldUploadToBQ(ctx) 493 if err != nil { 494 // Just log here, the feature is still in experiment, and we do not want 495 // to disturb the main flow. 496 err = errors.Annotate(err, "getting should upload to BQ").Err() 497 logging.Errorf(ctx, err.Error()) 498 } else { 499 if !shouldUpload { 500 // Just disable the logging for now because the feature is disabled. 501 // We will enable back when we enable the export. 502 // logging.Infof(ctx, "Uploading artifacts to BQ is disabled") 503 } else { 504 err = processBQUpload(ctx, s.bqExportClient, artsToCreate, invInfo) 505 if err != nil { 506 // Just log here, the feature is still in experiment, and we do not want 507 // to disturb the main flow. 508 err = errors.Annotate(err, "processBQUpload").Err() 509 logging.Errorf(ctx, err.Error()) 510 } 511 } 512 } 513 514 // Return all the artifacts to indicate that they were created. 515 ret := &pb.BatchCreateArtifactsResponse{Artifacts: make([]*pb.Artifact, len(arts))} 516 for i, a := range arts { 517 ret.Artifacts[i] = &pb.Artifact{ 518 Name: a.name(invID), 519 ArtifactId: a.artifactID, 520 ContentType: a.contentType, 521 SizeBytes: a.size, 522 } 523 } 524 return ret, nil 525 } 526 527 // processBQUpload filters text artifacts and upload to BigQuery. 528 func processBQUpload(ctx context.Context, client BQExportClient, artifactRequests []*artifactCreationRequest, invInfo *invocationInfo) error { 529 if client == nil { 530 return errors.New("bq export client should not be nil") 531 } 532 textArtifactRequests := filterTextArtifactRequests(ctx, artifactRequests, invInfo) 533 percent, err := percentOfArtifactsToBQ(ctx) 534 if err != nil { 535 return errors.Annotate(err, "getting percent of artifact to upload to BQ").Err() 536 } 537 textArtifactRequests, err = throttleArtifactsForBQ(textArtifactRequests, percent) 538 if err != nil { 539 return errors.Annotate(err, "throttle artifacts for bq").Err() 540 } else { 541 err = uploadArtifactsToBQ(ctx, client, textArtifactRequests, invInfo) 542 if err != nil { 543 return errors.Annotate(err, "uploadArtifactsToBQ").Err() 544 } 545 } 546 return nil 547 } 548 549 // filterTextArtifactRequests filters only text artifacts. 550 func filterTextArtifactRequests(ctx context.Context, artifactRequests []*artifactCreationRequest, invInfo *invocationInfo) []*artifactCreationRequest { 551 project, _ := realms.Split(invInfo.realm) 552 results := []*artifactCreationRequest{} 553 for _, req := range artifactRequests { 554 if req.contentType == "" { 555 artifactContentCounter.Add(ctx, 1, project, "empty") 556 } else { 557 if pbutil.IsTextArtifact(req.contentType) { 558 results = append(results, req) 559 artifactContentCounter.Add(ctx, 1, project, "text") 560 } else { 561 artifactContentCounter.Add(ctx, 1, project, "nontext") 562 } 563 } 564 } 565 return results 566 } 567 568 // throttleArtifactsForBQ limits the artifacts being to BigQuery based on percentage. 569 // It will allow us to roll out the feature slowly. 570 func throttleArtifactsForBQ(artifactRequests []*artifactCreationRequest, percent int) ([]*artifactCreationRequest, error) { 571 results := []*artifactCreationRequest{} 572 for _, req := range artifactRequests { 573 hashStr := fmt.Sprintf("%s%s", req.testID, req.artifactID) 574 hashVal := hash64([]byte(hashStr)) 575 if hashVal%100 < uint64(percent) { 576 results = append(results, req) 577 } 578 } 579 return results, nil 580 } 581 582 // hash64 returns a hash value (uint64) for a given string. 583 func hash64(bt []byte) uint64 { 584 hasher := fnv.New64a() 585 hasher.Write(bt) 586 return hasher.Sum64() 587 } 588 589 // percentOfArtifactsToBQ returns how many percents of artifact to be uploaded. 590 // Return value is an integer between [0, 100]. 591 func percentOfArtifactsToBQ(ctx context.Context) (int, error) { 592 cfg, err := config.GetServiceConfig(ctx) 593 if err != nil { 594 return 0, errors.Annotate(err, "get service config").Err() 595 } 596 return int(cfg.GetBqArtifactExportConfig().GetExportPercent()), nil 597 } 598 599 // shouldUploadToBQ returns true if we should upload artifacts to BigQuery. 600 // Note: Although we can also disable upload by setting percentOfArtifactsToBQ = 0, 601 // but it will also run some BQ exporter code. 602 // Disable shouldUploadToBQ flag will run no exporter code, therefore it is the safer option. 603 func shouldUploadToBQ(ctx context.Context) (bool, error) { 604 cfg, err := config.GetServiceConfig(ctx) 605 if err != nil { 606 return false, errors.Annotate(err, "get service config").Err() 607 } 608 return cfg.GetBqArtifactExportConfig().GetEnabled(), nil 609 } 610 611 func uploadArtifactsToBQ(ctx context.Context, client BQExportClient, reqs []*artifactCreationRequest, invInfo *invocationInfo) error { 612 rowsToUpload := []*bqpb.TextArtifactRow{} 613 for _, req := range reqs { 614 rows, err := reqToProtos(ctx, req, invInfo, MaxShardContentSize, LookbackWindow) 615 if err != nil { 616 return errors.Annotate(err, "req to protos").Err() 617 } 618 rowsToUpload = append(rowsToUpload, rows...) 619 } 620 logging.Infof(ctx, "Uploading %d rows BQ", len(rowsToUpload)) 621 if len(rowsToUpload) > 0 { 622 err := client.InsertArtifactRows(ctx, rowsToUpload) 623 if err != nil { 624 // Data is invalid. 625 if _, ok := errors.TagValueIn(bqutil.InvalidRowTagKey, err); ok { 626 artifactExportCounter.Add(ctx, int64(len(rowsToUpload)), rowsToUpload[0].Project, "failure_input") 627 } else { 628 artifactExportCounter.Add(ctx, int64(len(rowsToUpload)), rowsToUpload[0].Project, "failure_bq") 629 } 630 return errors.Annotate(err, "insert artifact rows").Err() 631 } else { 632 artifactExportCounter.Add(ctx, int64(len(rowsToUpload)), rowsToUpload[0].Project, "success") 633 } 634 } 635 return nil 636 } 637 638 func reqToProtos(ctx context.Context, req *artifactCreationRequest, invInfo *invocationInfo, maxSize int, lookbackWindow int) ([]*bqpb.TextArtifactRow, error) { 639 chunks, err := util.SplitToChunks(req.data, maxSize, lookbackWindow) 640 if err != nil { 641 return nil, errors.Annotate(err, "split to chunk").Err() 642 } 643 results := []*bqpb.TextArtifactRow{} 644 project, realm := realms.Split(invInfo.realm) 645 for i, chunk := range chunks { 646 results = append(results, &bqpb.TextArtifactRow{ 647 Project: project, 648 Realm: realm, 649 InvocationId: invInfo.id, 650 TestId: req.testID, 651 ResultId: req.resultID, 652 ArtifactId: req.artifactID, 653 ContentType: req.contentType, 654 NumShards: int32(len(chunks)), 655 ShardId: int32(i), 656 Content: chunk, 657 ShardContentSize: int32(len(chunk)), 658 ArtifactContentSize: int32(req.size), 659 PartitionTime: timestamppb.New(invInfo.createTime), 660 ArtifactShard: fmt.Sprintf("%s:%d", req.artifactID, i), 661 }) 662 } 663 return results, nil 664 }