go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/recorder/batch_create_artifacts.go

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package recorder
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha256"
    20  	"encoding/hex"
    21  	"fmt"
    22  	"hash/fnv"
    23  	"mime"
    24  	"time"
    25  
    26  	"cloud.google.com/go/spanner"
    27  	repb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2"
    28  	"golang.org/x/sync/errgroup"
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/codes"
    31  	"google.golang.org/protobuf/types/known/timestamppb"
    32  
    33  	"go.chromium.org/luci/common/errors"
    34  	"go.chromium.org/luci/common/logging"
    35  	"go.chromium.org/luci/common/tsmon/field"
    36  	"go.chromium.org/luci/common/tsmon/metric"
    37  	"go.chromium.org/luci/grpc/appstatus"
    38  	"go.chromium.org/luci/server/auth"
    39  	"go.chromium.org/luci/server/auth/realms"
    40  	"go.chromium.org/luci/server/span"
    41  
    42  	"go.chromium.org/luci/resultdb/bqutil"
    43  	"go.chromium.org/luci/resultdb/internal/artifacts"
    44  	"go.chromium.org/luci/resultdb/internal/config"
    45  	"go.chromium.org/luci/resultdb/internal/gsutil"
    46  	"go.chromium.org/luci/resultdb/internal/invocations"
    47  	"go.chromium.org/luci/resultdb/internal/spanutil"
    48  	"go.chromium.org/luci/resultdb/pbutil"
    49  	bqpb "go.chromium.org/luci/resultdb/proto/bq"
    50  	pb "go.chromium.org/luci/resultdb/proto/v1"
    51  	"go.chromium.org/luci/resultdb/util"
    52  )
    53  
    54  // TODO(crbug.com/1177213) - make this configurable.
    55  const MaxBatchCreateArtifactSize = 10 * 1024 * 1024
    56  
     57  // MaxShardContentSize is the maximum content size in a BQ row.
     58  // Artifact content bigger than this size needs to be sharded.
     59  // Leave 10 KB for other fields; the rest is content.
    60  const MaxShardContentSize = bqutil.RowMaxBytes - 10*1024
    61  
     62  // LookbackWindow is used when chunking. It specifies how many bytes we should
     63  // look back to find newline/whitespace characters at which to split the chunks.
    64  const LookbackWindow = 1024
    65  
    66  var (
    67  	artifactExportCounter = metric.NewCounter(
    68  		"resultdb/artifacts/bqexport",
     69  		"The number of artifact rows to export to BigQuery, grouped by project and status.",
    70  		nil,
    71  		// The LUCI Project.
    72  		field.String("project"),
    73  		// The status of the export.
    74  		// Possible values:
    75  		// - "success": The export was successful.
    76  		// - "failure_input": There was an error with the input artifact
    77  		// (e.g. artifact contains invalid UTF-8 character).
    78  		// - "failure_bq": There was an error with BigQuery (e.g. throttling, load shedding),
     79  		// which caused the artifact export to fail.
    80  		field.String("status"),
    81  	)
    82  
    83  	artifactContentCounter = metric.NewCounter(
    84  		"resultdb/artifacts/content",
    85  		"The number of artifacts for a particular content type.",
    86  		nil,
    87  		// The LUCI Project.
    88  		field.String("project"),
     89  		// The content type group of the artifact.
    90  		// Possible values: "text", "nontext", "empty".
    91  		// We record the group instead of the actual value to prevent
     92  		// an explosion in cardinality.
    93  		field.String("content_type"),
    94  	)
    95  )
    96  
    97  type artifactCreationRequest struct {
    98  	testID      string
    99  	resultID    string
   100  	artifactID  string
   101  	contentType string
   102  
   103  	// hash is a hash of the artifact data.  It is not supplied or calculated for GCS artifacts.
   104  	hash string
   105  	// size is the size of the artifact data in bytes.  In the case of a GCS artifact it is user-specified, optional and not verified.
   106  	size int64
   107  	// data is the artifact contents data that will be stored in RBE-CAS.  If gcsURI is provided, this must be empty.
   108  	data []byte
   109  	// gcsURI is the location of the artifact content if it is stored in GCS.  If this is provided, data must be empty.
   110  	gcsURI string
   111  }
   112  
   113  type invocationInfo struct {
   114  	id         string
   115  	realm      string
   116  	createTime time.Time
   117  }
   118  
   119  // BQExportClient is the interface for exporting artifacts.
   120  type BQExportClient interface {
   121  	InsertArtifactRows(ctx context.Context, rows []*bqpb.TextArtifactRow) error
   122  }
   123  
   124  // name returns the artifact name.
   125  func (a *artifactCreationRequest) name(invID invocations.ID) string {
   126  	if a.testID == "" {
   127  		return pbutil.InvocationArtifactName(string(invID), a.artifactID)
   128  	}
   129  	return pbutil.TestResultArtifactName(string(invID), a.testID, a.resultID, a.artifactID)
   130  }
   131  
   132  // parentID returns the local parent ID of the artifact.
   133  func (a *artifactCreationRequest) parentID() string {
   134  	return artifacts.ParentID(a.testID, a.resultID)
   135  }
   136  
   137  func parseCreateArtifactRequest(req *pb.CreateArtifactRequest) (invocations.ID, *artifactCreationRequest, error) {
   138  	if req.GetArtifact() == nil {
   139  		return "", nil, errors.Reason("artifact: unspecified").Err()
   140  	}
   141  	if err := pbutil.ValidateArtifactID(req.Artifact.ArtifactId); err != nil {
   142  		return "", nil, errors.Annotate(err, "artifact_id").Err()
   143  	}
   144  	if req.Artifact.ContentType != "" {
   145  		if _, _, err := mime.ParseMediaType(req.Artifact.ContentType); err != nil {
   146  			return "", nil, errors.Annotate(err, "content_type").Err()
   147  		}
   148  	}
   149  
   150  	// parent
   151  	if req.Parent == "" {
   152  		return "", nil, errors.Reason("parent: unspecified").Err()
   153  	}
   154  	invIDStr, testID, resultID, err := pbutil.ParseTestResultName(req.Parent)
   155  	if err != nil {
   156  		if invIDStr, err = pbutil.ParseInvocationName(req.Parent); err != nil {
   157  			return "", nil, errors.Reason("parent: neither valid invocation name nor valid test result name").Err()
   158  		}
   159  	}
   160  
   161  	if len(req.Artifact.Contents) != 0 && req.Artifact.GcsUri != "" {
   162  		return "", nil, errors.Reason("only one of contents and gcs_uri can be given").Err()
   163  	}
   164  
   165  	sizeBytes := int64(len(req.Artifact.Contents))
   166  
   167  	if sizeBytes != 0 && req.Artifact.SizeBytes != 0 && sizeBytes != req.Artifact.SizeBytes {
   168  		return "", nil, errors.Reason("sizeBytes and contents are specified but don't match").Err()
   169  	}
   170  
   171  	// If contents field is empty, try to set size from the request instead.
   172  	if sizeBytes == 0 {
   173  		if req.Artifact.SizeBytes != 0 {
   174  			sizeBytes = req.Artifact.SizeBytes
   175  		}
   176  	}
   177  
   178  	return invocations.ID(invIDStr), &artifactCreationRequest{
   179  		artifactID:  req.Artifact.ArtifactId,
   180  		contentType: req.Artifact.ContentType,
   181  		data:        req.Artifact.Contents,
   182  		size:        sizeBytes,
   183  		testID:      testID,
   184  		resultID:    resultID,
   185  		gcsURI:      req.Artifact.GcsUri,
   186  	}, nil
   187  }
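
// For illustration only: a minimal sketch of requests that
// parseCreateArtifactRequest accepts. The parent may be either an invocation
// name or a test result name, and at most one of Contents and GcsUri may be
// set. All identifiers and values below are hypothetical.
var exampleCreateArtifactRequests = []*pb.CreateArtifactRequest{
	{
		// An invocation-level artifact with inline contents; SizeBytes may be
		// omitted because it is derived from Contents.
		Parent: "invocations/build-123",
		Artifact: &pb.Artifact{
			ArtifactId:  "stdout",
			ContentType: "text/plain",
			Contents:    []byte("hello world"),
		},
	},
	{
		// A test-result-level artifact referencing a GCS object; for GCS
		// artifacts SizeBytes is user-specified, optional and not verified.
		Parent: "invocations/build-123/tests/MySuite.MyTest/results/result-1",
		Artifact: &pb.Artifact{
			ArtifactId: "screenshot",
			GcsUri:     "gs://example-bucket/screenshot.png",
			SizeBytes:  4096,
		},
	},
}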
   188  
   189  // parseBatchCreateArtifactsRequest parses a batch request and returns
   190  // artifactCreationRequests for each of the artifacts w/o hash computation.
    191  // It returns an error if
    192  // - any of the artifact IDs or content types is invalid,
    193  // - the total size exceeds MaxBatchCreateArtifactSize,
    194  // - more than one invocation is associated with the artifacts, or
    195  // - both data and a GCS URI are supplied.
   196  func parseBatchCreateArtifactsRequest(in *pb.BatchCreateArtifactsRequest) (invocations.ID, []*artifactCreationRequest, error) {
   197  	var tSize int64
   198  	var invID invocations.ID
   199  
   200  	if err := pbutil.ValidateBatchRequestCount(len(in.Requests)); err != nil {
   201  		return "", nil, err
   202  	}
   203  	arts := make([]*artifactCreationRequest, len(in.Requests))
   204  	for i, req := range in.Requests {
   205  		inv, art, err := parseCreateArtifactRequest(req)
   206  		if err != nil {
   207  			return "", nil, errors.Annotate(err, "requests[%d]", i).Err()
   208  		}
   209  		switch {
   210  		case invID == "":
   211  			invID = inv
   212  		case invID != inv:
   213  			return "", nil, errors.Reason("requests[%d]: only one invocation is allowed: %q, %q", i, invID, inv).Err()
   214  		}
   215  
   216  		// TODO(ddoman): limit the max request body size in prpc level.
   217  		tSize += art.size
   218  		if tSize > MaxBatchCreateArtifactSize {
   219  			return "", nil, errors.Reason("the total size of artifact contents exceeded %d", MaxBatchCreateArtifactSize).Err()
   220  		}
   221  		arts[i] = art
   222  	}
   223  	return invID, arts, nil
   224  }
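
// For illustration only: a batch wrapping the example requests above. Every
// entry must target the same invocation, and the summed content size must stay
// under MaxBatchCreateArtifactSize (10 MiB). The variable is hypothetical.
var exampleBatchCreateArtifactsRequest = &pb.BatchCreateArtifactsRequest{
	Requests: exampleCreateArtifactRequests,
}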
   225  
   226  // findNewArtifacts returns a list of the artifacts that don't have states yet.
   227  // If one exists w/ different hash/size, this returns an error.
   228  func findNewArtifacts(ctx context.Context, invID invocations.ID, arts []*artifactCreationRequest) ([]*artifactCreationRequest, error) {
   229  	// artifacts are not expected to exist in most cases, and this map would likely
   230  	// be empty.
   231  	type state struct {
   232  		hash   string
   233  		size   int64
   234  		gcsURI string
   235  	}
   236  	var states map[string]state
   237  	ks := spanner.KeySets()
   238  	for _, a := range arts {
   239  		ks = spanner.KeySets(invID.Key(a.parentID(), a.artifactID), ks)
   240  	}
   241  	var b spanutil.Buffer
   242  	err := span.Read(ctx, "Artifacts", ks, []string{"ParentId", "ArtifactId", "RBECASHash", "Size", "GcsURI"}).Do(
   243  		func(row *spanner.Row) (err error) {
   244  			var pid, aid string
   245  			var hash string
   246  			var size = new(int64)
   247  			var gcsURI string
   248  			if err = b.FromSpanner(row, &pid, &aid, &hash, &size, &gcsURI); err != nil {
   249  				return
   250  			}
   251  			if states == nil {
   252  				states = make(map[string]state)
   253  			}
   254  			// treat non-existing size as 0.
   255  			if size == nil {
   256  				size = new(int64)
   257  			}
   258  			// The artifact exists.
   259  			states[invID.Key(pid, aid).String()] = state{hash, *size, gcsURI}
   260  			return
   261  		},
   262  	)
   263  	if err != nil {
   264  		return nil, appstatus.Errorf(codes.Internal, "%s", err)
   265  	}
   266  
   267  	newArts := make([]*artifactCreationRequest, 0, len(arts)-len(states))
   268  	for _, a := range arts {
   269  		// Save the hash, so that it can be reused in the post-verification
    270  		// after cas.BatchUpdateBlobs().
   271  		if a.gcsURI == "" && a.hash == "" {
   272  			h := sha256.Sum256(a.data)
   273  			a.hash = artifacts.AddHashPrefix(hex.EncodeToString(h[:]))
   274  		}
   275  		st, ok := states[invID.Key(a.parentID(), a.artifactID).String()]
   276  		if !ok {
   277  			newArts = append(newArts, a)
   278  			continue
   279  		}
   280  		if (a.gcsURI == "") != (st.gcsURI == "") {
   281  			// Can't change from GCS to non-GCS and vice-versa
   282  			return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different storage scheme`, a.name(invID))
   283  		}
   284  		if a.size != st.size {
   285  			return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different size: %d != %d`, a.name(invID), a.size, st.size)
   286  		}
   287  		if a.gcsURI != "" {
   288  			if a.gcsURI != st.gcsURI {
   289  				return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different GCS URI: %s != %s`, a.name(invID), a.gcsURI, st.gcsURI)
   290  			}
   291  		} else {
   292  			if a.hash != st.hash {
   293  				return nil, appstatus.Errorf(codes.AlreadyExists, `%q: exists w/ different hash`, a.name(invID))
   294  			}
   295  		}
   296  	}
   297  	return newArts, nil
   298  }
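
// For illustration only: how the RBE-CAS hash recorded for an inline artifact
// is derived, mirroring the computation in findNewArtifacts above. Re-creating
// an artifact with an identical hash and size is a no-op, while a mismatch
// yields an AlreadyExists error. The helper is hypothetical and not part of
// the RPC flow.
func exampleRBECASHash(data []byte) string {
	h := sha256.Sum256(data)
	return artifacts.AddHashPrefix(hex.EncodeToString(h[:]))
}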
   299  
   300  // checkArtStates checks if the states of the associated invocation and artifacts are
   301  // compatible with creation of the artifacts. On success, it returns a list of
    302  // the artifactCreationRequests whose artifacts don't have states in Spanner yet.
   303  func checkArtStates(ctx context.Context, invID invocations.ID, arts []*artifactCreationRequest) (reqs []*artifactCreationRequest, invInfo *invocationInfo, err error) {
   304  	var invState pb.Invocation_State
   305  	var createTime time.Time
   306  	var realm string
   307  
   308  	eg, ctx := errgroup.WithContext(ctx)
   309  	eg.Go(func() error {
   310  		return invocations.ReadColumns(ctx, invID, map[string]any{
   311  			"State": &invState, "Realm": &realm, "CreateTime": &createTime,
   312  		})
   313  	})
   314  
   315  	eg.Go(func() (err error) {
   316  		reqs, err = findNewArtifacts(ctx, invID, arts)
   317  		return
   318  	})
   319  
   320  	switch err := eg.Wait(); {
   321  	case err != nil:
   322  		return nil, nil, err
   323  	case invState != pb.Invocation_ACTIVE:
   324  		return nil, nil, appstatus.Errorf(codes.FailedPrecondition, "%s is not active", invID.Name())
   325  	}
   326  	return reqs, &invocationInfo{
   327  		id:         string(invID),
   328  		realm:      realm,
   329  		createTime: createTime,
   330  	}, nil
   331  }
   332  
   333  // createArtifactStates creates the states of given artifacts in Spanner.
   334  func createArtifactStates(ctx context.Context, realm string, invID invocations.ID, arts []*artifactCreationRequest) error {
   335  	var noStateArts []*artifactCreationRequest
   336  	_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) (err error) {
   337  		// Verify all the states again.
   338  		noStateArts, _, err = checkArtStates(ctx, invID, arts)
   339  		if err != nil {
   340  			return err
   341  		}
   342  		if len(noStateArts) == 0 {
   343  			logging.Warningf(ctx, "The states of all the artifacts already exist.")
   344  		}
   345  		for _, a := range noStateArts {
   346  			span.BufferWrite(ctx, spanutil.InsertMap("Artifacts", map[string]any{
   347  				"InvocationId": invID,
   348  				"ParentId":     a.parentID(),
   349  				"ArtifactId":   a.artifactID,
   350  				"ContentType":  a.contentType,
   351  				"Size":         a.size,
   352  				"RBECASHash":   a.hash,
   353  				"GcsURI":       a.gcsURI,
   354  			}))
   355  		}
   356  		return nil
   357  	})
   358  	if err != nil {
   359  		return errors.Annotate(err, "failed to write artifact to Spanner").Err()
   360  	}
   361  	spanutil.IncRowCount(ctx, len(noStateArts), spanutil.Artifacts, spanutil.Inserted, realm)
   362  	return nil
   363  }
   364  
   365  func uploadArtifactBlobs(ctx context.Context, rbeIns string, casClient repb.ContentAddressableStorageClient, invID invocations.ID, arts []*artifactCreationRequest) error {
   366  	casReq := &repb.BatchUpdateBlobsRequest{InstanceName: rbeIns}
   367  	for _, a := range arts {
   368  		casReq.Requests = append(casReq.Requests, &repb.BatchUpdateBlobsRequest_Request{
   369  			Digest: &repb.Digest{Hash: artifacts.TrimHashPrefix(a.hash), SizeBytes: a.size},
   370  			Data:   a.data,
   371  		})
   372  	}
   373  	resp, err := casClient.BatchUpdateBlobs(ctx, casReq, &grpc.MaxSendMsgSizeCallOption{MaxSendMsgSize: MaxBatchCreateArtifactSize})
   374  	if err != nil {
   375  		// If BatchUpdateBlobs() returns INVALID_ARGUMENT, it means that
   376  		// the total size of the artifact contents was bigger than the max size that
   377  		// BatchUpdateBlobs() can accept.
   378  		return errors.Annotate(err, "cas.BatchUpdateBlobs failed").Err()
   379  	}
   380  	for i, r := range resp.GetResponses() {
   381  		cd := codes.Code(r.Status.Code)
   382  		if cd != codes.OK {
   383  			// Each individual error can be due to resource exhausted or unmatched digest.
   384  			// If unmatched digest, this RPC has a bug and needs to be fixed.
   385  			// If resource exhausted, the RBE server quota needs to be adjusted.
   386  			//
    387  			// In either case, it's a server error, and an internal error will be returned.
   388  			return errors.Reason("artifact %q: cas.BatchUpdateBlobs failed", arts[i].name(invID)).Err()
   389  		}
   390  	}
   391  	return nil
   392  }
   393  
   394  // allowedBucketsForUser returns the GCS buckets a user is allowed to reference by reading
   395  // the project config.
    396  // If no config exists for the project, an empty map will be returned, rather than an error.
   397  func allowedBucketsForUser(ctx context.Context, project, user string) (allowedBuckets map[string]bool, err error) {
   398  	allowedBuckets = map[string]bool{}
   399  	// This is cached for 1 minute, so no need to re-optimize here.
   400  	cfg, err := config.Project(ctx, project)
   401  	if err != nil {
   402  		if errors.Is(err, config.ErrNotFoundProjectConfig) {
   403  			return allowedBuckets, nil
   404  		}
   405  		return nil, err
   406  	}
   407  
   408  	for _, list := range cfg.GcsAllowList {
   409  		for _, listUser := range list.Users {
   410  			if listUser == user {
   411  				for _, bucket := range list.Buckets {
   412  					allowedBuckets[bucket] = true
   413  				}
   414  				return allowedBuckets, nil
   415  			}
   416  		}
   417  	}
   418  	return allowedBuckets, nil
   419  }
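
// For illustration, a hedged sketch of the project config shape consulted by
// allowedBucketsForUser. The textproto field names are inferred from the Go
// fields GcsAllowList, Users and Buckets; the values are hypothetical.
//
//	gcs_allow_list {
//	  users: "user:someone@example.com"
//	  buckets: "example-test-logs"
//	}
//
// With such an entry, that user may reference gs://example-test-logs objects
// in artifact GcsUri fields; users with no matching entry get an empty
// allow-list.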
   420  
   421  // BatchCreateArtifacts implements pb.RecorderServer.
    422  // This function uploads the artifacts to RBE-CAS.
    423  // If an artifact is text-based, it will also get uploaded to BigQuery.
    424  // We have a percentage control to determine what percentage of artifacts
    425  // get uploaded to BigQuery.
   426  func (s *recorderServer) BatchCreateArtifacts(ctx context.Context, in *pb.BatchCreateArtifactsRequest) (*pb.BatchCreateArtifactsResponse, error) {
   427  	token, err := extractUpdateToken(ctx)
   428  	if err != nil {
   429  		return nil, err
   430  	}
   431  	if len(in.Requests) == 0 {
   432  		logging.Debugf(ctx, "Received a BatchCreateArtifactsRequest with 0 requests; returning")
   433  		return &pb.BatchCreateArtifactsResponse{}, nil
   434  	}
   435  	invID, arts, err := parseBatchCreateArtifactsRequest(in)
   436  	if err != nil {
   437  		return nil, appstatus.BadRequest(err)
   438  	}
   439  	if err := validateInvocationToken(ctx, token, invID); err != nil {
   440  		return nil, appstatus.Errorf(codes.PermissionDenied, "invalid update token")
   441  	}
   442  
   443  	var artsToCreate []*artifactCreationRequest
   444  	var invInfo *invocationInfo
   445  	func() {
   446  		ctx, cancel := span.ReadOnlyTransaction(ctx)
   447  		defer cancel()
   448  		artsToCreate, invInfo, err = checkArtStates(ctx, invID, arts)
   449  	}()
   450  	if err != nil {
   451  		return nil, err
   452  	}
   453  	if len(artsToCreate) == 0 {
   454  		logging.Debugf(ctx, "Found no artifacts to create")
   455  		return &pb.BatchCreateArtifactsResponse{}, nil
   456  	}
   457  	realm := invInfo.realm
   458  	project, _ := realms.Split(realm)
   459  	user := auth.CurrentUser(ctx).Identity
   460  
    461  	var allowedBuckets map[string]bool
   462  	artsToUpload := make([]*artifactCreationRequest, 0, len(artsToCreate))
   463  	for _, a := range artsToCreate {
   464  		// Only upload to RBE CAS the ones that are not in GCS
   465  		if a.gcsURI == "" {
   466  			artsToUpload = append(artsToUpload, a)
   467  		} else {
   468  			// Check this GCS reference is allowed by the project config.
   469  			// Delay construction of the checker (which may occasionally involve an RPC) until we know we
   470  			// actually need it.
   471  			if allowedBuckets == nil {
   472  				allowedBuckets, err = allowedBucketsForUser(ctx, project, string(user))
   473  				if err != nil {
   474  					return nil, errors.Annotate(err, "fetch allowed buckets for user %s", string(user)).Err()
   475  				}
   476  			}
   477  			bucket, _ := gsutil.Split(a.gcsURI)
   478  			if _, ok := allowedBuckets[bucket]; !ok {
   479  				return nil, errors.New(fmt.Sprintf("the user %s does not have permission to reference GCS objects in bucket %s in project %s", string(user), bucket, project))
   480  			}
   481  		}
   482  	}
   483  
   484  	if err := uploadArtifactBlobs(ctx, s.ArtifactRBEInstance, s.casClient, invID, artsToUpload); err != nil {
   485  		return nil, err
   486  	}
   487  	if err := createArtifactStates(ctx, realm, invID, artsToCreate); err != nil {
   488  		return nil, err
   489  	}
   490  
   491  	// Upload text artifact to BQ.
   492  	shouldUpload, err := shouldUploadToBQ(ctx)
   493  	if err != nil {
    494  		// Just log here; the feature is still experimental, and we do not want
    495  		// to disturb the main flow.
   496  		err = errors.Annotate(err, "getting should upload to BQ").Err()
   497  		logging.Errorf(ctx, err.Error())
   498  	} else {
   499  		if !shouldUpload {
   500  			// Just disable the logging for now because the feature is disabled.
    501  			// We will enable it again when we enable the export.
   502  			// logging.Infof(ctx, "Uploading artifacts to BQ is disabled")
   503  		} else {
   504  			err = processBQUpload(ctx, s.bqExportClient, artsToCreate, invInfo)
   505  			if err != nil {
    506  				// Just log here; the feature is still experimental, and we do not want
    507  				// to disturb the main flow.
   508  				err = errors.Annotate(err, "processBQUpload").Err()
   509  				logging.Errorf(ctx, err.Error())
   510  			}
   511  		}
   512  	}
   513  
   514  	// Return all the artifacts to indicate that they were created.
   515  	ret := &pb.BatchCreateArtifactsResponse{Artifacts: make([]*pb.Artifact, len(arts))}
   516  	for i, a := range arts {
   517  		ret.Artifacts[i] = &pb.Artifact{
   518  			Name:        a.name(invID),
   519  			ArtifactId:  a.artifactID,
   520  			ContentType: a.contentType,
   521  			SizeBytes:   a.size,
   522  		}
   523  	}
   524  	return ret, nil
   525  }
   526  
    527  // processBQUpload filters text artifacts and uploads them to BigQuery.
   528  func processBQUpload(ctx context.Context, client BQExportClient, artifactRequests []*artifactCreationRequest, invInfo *invocationInfo) error {
   529  	if client == nil {
   530  		return errors.New("bq export client should not be nil")
   531  	}
   532  	textArtifactRequests := filterTextArtifactRequests(ctx, artifactRequests, invInfo)
   533  	percent, err := percentOfArtifactsToBQ(ctx)
   534  	if err != nil {
   535  		return errors.Annotate(err, "getting percent of artifact to upload to BQ").Err()
   536  	}
   537  	textArtifactRequests, err = throttleArtifactsForBQ(textArtifactRequests, percent)
   538  	if err != nil {
   539  		return errors.Annotate(err, "throttle artifacts for bq").Err()
   540  	} else {
   541  		err = uploadArtifactsToBQ(ctx, client, textArtifactRequests, invInfo)
   542  		if err != nil {
   543  			return errors.Annotate(err, "uploadArtifactsToBQ").Err()
   544  		}
   545  	}
   546  	return nil
   547  }
   548  
   549  // filterTextArtifactRequests filters only text artifacts.
   550  func filterTextArtifactRequests(ctx context.Context, artifactRequests []*artifactCreationRequest, invInfo *invocationInfo) []*artifactCreationRequest {
   551  	project, _ := realms.Split(invInfo.realm)
   552  	results := []*artifactCreationRequest{}
   553  	for _, req := range artifactRequests {
   554  		if req.contentType == "" {
   555  			artifactContentCounter.Add(ctx, 1, project, "empty")
   556  		} else {
   557  			if pbutil.IsTextArtifact(req.contentType) {
   558  				results = append(results, req)
   559  				artifactContentCounter.Add(ctx, 1, project, "text")
   560  			} else {
   561  				artifactContentCounter.Add(ctx, 1, project, "nontext")
   562  			}
   563  		}
   564  	}
   565  	return results
   566  }
   567  
    568  // throttleArtifactsForBQ limits the artifacts being sent to BigQuery based on a percentage.
    569  // This allows us to roll out the feature slowly.
   570  func throttleArtifactsForBQ(artifactRequests []*artifactCreationRequest, percent int) ([]*artifactCreationRequest, error) {
   571  	results := []*artifactCreationRequest{}
   572  	for _, req := range artifactRequests {
   573  		hashStr := fmt.Sprintf("%s%s", req.testID, req.artifactID)
   574  		hashVal := hash64([]byte(hashStr))
   575  		if hashVal%100 < uint64(percent) {
   576  			results = append(results, req)
   577  		}
   578  	}
   579  	return results, nil
   580  }
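
// For illustration only: the per-artifact throttling decision made above. An
// artifact is exported iff fnv64a(testID + artifactID) % 100 < percent, so the
// outcome is deterministic for a given artifact across requests and retries.
// The helper is hypothetical and not used by the RPC flow.
func exampleShouldExportToBQ(testID, artifactID string, percent int) bool {
	return hash64([]byte(testID+artifactID))%100 < uint64(percent)
}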
   581  
    582  // hash64 returns a hash value (uint64) for the given bytes.
   583  func hash64(bt []byte) uint64 {
   584  	hasher := fnv.New64a()
   585  	hasher.Write(bt)
   586  	return hasher.Sum64()
   587  }
   588  
    589  // percentOfArtifactsToBQ returns the percentage of artifacts to be uploaded to BigQuery.
    590  // The return value is an integer in [0, 100].
   591  func percentOfArtifactsToBQ(ctx context.Context) (int, error) {
   592  	cfg, err := config.GetServiceConfig(ctx)
   593  	if err != nil {
   594  		return 0, errors.Annotate(err, "get service config").Err()
   595  	}
   596  	return int(cfg.GetBqArtifactExportConfig().GetExportPercent()), nil
   597  }
   598  
   599  // shouldUploadToBQ returns true if we should upload artifacts to BigQuery.
    600  // Note: Although we can also disable the upload by setting percentOfArtifactsToBQ = 0,
    601  // that still runs some BQ exporter code.
    602  // Disabling the shouldUploadToBQ flag runs no exporter code, therefore it is the safer option.
   603  func shouldUploadToBQ(ctx context.Context) (bool, error) {
   604  	cfg, err := config.GetServiceConfig(ctx)
   605  	if err != nil {
   606  		return false, errors.Annotate(err, "get service config").Err()
   607  	}
   608  	return cfg.GetBqArtifactExportConfig().GetEnabled(), nil
   609  }
   610  
   611  func uploadArtifactsToBQ(ctx context.Context, client BQExportClient, reqs []*artifactCreationRequest, invInfo *invocationInfo) error {
   612  	rowsToUpload := []*bqpb.TextArtifactRow{}
   613  	for _, req := range reqs {
   614  		rows, err := reqToProtos(ctx, req, invInfo, MaxShardContentSize, LookbackWindow)
   615  		if err != nil {
   616  			return errors.Annotate(err, "req to protos").Err()
   617  		}
   618  		rowsToUpload = append(rowsToUpload, rows...)
   619  	}
    620  	logging.Infof(ctx, "Uploading %d rows to BQ", len(rowsToUpload))
   621  	if len(rowsToUpload) > 0 {
   622  		err := client.InsertArtifactRows(ctx, rowsToUpload)
   623  		if err != nil {
   624  			// Data is invalid.
   625  			if _, ok := errors.TagValueIn(bqutil.InvalidRowTagKey, err); ok {
   626  				artifactExportCounter.Add(ctx, int64(len(rowsToUpload)), rowsToUpload[0].Project, "failure_input")
   627  			} else {
   628  				artifactExportCounter.Add(ctx, int64(len(rowsToUpload)), rowsToUpload[0].Project, "failure_bq")
   629  			}
   630  			return errors.Annotate(err, "insert artifact rows").Err()
   631  		} else {
   632  			artifactExportCounter.Add(ctx, int64(len(rowsToUpload)), rowsToUpload[0].Project, "success")
   633  		}
   634  	}
   635  	return nil
   636  }
   637  
   638  func reqToProtos(ctx context.Context, req *artifactCreationRequest, invInfo *invocationInfo, maxSize int, lookbackWindow int) ([]*bqpb.TextArtifactRow, error) {
   639  	chunks, err := util.SplitToChunks(req.data, maxSize, lookbackWindow)
   640  	if err != nil {
   641  		return nil, errors.Annotate(err, "split to chunk").Err()
   642  	}
   643  	results := []*bqpb.TextArtifactRow{}
   644  	project, realm := realms.Split(invInfo.realm)
   645  	for i, chunk := range chunks {
   646  		results = append(results, &bqpb.TextArtifactRow{
   647  			Project:             project,
   648  			Realm:               realm,
   649  			InvocationId:        invInfo.id,
   650  			TestId:              req.testID,
   651  			ResultId:            req.resultID,
   652  			ArtifactId:          req.artifactID,
   653  			ContentType:         req.contentType,
   654  			NumShards:           int32(len(chunks)),
   655  			ShardId:             int32(i),
   656  			Content:             chunk,
   657  			ShardContentSize:    int32(len(chunk)),
   658  			ArtifactContentSize: int32(req.size),
   659  			PartitionTime:       timestamppb.New(invInfo.createTime),
   660  			ArtifactShard:       fmt.Sprintf("%s:%d", req.artifactID, i),
   661  		})
   662  	}
   663  	return results, nil
   664  }
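
// For illustration: util.SplitToChunks breaks req.data into chunks of at most
// maxSize bytes, looking back up to lookbackWindow bytes for a newline or
// whitespace character to split on (see LookbackWindow above). An artifact
// split into three chunks therefore produces three TextArtifactRow entries
// that share the same ArtifactId, with NumShards = 3, ShardId = 0, 1, 2,
// ShardContentSize equal to each chunk's length, and ArtifactShard
// "<artifactID>:0", "<artifactID>:1" and "<artifactID>:2".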