go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/bq/bq.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bq
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"strings"
    22  	"sync"
    23  
    24  	"google.golang.org/protobuf/types/known/timestamppb"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/logging"
    29  	"go.chromium.org/luci/common/retry/transient"
    30  	"go.chromium.org/luci/gae/service/datastore"
    31  
    32  	gerritpb "go.chromium.org/luci/common/proto/gerrit"
    33  	cvbqpb "go.chromium.org/luci/cv/api/bigquery/v1"
    34  	"go.chromium.org/luci/cv/internal/common"
    35  	cvbq "go.chromium.org/luci/cv/internal/common/bq"
    36  	"go.chromium.org/luci/cv/internal/metrics"
    37  	"go.chromium.org/luci/cv/internal/run"
    38  	"go.chromium.org/luci/cv/internal/tryjob"
    39  )
    40  
    41  const (
    42  	// CV's own dataset/table.
    43  	CVDataset = "raw"
    44  	CVTable   = "attempts_cv"
    45  
    46  	// Legacy CQ dataset.
    47  	legacyProject    = "commit-queue"
    48  	legacyProjectDev = "commit-queue-dev"
    49  	legacyDataset    = "raw"
    50  	legacyTable      = "attempts"
    51  )
    52  
    53  func send(ctx context.Context, env *common.Env, client cvbq.Client, id common.RunID) error {
    54  	r := &run.Run{ID: id}
    55  	switch err := datastore.Get(ctx, r); {
    56  	case err == datastore.ErrNoSuchEntity:
    57  		return errors.Reason("Run not found").Err()
    58  	case err != nil:
    59  		return errors.Annotate(err, "failed to fetch Run").Tag(transient.Tag).Err()
    60  	case !run.IsEnded(r.Status):
    61  		panic(fmt.Errorf("the Run status must be final before sending to BQ"))
    62  	}
    63  
    64  	if r.Mode != run.DryRun && r.Mode != run.FullRun {
    65  		// only export dry run and full run to bq
    66  		return nil
    67  	}
    68  	// Load CLs and convert them to GerritChanges including submit status.
    69  	cls, err := run.LoadRunCLs(ctx, r.ID, r.CLs)
    70  	if err != nil {
    71  		return err
    72  	}
    73  
    74  	a, err := makeAttempt(ctx, r, cls)
    75  	if err != nil {
    76  		return errors.Annotate(err, "failed to make Attempt").Err()
    77  	}
    78  
    79  	var wg sync.WaitGroup
    80  	var exportErr error
    81  	wg.Add(2)
    82  	go func() {
    83  		defer wg.Done()
    84  		logging.Debugf(ctx, "CV exporting Run to CQ BQ table")
    85  		project := legacyProject
    86  		if env.IsGAEDev {
    87  			project = legacyProjectDev
    88  		}
    89  		exportErr = client.SendRow(ctx, cvbq.Row{
    90  			CloudProject: project,
    91  			Dataset:      legacyDataset,
    92  			Table:        legacyTable,
    93  			OperationID:  "run-" + string(id),
    94  			Payload:      a,
    95  		})
    96  		if exportErr == nil {
    97  			delay := clock.Since(ctx, r.EndTime).Milliseconds()
    98  			metrics.Internal.BigQueryExportDelay.Add(ctx, float64(delay),
    99  				r.ID.LUCIProject(),
   100  				r.ConfigGroupID.Name(),
   101  				string(r.Mode))
   102  		}
   103  	}()
   104  
   105  	go func() {
   106  		defer wg.Done()
   107  		// *Always* export to the local CV dataset but the error won't fail the
   108  		// task.
   109  		err := client.SendRow(ctx, cvbq.Row{
   110  			Dataset:     CVDataset,
   111  			Table:       CVTable,
   112  			OperationID: "run-" + string(id),
   113  			Payload:     a,
   114  		})
   115  		if err != nil {
   116  			logging.Warningf(ctx, "failed to export the Run to CV dataset: %s", err)
   117  		}
   118  	}()
   119  	wg.Wait()
   120  	return exportErr
   121  }
   122  
   123  func makeAttempt(ctx context.Context, r *run.Run, cls []*run.RunCL) (*cvbqpb.Attempt, error) {
   124  	builds, err := computeAttemptBuilds(ctx, r)
   125  	if err != nil {
   126  		return nil, err
   127  	}
   128  	// TODO(crbug/1173168, crbug/1105669): We want to change the BQ
   129  	// schema so that StartTime is processing start time and CreateTime is
   130  	// trigger time.
   131  	a := &cvbqpb.Attempt{
   132  		Key:                  r.ID.AttemptKey(),
   133  		LuciProject:          r.ID.LUCIProject(),
   134  		ConfigGroup:          r.ConfigGroupID.Name(),
   135  		ClGroupKey:           run.ComputeCLGroupKey(cls, false),
   136  		EquivalentClGroupKey: run.ComputeCLGroupKey(cls, true),
   137  		// Run.CreateTime is trigger time, which corresponds to what CQD sends for
   138  		// StartTime.
   139  		StartTime:            timestamppb.New(r.CreateTime),
   140  		EndTime:              timestamppb.New(r.EndTime),
   141  		Builds:               builds,
   142  		HasCustomRequirement: len(r.Options.GetIncludedTryjobs()) > 0,
   143  	}
   144  	if !r.StartTime.IsZero() {
   145  		a.ActualStartTime = timestamppb.New(r.StartTime)
   146  	}
   147  	submittedSet := common.MakeCLIDsSet(r.Submission.GetSubmittedCls()...)
   148  	failedSet := common.MakeCLIDsSet(r.Submission.GetFailedCls()...)
   149  	a.GerritChanges = make([]*cvbqpb.GerritChange, len(cls))
   150  	for i, cl := range cls {
   151  		a.GerritChanges[i] = toGerritChange(cl, submittedSet, failedSet, r.Mode)
   152  	}
   153  	a.Status, a.Substatus = attemptStatus(ctx, r)
   154  	return a, nil
   155  }
   156  
   157  // toGerritChange creates a GerritChange for the given RunCL.
   158  //
   159  // This includes the submit status of the CL.
   160  func toGerritChange(cl *run.RunCL, submitted, failed common.CLIDsSet, mode run.Mode) *cvbqpb.GerritChange {
   161  	detail := cl.Detail
   162  	ci := detail.GetGerrit().GetInfo()
   163  	gc := &cvbqpb.GerritChange{
   164  		Host:                       detail.GetGerrit().Host,
   165  		Project:                    ci.Project,
   166  		Change:                     ci.Number,
   167  		Patchset:                   int64(detail.Patchset),
   168  		EarliestEquivalentPatchset: int64(detail.MinEquivalentPatchset),
   169  		TriggerTime:                cl.Trigger.Time,
   170  		Mode:                       mode.BQAttemptMode(),
   171  		SubmitStatus:               cvbqpb.GerritChange_PENDING,
   172  		Owner:                      ci.GetOwner().GetEmail(),
   173  		IsOwnerBot:                 isCLOwnerBot(ci),
   174  	}
   175  
   176  	if mode == run.FullRun {
   177  		// Mark the CL submit status as success if it appears in the submitted CLs
   178  		// list, and failure if it does not.
   179  		switch _, submitted := submitted[cl.ID]; {
   180  		case submitted:
   181  			gc.SubmitStatus = cvbqpb.GerritChange_SUCCESS
   182  		case failed.Has(cl.ID):
   183  			gc.SubmitStatus = cvbqpb.GerritChange_FAILURE
   184  		default:
   185  			gc.SubmitStatus = cvbqpb.GerritChange_PENDING
   186  		}
   187  	}
   188  	return gc
   189  }
   190  
   191  // decides whether CL owner is a bot or not.
   192  func isCLOwnerBot(ci *gerritpb.ChangeInfo) bool {
   193  	for _, tag := range ci.GetOwner().GetTags() {
   194  		if tag == "SERVICE_USER" {
   195  			return true
   196  		}
   197  	}
   198  	switch ownerEmail := strings.ToLower(ci.GetOwner().GetEmail()); {
   199  	case strings.HasSuffix(ownerEmail, "gserviceaccount.com"):
   200  		return true
   201  	case strings.HasSuffix(ownerEmail, "prod.google.com"):
   202  		return true
   203  	}
   204  
   205  	return false
   206  }
   207  
   208  // attemptStatus converts a Run status to Attempt status.
   209  func attemptStatus(ctx context.Context, r *run.Run) (cvbqpb.AttemptStatus, cvbqpb.AttemptSubstatus) {
   210  	switch r.Status {
   211  	case run.Status_SUCCEEDED:
   212  		return cvbqpb.AttemptStatus_SUCCESS, cvbqpb.AttemptSubstatus_NO_SUBSTATUS
   213  	case run.Status_FAILED:
   214  		switch {
   215  		case r.Submission != nil && len(r.Submission.Cls) != len(r.Submission.SubmittedCls):
   216  			// In the case that the checks passed but not all CLs were submitted
   217  			// successfully, the Attempt will still have status set to SUCCESS for
   218  			// backwards compatibility (See: crbug.com/1114686). Note that
   219  			// r.Submission is expected to be set only if a submission is attempted,
   220  			// 	meaning all checks passed.
   221  			//
   222  			// TODO(crbug/1114686): Add a new FAILED_SUBMIT substatus, which
   223  			// should be used in the case that some CLs failed to submit after
   224  			// passing checks. (In this case, for backwards compatibility, we
   225  			// will set status = SUCCESS, substatus = FAILED_SUBMIT.)
   226  			return cvbqpb.AttemptStatus_SUCCESS, cvbqpb.AttemptSubstatus_NO_SUBSTATUS
   227  		case r.Tryjobs.GetState().GetStatus() == tryjob.ExecutionState_FAILED:
   228  			return cvbqpb.AttemptStatus_FAILURE, cvbqpb.AttemptSubstatus_FAILED_TRYJOBS
   229  		default:
   230  			// TODO(crbug/1342810): use the failure reason stored in Run entity to
   231  			// decide accurate sub-status. For now, use unapproved because it is the
   232  			// most common failure reason after failed tryjobs.
   233  			return cvbqpb.AttemptStatus_FAILURE, cvbqpb.AttemptSubstatus_UNAPPROVED
   234  		}
   235  	case run.Status_CANCELLED:
   236  		return cvbqpb.AttemptStatus_ABORTED, cvbqpb.AttemptSubstatus_MANUAL_CANCEL
   237  	default:
   238  		logging.Errorf(ctx, "Unexpected attempt status %q", r.Status)
   239  		return cvbqpb.AttemptStatus_ATTEMPT_STATUS_UNSPECIFIED, cvbqpb.AttemptSubstatus_ATTEMPT_SUBSTATUS_UNSPECIFIED
   240  	}
   241  }
   242  
   243  func computeAttemptBuilds(ctx context.Context, r *run.Run) ([]*cvbqpb.Build, error) {
   244  	var ret []*cvbqpb.Build
   245  	for i, execution := range r.Tryjobs.GetState().GetExecutions() {
   246  		definition := r.Tryjobs.GetState().GetRequirement().GetDefinitions()[i]
   247  		for _, executionAttempt := range execution.GetAttempts() {
   248  			if executionAttempt.GetExternalId() == "" {
   249  				// It's possible that CV fails to launch the tryjob against
   250  				// buildbucket and has missing external ID.
   251  				continue
   252  			}
   253  			host, buildID, err := tryjob.ExternalID(executionAttempt.GetExternalId()).ParseBuildbucketID()
   254  			if err != nil {
   255  				return nil, err
   256  			}
   257  			origin := cvbqpb.Build_NOT_REUSED
   258  			switch {
   259  			case executionAttempt.GetReused():
   260  				origin = cvbqpb.Build_REUSED
   261  			case definition.GetDisableReuse():
   262  				origin = cvbqpb.Build_NOT_REUSABLE
   263  			}
   264  			ret = append(ret, &cvbqpb.Build{
   265  				Host:     host,
   266  				Id:       buildID,
   267  				Critical: definition.GetCritical(),
   268  				Origin:   origin,
   269  			})
   270  		}
   271  	}
   272  	sort.Slice(ret, func(i, j int) bool {
   273  		return ret[i].Id < ret[j].Id
   274  	})
   275  	return ret, nil
   276  }
   277  
   278  // bbBuilderNameFromDef returns Buildbucket builder name from Tryjob Definition.
   279  //
   280  // Returns the builder name in the format of "$project/$bucket/$builder".
   281  // Panics for non-buildbucket backend.
   282  func bbBuilderNameFromDef(def *tryjob.Definition) string {
   283  	if def.GetBuildbucket() == nil {
   284  		panic(fmt.Errorf("non-buildbucket backend is not supported; got %T", def.GetBackend()))
   285  	}
   286  	builder := def.GetBuildbucket().GetBuilder()
   287  	return fmt.Sprintf("%s/%s/%s", builder.Project, builder.Bucket, builder.Builder)
   288  }