go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/bq/bq.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bq 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 "strings" 22 "sync" 23 24 "google.golang.org/protobuf/types/known/timestamppb" 25 26 "go.chromium.org/luci/common/clock" 27 "go.chromium.org/luci/common/errors" 28 "go.chromium.org/luci/common/logging" 29 "go.chromium.org/luci/common/retry/transient" 30 "go.chromium.org/luci/gae/service/datastore" 31 32 gerritpb "go.chromium.org/luci/common/proto/gerrit" 33 cvbqpb "go.chromium.org/luci/cv/api/bigquery/v1" 34 "go.chromium.org/luci/cv/internal/common" 35 cvbq "go.chromium.org/luci/cv/internal/common/bq" 36 "go.chromium.org/luci/cv/internal/metrics" 37 "go.chromium.org/luci/cv/internal/run" 38 "go.chromium.org/luci/cv/internal/tryjob" 39 ) 40 41 const ( 42 // CV's own dataset/table. 43 CVDataset = "raw" 44 CVTable = "attempts_cv" 45 46 // Legacy CQ dataset. 47 legacyProject = "commit-queue" 48 legacyProjectDev = "commit-queue-dev" 49 legacyDataset = "raw" 50 legacyTable = "attempts" 51 ) 52 53 func send(ctx context.Context, env *common.Env, client cvbq.Client, id common.RunID) error { 54 r := &run.Run{ID: id} 55 switch err := datastore.Get(ctx, r); { 56 case err == datastore.ErrNoSuchEntity: 57 return errors.Reason("Run not found").Err() 58 case err != nil: 59 return errors.Annotate(err, "failed to fetch Run").Tag(transient.Tag).Err() 60 case !run.IsEnded(r.Status): 61 panic(fmt.Errorf("the Run status must be final before sending to BQ")) 62 } 63 64 if r.Mode != run.DryRun && r.Mode != run.FullRun { 65 // only export dry run and full run to bq 66 return nil 67 } 68 // Load CLs and convert them to GerritChanges including submit status. 69 cls, err := run.LoadRunCLs(ctx, r.ID, r.CLs) 70 if err != nil { 71 return err 72 } 73 74 a, err := makeAttempt(ctx, r, cls) 75 if err != nil { 76 return errors.Annotate(err, "failed to make Attempt").Err() 77 } 78 79 var wg sync.WaitGroup 80 var exportErr error 81 wg.Add(2) 82 go func() { 83 defer wg.Done() 84 logging.Debugf(ctx, "CV exporting Run to CQ BQ table") 85 project := legacyProject 86 if env.IsGAEDev { 87 project = legacyProjectDev 88 } 89 exportErr = client.SendRow(ctx, cvbq.Row{ 90 CloudProject: project, 91 Dataset: legacyDataset, 92 Table: legacyTable, 93 OperationID: "run-" + string(id), 94 Payload: a, 95 }) 96 if exportErr == nil { 97 delay := clock.Since(ctx, r.EndTime).Milliseconds() 98 metrics.Internal.BigQueryExportDelay.Add(ctx, float64(delay), 99 r.ID.LUCIProject(), 100 r.ConfigGroupID.Name(), 101 string(r.Mode)) 102 } 103 }() 104 105 go func() { 106 defer wg.Done() 107 // *Always* export to the local CV dataset but the error won't fail the 108 // task. 109 err := client.SendRow(ctx, cvbq.Row{ 110 Dataset: CVDataset, 111 Table: CVTable, 112 OperationID: "run-" + string(id), 113 Payload: a, 114 }) 115 if err != nil { 116 logging.Warningf(ctx, "failed to export the Run to CV dataset: %s", err) 117 } 118 }() 119 wg.Wait() 120 return exportErr 121 } 122 123 func makeAttempt(ctx context.Context, r *run.Run, cls []*run.RunCL) (*cvbqpb.Attempt, error) { 124 builds, err := computeAttemptBuilds(ctx, r) 125 if err != nil { 126 return nil, err 127 } 128 // TODO(crbug/1173168, crbug/1105669): We want to change the BQ 129 // schema so that StartTime is processing start time and CreateTime is 130 // trigger time. 131 a := &cvbqpb.Attempt{ 132 Key: r.ID.AttemptKey(), 133 LuciProject: r.ID.LUCIProject(), 134 ConfigGroup: r.ConfigGroupID.Name(), 135 ClGroupKey: run.ComputeCLGroupKey(cls, false), 136 EquivalentClGroupKey: run.ComputeCLGroupKey(cls, true), 137 // Run.CreateTime is trigger time, which corresponds to what CQD sends for 138 // StartTime. 139 StartTime: timestamppb.New(r.CreateTime), 140 EndTime: timestamppb.New(r.EndTime), 141 Builds: builds, 142 HasCustomRequirement: len(r.Options.GetIncludedTryjobs()) > 0, 143 } 144 if !r.StartTime.IsZero() { 145 a.ActualStartTime = timestamppb.New(r.StartTime) 146 } 147 submittedSet := common.MakeCLIDsSet(r.Submission.GetSubmittedCls()...) 148 failedSet := common.MakeCLIDsSet(r.Submission.GetFailedCls()...) 149 a.GerritChanges = make([]*cvbqpb.GerritChange, len(cls)) 150 for i, cl := range cls { 151 a.GerritChanges[i] = toGerritChange(cl, submittedSet, failedSet, r.Mode) 152 } 153 a.Status, a.Substatus = attemptStatus(ctx, r) 154 return a, nil 155 } 156 157 // toGerritChange creates a GerritChange for the given RunCL. 158 // 159 // This includes the submit status of the CL. 160 func toGerritChange(cl *run.RunCL, submitted, failed common.CLIDsSet, mode run.Mode) *cvbqpb.GerritChange { 161 detail := cl.Detail 162 ci := detail.GetGerrit().GetInfo() 163 gc := &cvbqpb.GerritChange{ 164 Host: detail.GetGerrit().Host, 165 Project: ci.Project, 166 Change: ci.Number, 167 Patchset: int64(detail.Patchset), 168 EarliestEquivalentPatchset: int64(detail.MinEquivalentPatchset), 169 TriggerTime: cl.Trigger.Time, 170 Mode: mode.BQAttemptMode(), 171 SubmitStatus: cvbqpb.GerritChange_PENDING, 172 Owner: ci.GetOwner().GetEmail(), 173 IsOwnerBot: isCLOwnerBot(ci), 174 } 175 176 if mode == run.FullRun { 177 // Mark the CL submit status as success if it appears in the submitted CLs 178 // list, and failure if it does not. 179 switch _, submitted := submitted[cl.ID]; { 180 case submitted: 181 gc.SubmitStatus = cvbqpb.GerritChange_SUCCESS 182 case failed.Has(cl.ID): 183 gc.SubmitStatus = cvbqpb.GerritChange_FAILURE 184 default: 185 gc.SubmitStatus = cvbqpb.GerritChange_PENDING 186 } 187 } 188 return gc 189 } 190 191 // decides whether CL owner is a bot or not. 192 func isCLOwnerBot(ci *gerritpb.ChangeInfo) bool { 193 for _, tag := range ci.GetOwner().GetTags() { 194 if tag == "SERVICE_USER" { 195 return true 196 } 197 } 198 switch ownerEmail := strings.ToLower(ci.GetOwner().GetEmail()); { 199 case strings.HasSuffix(ownerEmail, "gserviceaccount.com"): 200 return true 201 case strings.HasSuffix(ownerEmail, "prod.google.com"): 202 return true 203 } 204 205 return false 206 } 207 208 // attemptStatus converts a Run status to Attempt status. 209 func attemptStatus(ctx context.Context, r *run.Run) (cvbqpb.AttemptStatus, cvbqpb.AttemptSubstatus) { 210 switch r.Status { 211 case run.Status_SUCCEEDED: 212 return cvbqpb.AttemptStatus_SUCCESS, cvbqpb.AttemptSubstatus_NO_SUBSTATUS 213 case run.Status_FAILED: 214 switch { 215 case r.Submission != nil && len(r.Submission.Cls) != len(r.Submission.SubmittedCls): 216 // In the case that the checks passed but not all CLs were submitted 217 // successfully, the Attempt will still have status set to SUCCESS for 218 // backwards compatibility (See: crbug.com/1114686). Note that 219 // r.Submission is expected to be set only if a submission is attempted, 220 // meaning all checks passed. 221 // 222 // TODO(crbug/1114686): Add a new FAILED_SUBMIT substatus, which 223 // should be used in the case that some CLs failed to submit after 224 // passing checks. (In this case, for backwards compatibility, we 225 // will set status = SUCCESS, substatus = FAILED_SUBMIT.) 226 return cvbqpb.AttemptStatus_SUCCESS, cvbqpb.AttemptSubstatus_NO_SUBSTATUS 227 case r.Tryjobs.GetState().GetStatus() == tryjob.ExecutionState_FAILED: 228 return cvbqpb.AttemptStatus_FAILURE, cvbqpb.AttemptSubstatus_FAILED_TRYJOBS 229 default: 230 // TODO(crbug/1342810): use the failure reason stored in Run entity to 231 // decide accurate sub-status. For now, use unapproved because it is the 232 // most common failure reason after failed tryjobs. 233 return cvbqpb.AttemptStatus_FAILURE, cvbqpb.AttemptSubstatus_UNAPPROVED 234 } 235 case run.Status_CANCELLED: 236 return cvbqpb.AttemptStatus_ABORTED, cvbqpb.AttemptSubstatus_MANUAL_CANCEL 237 default: 238 logging.Errorf(ctx, "Unexpected attempt status %q", r.Status) 239 return cvbqpb.AttemptStatus_ATTEMPT_STATUS_UNSPECIFIED, cvbqpb.AttemptSubstatus_ATTEMPT_SUBSTATUS_UNSPECIFIED 240 } 241 } 242 243 func computeAttemptBuilds(ctx context.Context, r *run.Run) ([]*cvbqpb.Build, error) { 244 var ret []*cvbqpb.Build 245 for i, execution := range r.Tryjobs.GetState().GetExecutions() { 246 definition := r.Tryjobs.GetState().GetRequirement().GetDefinitions()[i] 247 for _, executionAttempt := range execution.GetAttempts() { 248 if executionAttempt.GetExternalId() == "" { 249 // It's possible that CV fails to launch the tryjob against 250 // buildbucket and has missing external ID. 251 continue 252 } 253 host, buildID, err := tryjob.ExternalID(executionAttempt.GetExternalId()).ParseBuildbucketID() 254 if err != nil { 255 return nil, err 256 } 257 origin := cvbqpb.Build_NOT_REUSED 258 switch { 259 case executionAttempt.GetReused(): 260 origin = cvbqpb.Build_REUSED 261 case definition.GetDisableReuse(): 262 origin = cvbqpb.Build_NOT_REUSABLE 263 } 264 ret = append(ret, &cvbqpb.Build{ 265 Host: host, 266 Id: buildID, 267 Critical: definition.GetCritical(), 268 Origin: origin, 269 }) 270 } 271 } 272 sort.Slice(ret, func(i, j int) bool { 273 return ret[i].Id < ret[j].Id 274 }) 275 return ret, nil 276 } 277 278 // bbBuilderNameFromDef returns Buildbucket builder name from Tryjob Definition. 279 // 280 // Returns the builder name in the format of "$project/$bucket/$builder". 281 // Panics for non-buildbucket backend. 282 func bbBuilderNameFromDef(def *tryjob.Definition) string { 283 if def.GetBuildbucket() == nil { 284 panic(fmt.Errorf("non-buildbucket backend is not supported; got %T", def.GetBackend())) 285 } 286 builder := def.GetBuildbucket().GetBuilder() 287 return fmt.Sprintf("%s/%s/%s", builder.Project, builder.Bucket, builder.Builder) 288 }