go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/buildbucket/appengine/tasks/cancel.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tasks 16 17 import ( 18 "context" 19 "fmt" 20 "strings" 21 "time" 22 23 "golang.org/x/sync/errgroup" 24 "google.golang.org/protobuf/types/known/timestamppb" 25 26 "go.chromium.org/luci/auth/identity" 27 "go.chromium.org/luci/common/clock" 28 "go.chromium.org/luci/common/errors" 29 "go.chromium.org/luci/common/logging" 30 "go.chromium.org/luci/gae/service/datastore" 31 "go.chromium.org/luci/server/auth" 32 "go.chromium.org/luci/server/tq" 33 34 "go.chromium.org/luci/buildbucket" 35 "go.chromium.org/luci/buildbucket/appengine/internal/metrics" 36 "go.chromium.org/luci/buildbucket/appengine/internal/perm" 37 "go.chromium.org/luci/buildbucket/appengine/model" 38 taskdefs "go.chromium.org/luci/buildbucket/appengine/tasks/defs" 39 pb "go.chromium.org/luci/buildbucket/proto" 40 "go.chromium.org/luci/buildbucket/protoutil" 41 ) 42 43 // StartCancel starts canceling a build and schedules the delayed task to finally cancel it 44 func StartCancel(ctx context.Context, bID int64, summary string) (*model.Build, error) { 45 bld := &model.Build{ID: bID} 46 err := datastore.RunInTransaction(ctx, func(ctx context.Context) error { 47 switch err := datastore.Get(ctx, bld); { 48 case err == datastore.ErrNoSuchEntity: 49 return perm.NotFoundErr(ctx) 50 case err != nil: 51 return errors.Annotate(err, "failed to fetch build: %d", bld.ID).Err() 52 case protoutil.IsEnded(bld.Proto.Status): 53 return nil 54 case bld.Proto.CancelTime != nil: 55 return nil 56 } 57 now := timestamppb.New(clock.Now(ctx).UTC()) 58 bld.Proto.CancelTime = now 59 bld.Proto.UpdateTime = now 60 bld.Proto.CancellationMarkdown = summary 61 canceledBy := "buildbucket" 62 if auth.CurrentIdentity(ctx) != identity.AnonymousIdentity { 63 canceledBy = string(auth.CurrentIdentity(ctx)) 64 } 65 66 bld.Proto.CanceledBy = canceledBy 67 if err := datastore.Put(ctx, bld); err != nil { 68 return errors.Annotate(err, "failed to store build: %d", bld.ID).Err() 69 } 70 // Enqueue the task to finally cancel the build. 71 if err := ScheduleCancelBuildTask(ctx, bID, buildbucket.MinUpdateBuildInterval+bld.Proto.GracePeriod.AsDuration()); err != nil { 72 return errors.Annotate(err, "failed to enqueue cancel task for build: %d", bld.ID).Err() 73 } 74 return nil 75 }, nil) 76 if err != nil { 77 return bld, errors.Annotate(err, "failed to set the build to CANCELING: %d", bID).Err() 78 } 79 80 // TODO(crbug.com/1031205): alternatively, we could just map out the entire 81 // cancellation tree and then feed those build IDs into a pool to do bulk 82 // cancel. We could add a bool argument to StartCancel to control if the 83 // function should cancel the entire tree or just the build itself. 84 // Discussion: https://chromium-review.googlesource.com/c/infra/luci/luci-go/+/3402796/comments/8aba3108_b4ca9f76 85 if err := CancelChildren(ctx, bID); err != nil { 86 // Failures of canceling children should not block canceling parent. 87 logging.Debugf(ctx, "failed to cancel children of %d: %s", bID, err) 88 } 89 90 return bld, err 91 } 92 93 // CancelChildren cancels a build's children. 94 // NOTE: This process is best-effort; Builds call UpdateBuild at a minimum 95 // frequency and Buildbucket will inform them if they should start the cancel 96 // process (by checking the parent build, if any). 97 // So, even if this fails, the next UpdateBuild will catch it. 98 func CancelChildren(ctx context.Context, bID int64) error { 99 // Look for the build's children to cancel. 100 children, err := childrenToCancel(ctx, bID) 101 if err != nil { 102 return err 103 } 104 if len(children) == 0 { 105 return nil 106 } 107 108 eg, ctx := errgroup.WithContext(ctx) 109 summary := fmt.Sprintf("cancel since the parent %d is canceled", bID) 110 111 for _, child := range children { 112 child := child 113 eg.Go(func() error { 114 _, err := StartCancel(ctx, child.ID, summary) 115 return err 116 }) 117 } 118 return eg.Wait() 119 } 120 121 // childrenToCancel returns the child build ids that should be canceled with 122 // the parent. 123 func childrenToCancel(ctx context.Context, bID int64) (children []*model.Build, err error) { 124 q := datastore.NewQuery(model.BuildKind).Eq("parent_id", bID) 125 err = datastore.Run(ctx, q, func(bld *model.Build) error { 126 switch { 127 case protoutil.IsEnded(bld.Proto.Status): 128 return nil 129 case bld.Proto.CanOutliveParent: 130 return nil 131 default: 132 children = append(children, bld) 133 return nil 134 } 135 }) 136 return 137 } 138 139 // Cancel actually cancels a build. 140 func Cancel(ctx context.Context, bID int64) (*model.Build, error) { 141 bld := &model.Build{ID: bID} 142 canceled := false 143 err := datastore.RunInTransaction(ctx, func(ctx context.Context) error { 144 canceled = false // reset canceled in case of retries. 145 inf := &model.BuildInfra{Build: datastore.KeyForObj(ctx, bld)} 146 stp := &model.BuildSteps{Build: inf.Build} 147 bs := &model.BuildStatus{Build: inf.Build} 148 149 cancelSteps := true 150 if err := datastore.Get(ctx, bld, inf, stp, bs); err != nil { 151 switch merr, ok := err.(errors.MultiError); { 152 case !ok: 153 return errors.Annotate(err, "failed to fetch build: %d", bld.ID).Err() 154 case merr[0] == datastore.ErrNoSuchEntity: 155 return perm.NotFoundErr(ctx) 156 case merr[0] != nil: 157 return errors.Annotate(merr[0], "failed to fetch build: %d", bld.ID).Err() 158 case merr[1] != nil && merr[1] != datastore.ErrNoSuchEntity: 159 return errors.Annotate(merr[1], "failed to fetch build infra: %d", bld.ID).Err() 160 case merr[2] != nil && merr[2] != datastore.ErrNoSuchEntity: 161 return errors.Annotate(merr[2], "failed to fetch build steps: %d", bld.ID).Err() 162 case merr[3] != nil && merr[3] != datastore.ErrNoSuchEntity: 163 // TODO(crbug.com/1430324): also check ErrNoSuchEntity. 164 return errors.Annotate(merr[3], "failed to fetch build status: %d", bld.ID).Err() 165 case merr[2] == datastore.ErrNoSuchEntity: 166 cancelSteps = false 167 } 168 } 169 if protoutil.IsEnded(bld.Proto.Status) { 170 return nil 171 } 172 173 if sw := inf.Proto.GetSwarming(); sw.GetHostname() != "" && sw.TaskId != "" { 174 if err := CancelSwarmingTask(ctx, &taskdefs.CancelSwarmingTaskGo{ 175 Hostname: sw.Hostname, 176 TaskId: sw.TaskId, 177 Realm: bld.Realm(), 178 }); err != nil { 179 return errors.Annotate(err, "failed to enqueue swarming task cancellation task: %d", bld.ID).Err() 180 } 181 } 182 if bk := inf.Proto.GetBackend(); bk.GetTask().GetId().GetId() != "" && bk.GetTask().GetId().GetTarget() != "" { 183 if err := CancelBackendTask(ctx, &taskdefs.CancelBackendTask{ 184 Target: bk.Task.Id.Target, 185 TaskId: bk.Task.Id.Id, 186 Project: bld.Project, 187 }); err != nil { 188 return errors.Annotate(err, "failed to enqueue backend task cancelation task: %d", bld.ID).Err() 189 } 190 } 191 if rdb := inf.Proto.GetResultdb(); rdb.GetHostname() != "" && rdb.Invocation != "" { 192 if err := FinalizeResultDB(ctx, &taskdefs.FinalizeResultDBGo{ 193 BuildId: bld.ID, 194 }); err != nil { 195 return errors.Annotate(err, "failed to enqueue resultdb finalization task: %d", bld.ID).Err() 196 } 197 } 198 if err := ExportBigQuery(ctx, bld.ID, strings.Contains(bld.ExperimentsString(), buildbucket.ExperimentBqExporterGo)); err != nil { 199 return errors.Annotate(err, "failed to enqueue bigquery export task: %d", bld.ID).Err() 200 } 201 if err := NotifyPubSub(ctx, bld); err != nil { 202 return errors.Annotate(err, "failed to enqueue pubsub notification task: %d", bld.ID).Err() 203 } 204 205 now := clock.Now(ctx).UTC() 206 207 bld.Leasee = nil 208 bld.LeaseExpirationDate = time.Time{} 209 bld.LeaseKey = 0 210 211 protoutil.SetStatus(now, bld.Proto, pb.Status_CANCELED) 212 logging.Debugf(ctx, fmt.Sprintf("Build %d status has now been set as canceled.", bld.ID)) 213 canceled = true 214 toPut := []any{bld} 215 216 if bs.Status != pb.Status_STATUS_UNSPECIFIED { 217 bs.Status = pb.Status_CANCELED 218 toPut = append(toPut, bs) 219 } 220 221 if cancelSteps { 222 switch changed, err := stp.CancelIncomplete(ctx, timestamppb.New(now)); { 223 case err != nil: 224 return errors.Annotate(err, "failed to mark steps cancelled: %d", bld.ID).Err() 225 case changed: 226 toPut = append(toPut, stp) 227 } 228 } 229 230 if err := datastore.Put(ctx, toPut...); err != nil { 231 return errors.Annotate(err, "failed to store build: %d", bld.ID).Err() 232 } 233 return nil 234 }, nil) 235 if err != nil { 236 return nil, err 237 } 238 if protoutil.IsEnded(bld.Status) && canceled { 239 metrics.BuildCompleted(ctx, bld) 240 } 241 242 return bld, nil 243 } 244 245 // ScheduleCancelBuildTask enqueues a CancelBuildTask. 246 func ScheduleCancelBuildTask(ctx context.Context, bID int64, delay time.Duration) error { 247 return tq.AddTask(ctx, &tq.Task{ 248 Title: fmt.Sprintf("cancel-%d", bID), 249 Payload: &taskdefs.CancelBuildTask{ 250 BuildId: bID, 251 }, 252 Delay: delay, 253 }) 254 }