go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/buildbucket/appengine/tasks/update_build_task.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tasks 16 17 import ( 18 "context" 19 "encoding/base64" 20 "encoding/json" 21 "fmt" 22 "io" 23 "strconv" 24 "strings" 25 "time" 26 27 "google.golang.org/api/pubsub/v1" 28 "google.golang.org/grpc/codes" 29 "google.golang.org/protobuf/proto" 30 "google.golang.org/protobuf/types/known/timestamppb" 31 32 "go.chromium.org/luci/common/clock" 33 "go.chromium.org/luci/common/errors" 34 "go.chromium.org/luci/common/logging" 35 "go.chromium.org/luci/common/retry/transient" 36 "go.chromium.org/luci/gae/service/datastore" 37 "go.chromium.org/luci/gae/service/info" 38 "go.chromium.org/luci/grpc/appstatus" 39 "go.chromium.org/luci/server/caching" 40 41 "go.chromium.org/luci/buildbucket/appengine/common" 42 "go.chromium.org/luci/buildbucket/appengine/internal/buildstatus" 43 "go.chromium.org/luci/buildbucket/appengine/internal/config" 44 "go.chromium.org/luci/buildbucket/appengine/internal/metrics" 45 "go.chromium.org/luci/buildbucket/appengine/model" 46 pb "go.chromium.org/luci/buildbucket/proto" 47 "go.chromium.org/luci/buildbucket/protoutil" 48 ) 49 50 type pushRequest struct { 51 Message pubsub.PubsubMessage `json:"message"` 52 Subscription string `json:"subscription"` 53 } 54 55 type buildTaskUpdate struct { 56 *pb.BuildTaskUpdate 57 58 // Message id of the pubsub message that sent this request. 59 msgID string 60 61 // Subscription of the pubsub message that sent this request. 62 subscription string 63 } 64 65 func unpackUpdateBuildTaskMsg(ctx context.Context, body io.Reader) (req buildTaskUpdate, err error) { 66 req = buildTaskUpdate{} 67 68 blob, err := io.ReadAll(body) 69 if err != nil { 70 return req, errors.Annotate(err, "failed to read the request body").Tag(transient.Tag).Err() 71 } 72 73 // process pubsub message 74 var msg pushRequest 75 if err := json.Unmarshal(blob, &msg); err != nil { 76 return req, errors.Annotate(err, "failed to unmarshal UpdateBuildTask PubSub message").Err() 77 } 78 // process UpdateBuildTask message data 79 data, err := base64.StdEncoding.DecodeString(msg.Message.Data) 80 if err != nil { 81 return req, errors.Annotate(err, "cannot decode UpdateBuildTask message data as base64").Err() 82 } 83 84 bldTskUpdte := &pb.BuildTaskUpdate{} 85 if err := (proto.UnmarshalOptions{DiscardUnknown: true}).Unmarshal(data, bldTskUpdte); err != nil { 86 return req, errors.Annotate(err, "failed to unmarshal the BuildTaskUpdate pubsub data").Err() 87 } 88 req.BuildTaskUpdate = bldTskUpdte 89 req.subscription = msg.Subscription 90 req.msgID = msg.Message.MessageId 91 return req, nil 92 } 93 94 func validateTaskStatus(taskStatus pb.Status, allowPending bool) error { 95 switch taskStatus { 96 case pb.Status_ENDED_MASK, 97 pb.Status_STATUS_UNSPECIFIED: 98 return errors.Reason("task.status: invalid status %s for UpdateBuildTask", taskStatus).Err() 99 } 100 101 if !allowPending && taskStatus == pb.Status_SCHEDULED { 102 return errors.Reason("task.status: invalid status %s for UpdateBuildTask", taskStatus).Err() 103 } 104 105 if _, ok := pb.Status_value[taskStatus.String()]; !ok { 106 return errors.Reason("task.status: invalid status %s for UpdateBuildTask", taskStatus).Err() 107 } 108 return nil 109 } 110 111 func validatePubsubSubscription(ctx context.Context, req buildTaskUpdate) error { 112 globalCfg, err := config.GetSettingsCfg(ctx) 113 if err != nil { 114 return errors.Annotate(err, "error fetching service config").Err() 115 } 116 117 target := req.GetTask().GetId().GetTarget() 118 if target == "" { 119 return errors.Reason("could not validate message. task.id.target not provided.").Err() 120 } 121 122 isValid := false 123 for _, backend := range globalCfg.Backends { 124 if backend.Target == target { 125 var subscription string 126 switch backend.Mode.(type) { 127 case *pb.BackendSetting_LiteMode_: 128 return errors.Reason("backend target %s is in lite mode. The task update isn't supported", target).Err() 129 case *pb.BackendSetting_FullMode_: 130 subscription = fmt.Sprintf("projects/%s/subscriptions/%s", info.AppID(ctx), backend.GetFullMode().GetPubsubId()) 131 } 132 if subscription == req.subscription { 133 isValid = true 134 } 135 break 136 } 137 } 138 139 if !isValid { 140 return errors.Reason("pubsub subscription %s did not match the one configured for target %s", req.subscription, target).Err() 141 } 142 return nil 143 } 144 145 func validateTask(task *pb.Task, allowPending bool) error { 146 if task.GetId().GetId() == "" { 147 return errors.Reason("task.id: required").Err() 148 } 149 if task.GetUpdateId() == 0 { 150 return errors.Reason("task.UpdateId: required").Err() 151 } 152 if err := validateTaskStatus(task.Status, allowPending); err != nil { 153 return errors.Annotate(err, "task.Status").Err() 154 } 155 detailsInKb := float64(len(task.GetDetails().String()) / 1024) 156 if detailsInKb > 10 { 157 return errors.Reason("task.details is greater than 10 kb").Err() 158 } 159 return nil 160 } 161 162 // validateBuildTaskUpdate ensures that the build_id, task, status, and details 163 // are correctly set be sender. 164 func validateBuildTaskUpdate(ctx context.Context, req *pb.BuildTaskUpdate) error { 165 if req.BuildId == "" { 166 return errors.Reason("build_id required").Err() 167 } 168 return validateTask(req.Task, false) 169 } 170 171 // validateBuildTask ensures that the taskID provided in the request matches 172 // the taskID that is stored in the build model. If there is no task associated 173 // with the build, an error is returned and the update message is lost. 174 func validateBuildTask(ctx context.Context, req *pb.BuildTaskUpdate, infra *model.BuildInfra) error { 175 switch { 176 case infra.Proto.GetBackend() == nil: 177 return appstatus.Errorf(codes.NotFound, "Build %s does not support task backend", req.BuildId) 178 case infra.Proto.Backend.GetTask().GetId().GetId() == "": 179 return appstatus.Errorf(codes.NotFound, "No task is associated with the build. Cannot update.") 180 case infra.Proto.Backend.Task.Id.GetTarget() != req.Task.Id.GetTarget() || (infra.Proto.Backend.Task.Id.GetId() != "" && infra.Proto.Backend.Task.Id.GetId() != req.Task.Id.GetId()): 181 return errors.Reason("TaskID in request does not match TaskID associated with build").Err() 182 } 183 if protoutil.IsEnded(infra.Proto.Backend.Task.Status) { 184 return appstatus.Errorf(codes.FailedPrecondition, "cannot update an ended task") 185 } 186 return nil 187 } 188 189 func prepareUpdate(ctx context.Context, build *model.Build, infra *model.BuildInfra, task *pb.Task) ([]any, error) { 190 if task.UpdateId <= infra.Proto.Backend.Task.UpdateId { 191 // Returning nil since there is no work to do here. 192 // The task in the request is outdated. 193 return nil, nil 194 } 195 // Required fields to change 196 now := clock.Now(ctx) 197 build.Proto.UpdateTime = timestamppb.New(now) 198 proto.Merge(infra.Proto.Backend.Task, task) 199 200 toSave := []any{build, infra} 201 202 bs, steps, err := updateBuildStatusOnTaskStatusChange(ctx, build, nil, &buildstatus.StatusWithDetails{Status: task.Status}, now) 203 if err != nil { 204 return nil, err 205 } 206 if bs != nil { 207 toSave = append(toSave, bs) 208 } 209 if steps != nil { 210 toSave = append(toSave, steps) 211 } 212 return toSave, nil 213 } 214 215 func updateTaskEntity(ctx context.Context, req *pb.BuildTaskUpdate, buildID int64) error { 216 var build *model.Build 217 setBuildToEnd := false 218 txErr := datastore.RunInTransaction(ctx, func(ctx context.Context) error { 219 entities, err := common.GetBuildEntities(ctx, buildID, model.BuildKind, model.BuildInfraKind) 220 if err != nil { 221 return errors.Annotate(err, "invalid Build or BuildInfra").Err() 222 } 223 224 build = entities[0].(*model.Build) 225 infra := entities[1].(*model.BuildInfra) 226 227 if protoutil.IsEnded(build.Status) { 228 // Cannot update an ended build. 229 logging.Infof(ctx, "build %d is ended", build.ID) 230 return nil 231 } 232 233 toSave, err := prepareUpdate(ctx, build, infra, req.Task) 234 if err != nil { 235 return err 236 } 237 238 setBuildToEnd = protoutil.IsEnded(req.Task.Status) 239 return datastore.Put(ctx, toSave) 240 }, nil) 241 242 if txErr != nil { 243 return txErr 244 } 245 246 if setBuildToEnd { 247 metrics.BuildCompleted(ctx, build) 248 } 249 return nil 250 } 251 252 // updateBuildTask allows the Backend to preemptively update the 253 // status of the task (e.g. if it knows that the task has crashed, etc.). 254 func updateBuildTask(ctx context.Context, req buildTaskUpdate) error { 255 buildID, err := strconv.ParseInt(req.GetBuildId(), 10, 64) 256 if err != nil { 257 return errors.Annotate(err, "bad build id").Err() 258 } 259 if err := validatePubsubSubscription(ctx, req); err != nil { 260 return errors.Annotate(err, "pubsub subscription").Err() 261 } 262 if err := validateBuildTaskUpdate(ctx, req.BuildTaskUpdate); err != nil { 263 return errors.Annotate(err, "invalid BuildTaskUpdate").Err() 264 } 265 logging.Infof(ctx, "Received an BuildTaskUpdate message for build %q", req.BuildId) 266 267 // TODO(b/288158829): remove it once the root cause for the Skia failure is found. 268 if strings.Contains(req.subscription, "skia") { 269 logging.Debugf(ctx, "BuildTaskUpdate.Task: %v", req.BuildTaskUpdate.Task) 270 } 271 272 entities, err := common.GetBuildEntities(ctx, buildID, model.BuildInfraKind) 273 if err != nil { 274 return errors.Annotate(err, "invalid buildInfra").Err() 275 } 276 infra := entities[0].(*model.BuildInfra) 277 278 // Pre-check if the task can be updated before updating it with a transaction. 279 // Ensures that the taskID provided in the request matches the taskID that is 280 // stored in the build model. If there is no task associated with the build model, 281 // an error is returned and the update message is lost. 282 err = validateBuildTask(ctx, req.BuildTaskUpdate, infra) 283 if err != nil { 284 return errors.Annotate(err, "invalid task").Err() 285 } 286 287 err = updateTaskEntity(ctx, req.BuildTaskUpdate, buildID) 288 if err != nil { 289 if _, isAppStatusErr := appstatus.Get(err); isAppStatusErr { 290 return err 291 } else { 292 return appstatus.Errorf(codes.Internal, "failed to update the build entity: %s", err) 293 } 294 } 295 296 return nil 297 } 298 299 // UpdateBuildTask handles task backend PubSub push messages produced by various 300 // task backends. 301 // For a retryable error, it will be tagged with transient.Tag. 302 func UpdateBuildTask(ctx context.Context, body io.Reader) error { 303 req, err := unpackUpdateBuildTaskMsg(ctx, body) 304 if err != nil { 305 return err 306 } 307 308 // Try not to process same message more than once. 309 cache := caching.GlobalCache(ctx, "update-build-task-pubsub-msg-id") 310 if cache == nil { 311 return errors.Reason("global cache is not found").Tag(transient.Tag).Err() 312 } 313 msgCached, err := cache.Get(ctx, req.msgID) 314 switch { 315 case err == caching.ErrCacheMiss: // no-op, continue 316 case err != nil: 317 return errors.Annotate(err, "failed to read %s from the global cache", req.msgID).Tag(transient.Tag).Err() 318 case msgCached != nil: 319 logging.Infof(ctx, "seen this message %s before, ignoring", req.msgID) 320 return nil 321 } 322 err = updateBuildTask(ctx, req) 323 if err != nil { 324 return err 325 } 326 327 return cache.Set(ctx, req.msgID, []byte{1}, 10*time.Minute) 328 }