go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luci_notify/notify/pubsub.go (about) 1 // Copyright 2017 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package notify 16 17 import ( 18 "bytes" 19 "compress/zlib" 20 "context" 21 "encoding/json" 22 "fmt" 23 "io" 24 "math/rand" 25 "net/http" 26 "sort" 27 "strings" 28 "time" 29 30 "google.golang.org/genproto/protobuf/field_mask" 31 "google.golang.org/grpc/codes" 32 "google.golang.org/grpc/status" 33 "google.golang.org/protobuf/encoding/protojson" 34 "google.golang.org/protobuf/proto" 35 36 buildbucketpb "go.chromium.org/luci/buildbucket/proto" 37 bbv1 "go.chromium.org/luci/common/api/buildbucket/buildbucket/v1" 38 "go.chromium.org/luci/common/api/gitiles" 39 "go.chromium.org/luci/common/clock" 40 "go.chromium.org/luci/common/errors" 41 "go.chromium.org/luci/common/logging" 42 "go.chromium.org/luci/common/retry/transient" 43 "go.chromium.org/luci/common/sync/parallel" 44 "go.chromium.org/luci/gae/service/datastore" 45 "go.chromium.org/luci/grpc/grpcutil" 46 "go.chromium.org/luci/grpc/prpc" 47 "go.chromium.org/luci/server/auth" 48 "go.chromium.org/luci/server/router" 49 50 notifypb "go.chromium.org/luci/luci_notify/api/config" 51 "go.chromium.org/luci/luci_notify/config" 52 ) 53 54 func getBuilderID(b *buildbucketpb.Build) string { 55 return fmt.Sprintf("%s/%s", b.Builder.Bucket, b.Builder.Builder) 56 } 57 58 // EmailNotify contains information for delivery and personalization of notification emails. 59 type EmailNotify struct { 60 Email string `json:"email"` 61 Template string `json:"template"` 62 MatchingSteps []*buildbucketpb.Step 63 } 64 65 // sortEmailNotify sorts a list of EmailNotify by Email, then Template. 66 func sortEmailNotify(en []EmailNotify) { 67 sort.Slice(en, func(i, j int) bool { 68 first := en[i] 69 second := en[j] 70 emailResult := strings.Compare(first.Email, second.Email) 71 if emailResult == 0 { 72 return strings.Compare(first.Template, second.Template) < 0 73 } 74 return emailResult < 0 75 }) 76 } 77 78 // extractEmailNotifyValues extracts EmailNotify slice from the build. 79 // TODO(nodir): remove parametersJSON once clients move to properties. 80 func extractEmailNotifyValues(build *buildbucketpb.Build, parametersJSON string) ([]EmailNotify, error) { 81 const propertyName = "email_notify" 82 value := build.GetOutput().GetProperties().GetFields()[propertyName] 83 if value == nil { 84 value = build.GetInput().GetProperties().GetFields()[propertyName] 85 } 86 if value != nil { 87 notifiesPB := value.GetListValue().GetValues() 88 ret := make([]EmailNotify, len(notifiesPB)) 89 for i, notifyPB := range notifiesPB { 90 notifyFields := notifyPB.GetStructValue().GetFields() 91 ret[i] = EmailNotify{ 92 Email: notifyFields["email"].GetStringValue(), 93 Template: notifyFields["template"].GetStringValue(), 94 // MatchingSteps is left blank, as it is only available for recipients 95 // derived from Notifications with step filters. 96 } 97 } 98 return ret, nil 99 } 100 101 if parametersJSON == "" { 102 return nil, nil 103 } 104 // json equivalent: {"email_notify": [{"email": "<address>"}, ...]} 105 var output struct { 106 EmailNotify []EmailNotify `json:"email_notify"` 107 } 108 109 if err := json.NewDecoder(strings.NewReader(parametersJSON)).Decode(&output); err != nil { 110 return nil, errors.Annotate(err, "invalid msg.ParametersJson").Err() 111 } 112 return output.EmailNotify, nil 113 } 114 115 func randInRange(upper, lower float64) float64 { 116 return lower + rand.Float64()*(upper-lower) 117 } 118 119 func putWithRetry(c context.Context, entity any) error { 120 // Retry with exponential backoff. The limit we care about here is the limit 121 // on writes to entities within an entity group, which is once per second. 122 // 123 // By waiting for 1 second we'll guarantee not to conflict with the write 124 // that caused contention the first time we failed. We double the delay each 125 // time, and randomly shift over the interval to avoid clustering. 126 127 delay := 1.0 128 var err error 129 130 // Retry up to 4 times. Past that point the delay is becoming too long to be 131 // reasonable. 132 for i := 0; i < 4; i++ { 133 err = datastore.Put(c, entity) 134 if err == nil { 135 return nil 136 } else if status.Code(err) != codes.Aborted { 137 // Datastore documentation says that Aborted is returned in the case 138 // of contention, which is the only case we want to retry on. 139 // TODO: Perhaps also retry on some others like Unavailable or 140 // DeadlineExceeded? 141 return err 142 } 143 144 clock.Sleep(c, time.Duration(float64(time.Second)*randInRange(delay, delay*2))) 145 delay *= 2 146 } 147 148 return err 149 } 150 151 // handleBuild processes a Build and sends appropriate notifications. 152 // 153 // This function should serve as documentation of the process of going from 154 // a Build to sent notifications. It also should explicitly handle ACLs and 155 // stop the process of handling notifications early to avoid wasting compute 156 // time. 157 // 158 // getCheckout produces the associated source checkout for a build, if available. 159 // It's passed in as a parameter in order to mock it for testing. 160 // 161 // history is a function that contacts gitiles to obtain the git history for 162 // revision ordering purposes. It's passed in as a parameter in order to mock it 163 // for testing. 164 func handleBuild(c context.Context, build *Build, getCheckout CheckoutFunc, history HistoryFunc) error { 165 luciProject := build.Builder.Project 166 project := &config.Project{Name: luciProject} 167 switch ex, err := datastore.Exists(c, project); { 168 case err != nil: 169 return err 170 case !ex.All(): 171 logging.Infof(c, "Build project not tracked by LUCI Notify, ignoring...") 172 return nil // This project is not tracked by luci-notify 173 } 174 175 // checkout is only used to compute the blamelist 176 // As blamelist is not a "critical" feature of luci-notify, if there is an 177 // error getting the checkout (mostly because there is no source manifest) 178 // we should not throw 500, but just log the error, and inform the builder 179 // owner that they are missing source manifest in their builds 180 logdogContext, _ := context.WithTimeout(c, LOGDOG_REQUEST_TIMEOUT) 181 checkout, err := getCheckout(logdogContext, build) 182 if err != nil { 183 // TODO (crbug.com/1058190): log the error and let the owner know 184 logging.Warningf(c, "Got error when getting source manifest for build %v", err) 185 } 186 187 // Get the Builder for the first time, and initialize if there's nothing there. 188 builderID := getBuilderID(&build.Build) 189 templateInput := ¬ifypb.TemplateInput{ 190 BuildbucketHostname: build.BuildbucketHostname, 191 Build: &build.Build, 192 } 193 194 // Set up the initial list of recipients, derived from the build. 195 recipients := make([]EmailNotify, len(build.EmailNotify)) 196 copy(recipients, build.EmailNotify) 197 198 // Helper functions for notifying and updating tree closer status. 199 notifyNoBlame := func(c context.Context, b config.Builder, oldStatus buildbucketpb.Status) error { 200 notifications := Filter(c, &b.Notifications, oldStatus, &build.Build) 201 recipients = append(recipients, ComputeRecipients(c, notifications, nil, nil)...) 202 templateInput.OldStatus = oldStatus 203 return Notify(c, recipients, templateInput) 204 } 205 notifyAndUpdateTrees := func(c context.Context, b config.Builder, oldStatus buildbucketpb.Status) error { 206 return parallel.FanOutIn(func(ch chan<- func() error) { 207 ch <- func() error { return notifyNoBlame(c, b, oldStatus) } 208 ch <- func() error { return UpdateTreeClosers(c, build, oldStatus) } 209 }) 210 } 211 212 gCommit := build.Input.GetGitilesCommit() 213 buildCreateTime := build.CreateTime.AsTime() 214 215 var keepGoing bool 216 var builder config.Builder 217 err = datastore.RunInTransaction(c, func(c context.Context) error { 218 // Reset these values everytime the transaction is retried, to avoid 219 // leaking state from a previous attempt to this attempt. 220 keepGoing = false 221 builder = config.Builder{ 222 ProjectKey: datastore.KeyForObj(c, project), 223 ID: builderID, 224 } 225 226 switch err := datastore.Get(c, &builder); { 227 case err == datastore.ErrNoSuchEntity: 228 // Even if the builder isn't found, we may still want to notify if the build 229 // specifies email addresses to notify. 230 logging.Infof(c, "No builder %q found for project %q", builderID, luciProject) 231 return Notify(c, recipients, templateInput) 232 case err != nil: 233 return errors.Annotate(err, "failed to get builder").Tag(transient.Tag).Err() 234 } 235 236 // Create a new builder as a copy of the old, updated with build information. 237 updatedBuilder := builder 238 updatedBuilder.Status = build.Status 239 updatedBuilder.BuildTime = buildCreateTime 240 if len(checkout) > 0 { 241 updatedBuilder.GitilesCommits = checkout.ToGitilesCommits() 242 } 243 244 if builder.Repository == "" { 245 // No repository specified, so we follow build time ordering. 246 if builder.BuildTime.Before(buildCreateTime) { 247 // The build is in-order with respect to build time, so notify normally. 248 if err := notifyAndUpdateTrees(c, builder, builder.Status); err != nil { 249 return err 250 } 251 return putWithRetry(c, &updatedBuilder) 252 } 253 logging.Infof(c, "Found build with old time") 254 255 // Don't update trees, since it's out of order. 256 return notifyNoBlame(c, builder, 0) 257 } 258 // Repository specified, so we follow commit ordering. 259 260 if gCommit == nil || (gCommit != nil && gCommit.Id == "") { 261 // If there's no revision information, the build must be treated as out- 262 // of-order. For such builds we only perform non-on_change notifications, 263 // and do not update trees. 264 logging.Infof(c, "No revision information found for this build, treating as out-of-order...") 265 return notifyNoBlame(c, builder, 0) 266 } 267 268 builderRepoHost, builderRepoProject, _ := gitiles.ParseRepoURL(builder.Repository) 269 if builderRepoHost != gCommit.Host || builderRepoProject != gCommit.Project { 270 logging.Infof(c, "Builder %s triggered by commit to https://%s/%s"+ 271 "instead of known https://%s, treating as out-of-order...", 272 builderID, gCommit.Host, gCommit.Project, builder.Repository) 273 // Only perform non-on_change notifications. 274 return notifyNoBlame(c, builder, 0) 275 } 276 277 // Update the new builder with revision information as we know it's now available. 278 updatedBuilder.Revision = gCommit.Id 279 280 // If there's no revision information on the Builder, this means the Builder 281 // is uninitialized. Notify about the build as best as we can and then store 282 // the updated builder. 283 if builder.Revision == "" { 284 if err := notifyAndUpdateTrees(c, builder, 0); err != nil { 285 return err 286 } 287 return putWithRetry(c, &updatedBuilder) 288 } 289 keepGoing = true 290 return nil 291 }, nil) 292 if err != nil { 293 return err 294 } 295 if !keepGoing { 296 return nil 297 } 298 299 // We have a builder with a Repository set (i.e. following commit ordering), 300 // and a build with commit ID set. Continue processing notifications. 301 302 // Get the revision history for the build-related commit. 303 commits, err := history(c, luciProject, gCommit.Host, gCommit.Project, builder.Revision, gCommit.Id) 304 if err != nil { 305 return errors.Annotate(err, "failed to retrieve git history for input commit").Err() 306 } 307 if len(commits) == 0 { 308 logging.Debugf(c, "Found build with old commit, not updating tree closers") 309 return notifyNoBlame(c, builder, 0) 310 } 311 312 // Get the blamelist logs, if needed. 313 var aggregateLogs Logs 314 aggregateRepoAllowset := BlamelistRepoAllowset(builder.Notifications) 315 if len(aggregateRepoAllowset) > 0 && len(checkout) > 0 { 316 oldCheckout := NewCheckout(builder.GitilesCommits) 317 aggregateLogs, err = ComputeLogs(c, luciProject, oldCheckout, checkout.Filter(aggregateRepoAllowset), history) 318 if err != nil { 319 return errors.Annotate(err, "failed to compute logs").Err() 320 } 321 } 322 323 // Update `builder`, and check if we need to store a newer version, then store it. 324 oldRepository := builder.Repository 325 err = datastore.RunInTransaction(c, func(c context.Context) error { 326 switch err := datastore.Get(c, &builder); { 327 case err == datastore.ErrNoSuchEntity: 328 return errors.New("builder deleted between datastore.Get calls") 329 case err != nil: 330 return err 331 } 332 333 // If the builder's repository got updated in the meanwhile, we need to throw a 334 // transient error and retry this whole thing. 335 if builder.Repository != oldRepository { 336 return errors.Reason("failed to notify because builder repository updated").Tag(transient.Tag).Err() 337 } 338 339 // Create a new builder as a copy of the old, updated with build information. 340 updatedBuilder := builder 341 updatedBuilder.Status = build.Status 342 updatedBuilder.BuildTime = buildCreateTime 343 updatedBuilder.Revision = gCommit.Id 344 if len(checkout) > 0 { 345 updatedBuilder.GitilesCommits = checkout.ToGitilesCommits() 346 } 347 348 index := commitIndex(commits, builder.Revision) 349 outOfOrder := false 350 switch { 351 // If the revision is not found, we can conclude that the Builder has 352 // advanced beyond gCommit.Revision. This is because: 353 // 1) builder.Revision only ever moves forward. 354 // 2) commits contains the git history up to gCommit.Revision. 355 case index < 0: 356 logging.Debugf(c, "Found build with old commit during transaction.") 357 outOfOrder = true 358 359 // If the revision is current, check build creation time. 360 case index == 0 && builder.BuildTime.After(buildCreateTime): 361 logging.Debugf(c, "Found build with the same commit but an old time.") 362 outOfOrder = true 363 } 364 365 if outOfOrder { 366 // If the build is out-of-order, we want to ignore only on_change notifications, 367 // and not update trees. 368 return notifyNoBlame(c, builder, 0) 369 } 370 371 // Notify, and include the blamelist. 372 n := Filter(c, &builder.Notifications, builder.Status, &build.Build) 373 recipients = append(recipients, ComputeRecipients(c, n, commits[:index], aggregateLogs)...) 374 templateInput.OldStatus = builder.Status 375 376 return parallel.FanOutIn(func(ch chan<- func() error) { 377 ch <- func() error { return Notify(c, recipients, templateInput) } 378 ch <- func() error { return putWithRetry(c, &updatedBuilder) } 379 ch <- func() error { return UpdateTreeClosers(c, build, 0) } 380 }) 381 }, nil) 382 return errors.Annotate(err, "failed to save builder").Tag(transient.Tag).Err() 383 } 384 385 func newBuildsClient(c context.Context, host, project string) (buildbucketpb.BuildsClient, error) { 386 t, err := auth.GetRPCTransport(c, auth.AsProject, auth.WithProject(project)) 387 if err != nil { 388 return nil, err 389 } 390 return buildbucketpb.NewBuildsPRPCClient(&prpc.Client{ 391 C: &http.Client{Transport: t}, 392 Host: host, 393 }), nil 394 } 395 396 // BuildbucketPubSubHandler is the main entrypoint for a new update from buildbucket's pubsub. 397 // 398 // This handler delegates the actual processing of the build to handleBuild. 399 // Its primary purpose is to unwrap context boilerplate and deal with progress-stopping errors. 400 func BuildbucketPubSubHandler(ctx *router.Context) error { 401 c := ctx.Request.Context() 402 build, err := extractBuild(c, ctx.Request) 403 switch { 404 case err != nil: 405 return errors.Annotate(err, "failed to extract build").Err() 406 407 case build == nil: 408 // Ignore. 409 return nil 410 411 default: 412 return handleBuild(c, build, srcmanCheckout, gitilesHistory) 413 } 414 } 415 416 // Build is buildbucketpb.Build along with the parsed 'email_notify' values. 417 type Build struct { 418 BuildbucketHostname string 419 buildbucketpb.Build 420 EmailNotify []EmailNotify 421 } 422 423 // extractBuild constructs a Build from the PubSub HTTP request. 424 func extractBuild(c context.Context, r *http.Request) (*Build, error) { 425 // sent by pubsub. 426 // This struct is just convenient for unwrapping the json message 427 // See https://cloud.google.com/pubsub/docs/push#receive_push 428 var msg struct { 429 Message struct { 430 Data []byte 431 Attributes map[string]any 432 } 433 } 434 if err := json.NewDecoder(r.Body).Decode(&msg); err != nil { 435 return nil, errors.Annotate(err, "could not decode message").Err() 436 } 437 438 // Handle the message from `builds_v2` pubsub topic. 439 if v, ok := msg.Message.Attributes["version"].(string); ok && v == "v2" { 440 buildsV2Msg := &buildbucketpb.BuildsV2PubSub{} 441 opts := protojson.UnmarshalOptions{AllowPartial: true, DiscardUnknown: true} 442 if err := opts.Unmarshal(msg.Message.Data, buildsV2Msg); err != nil { 443 return nil, errors.Annotate(err, "failed to unmarshal pubsub message into BuildsV2PubSub proto").Err() 444 } 445 // TODO(crbug.com/1408909): remove the logging after migration is done. 446 logging.Debugf(c, "receiving builds_v2 pubsub msg for build %d, status %s", buildsV2Msg.Build.GetId(), buildsV2Msg.Build.GetStatus()) 447 // Double check the received build is completed, although the new subscription has a filter for it. 448 if buildsV2Msg.Build.Status&buildbucketpb.Status_ENDED_MASK != buildbucketpb.Status_ENDED_MASK { 449 logging.Infof(c, "Received build %d that hasn't completed yet, ignoring...", buildsV2Msg.Build.GetId()) 450 return nil, nil 451 } 452 largeFieldsData, err := zlibDecompress(buildsV2Msg.BuildLargeFields) 453 if err != nil { 454 return nil, errors.Annotate(err, "failed to decompress build_large_fields for build %d", buildsV2Msg.Build.GetId()).Err() 455 } 456 largeFields := &buildbucketpb.Build{} 457 if err := proto.Unmarshal(largeFieldsData, largeFields); err != nil { 458 return nil, errors.Annotate(err, "failed to unmarshal build_large_fields for build %d", buildsV2Msg.Build.GetId()).Err() 459 } 460 proto.Merge(buildsV2Msg.Build, largeFields) 461 462 emails, err := extractEmailNotifyValues(buildsV2Msg.Build, "") 463 if err != nil { 464 return nil, errors.Annotate(err, "could not decode email_notify in builds_v2 pubsub message for build %d", buildsV2Msg.Build.GetId()).Err() 465 } 466 467 return &Build{ 468 BuildbucketHostname: buildsV2Msg.Build.GetInfra().GetBuildbucket().GetHostname(), 469 Build: *buildsV2Msg.Build, 470 EmailNotify: emails, 471 }, nil 472 } 473 474 // TODO(crbug.com/1408909): remove the handling for old messages after migration is done. 475 // Handle the message from `builds` pubsub topic. 476 var message struct { 477 Build bbv1.LegacyApiCommonBuildMessage 478 Hostname string 479 } 480 switch err := json.Unmarshal(msg.Message.Data, &message); { 481 case err != nil: 482 return nil, errors.Annotate(err, "could not parse pubsub message data").Err() 483 case !strings.HasPrefix(message.Build.Bucket, "luci."): 484 logging.Infof(c, "Received build that isn't part of LUCI, ignoring...") 485 return nil, nil 486 case message.Build.Status != bbv1.StatusCompleted: 487 logging.Infof(c, "Received build that hasn't completed yet, ignoring...") 488 return nil, nil 489 } 490 491 buildsClient, err := newBuildsClient(c, message.Hostname, message.Build.Project) 492 if err != nil { 493 return nil, err 494 } 495 496 logging.Infof(c, "fetching build %d", message.Build.Id) 497 res, err := buildsClient.GetBuild(c, &buildbucketpb.GetBuildRequest{ 498 Id: message.Build.Id, 499 Fields: &field_mask.FieldMask{ 500 Paths: []string{"*"}, 501 }, 502 }) 503 switch { 504 case status.Code(err) == codes.NotFound: 505 logging.Warningf(c, "no access to build %d", message.Build.Id) 506 return nil, nil 507 case err != nil: 508 err = grpcutil.WrapIfTransient(err) 509 err = errors.Annotate(err, "could not fetch buildbucket build %d", message.Build.Id).Err() 510 return nil, err 511 } 512 513 emails, err := extractEmailNotifyValues(res, message.Build.ParametersJson) 514 if err != nil { 515 return nil, errors.Annotate(err, "could not decode email_notify").Err() 516 } 517 518 return &Build{ 519 BuildbucketHostname: message.Hostname, 520 Build: *res, 521 EmailNotify: emails, 522 }, nil 523 } 524 525 // zlibDecompress decompresses data using zlib. 526 func zlibDecompress(compressed []byte) ([]byte, error) { 527 r, err := zlib.NewReader(bytes.NewReader(compressed)) 528 if err != nil { 529 return nil, err 530 } 531 originalData, err := io.ReadAll(r) 532 if err != nil { 533 return nil, err 534 } 535 if err := r.Close(); err != nil { 536 return nil, err 537 } 538 return originalData, nil 539 }