go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/changelist/updater.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package changelist 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 "strconv" 22 "strings" 23 "sync" 24 "time" 25 26 "google.golang.org/protobuf/proto" 27 28 "go.chromium.org/luci/common/clock" 29 "go.chromium.org/luci/common/errors" 30 "go.chromium.org/luci/common/logging" 31 "go.chromium.org/luci/common/retry/transient" 32 "go.chromium.org/luci/common/sync/parallel" 33 "go.chromium.org/luci/gae/service/datastore" 34 "go.chromium.org/luci/server/tq" 35 36 "go.chromium.org/luci/cv/internal/common" 37 "go.chromium.org/luci/cv/internal/gerrit" 38 "go.chromium.org/luci/cv/internal/metrics" 39 ) 40 41 const ( 42 // BatchUpdateCLTaskClass is the Task Class ID of the BatchUpdateCLTask, 43 // which is enqueued only during a transaction. 44 BatchUpdateCLTaskClass = "batch-update-cl" 45 // UpdateCLTaskClass is the Task Class ID of the UpdateCLTask. 46 UpdateCLTaskClass = "update-cl" 47 48 // blindRefreshInterval sets interval between blind refreshes of a CL. 49 blindRefreshInterval = time.Minute 50 51 // knownRefreshInterval sets interval between refreshes of a CL when 52 // updatedHint is known. 53 knownRefreshInterval = 15 * time.Minute 54 55 // autoRefreshAfter makes CLs worthy of "blind" refresh. 
56 // 57 // "blind" refresh means that CL is already stored in Datastore and is up to 58 // the date to the best knowledge of CV. 59 autoRefreshAfter = 2 * time.Hour 60 ) 61 62 // UpdaterBackend abstracts out fetching CL details from code review backend. 63 type UpdaterBackend interface { 64 // Kind identifies the backend. 65 // 66 // It's also the first part of the CL's ExternalID, e.g. "gerrit". 67 // Must not contain a slash. 68 Kind() string 69 70 // LookupApplicableConfig returns the latest ApplicableConfig for the previously 71 // saved CL. 72 // 73 // See CL.ApplicableConfig field doc for more details. Roughly, it finds which 74 // LUCI projects are configured to watch this CL. 75 // 76 // Updater calls LookupApplicableConfig() before Fetch() in order to avoid 77 // the unnecessary Fetch() call entirely, e.g. if the CL is up to date or if 78 // the CL is definitely not watched by a specific LUCI project. 79 // 80 // Returns non-nil ApplicableConfig normally. 81 // Returns nil ApplicableConfig if the previously saved CL state isn't 82 // sufficient to confidently determine the ApplicableConfig. 83 LookupApplicableConfig(ctx context.Context, saved *CL) (*ApplicableConfig, error) 84 85 // Fetch fetches the CL in the context of a given project. 86 Fetch(ctx context.Context, input *FetchInput) (UpdateFields, error) 87 88 // HasChanged decides whether the CL in the backend has changed from existing 89 // snapshot in LUCI CV. 90 HasChanged(cvCurrent, backendCurrent *Snapshot) bool 91 92 // TQErrorSpec allows customizing logging and error TQ-specific handling. 93 // 94 // For example, Gerrit backend may wish to retry out of quota errors without 95 // logging detailed stacktrace. 96 TQErrorSpec() common.TQIfy 97 } 98 99 // FetchInput an input for UpdaterBackend.Fetch. 100 // 101 // It contains fields for what to fetch with meta information. 102 type FetchInput struct { 103 // CL of the ChangeList to fetch a snapshot of. 
104 // 105 // If CL.ID in the input is 0, it means the CL entity doesn't exist in 106 // Datastore. The cl.ExternalID is always set. 107 CL *CL 108 // Project is the LUCI project to use the scoped account of for the fetch 109 // operation to be performed. 110 Project string 111 // UpdatedHint, if not zero time, is the backend-originating timestamp of 112 // the most recent CL update time. It's sourced by CV by e.g. polling or 113 // PubSub subscription. It is useful to detect and work around backend's 114 // eventual consistency. 115 UpdatedHint time.Time 116 // Requester identifies various scenarios that issued the Fetch invocation. 117 Requester UpdateCLTask_Requester 118 Hint *UpdateCLTask_Hint 119 } 120 121 // NewFetchInput returns FetchInput for a given CL and UpdateCLTask. 122 func NewFetchInput(cl *CL, task *UpdateCLTask) *FetchInput { 123 return &FetchInput{ 124 CL: cl, 125 Project: task.GetLuciProject(), 126 Hint: task.GetHint(), 127 Requester: task.GetRequester(), 128 } 129 } 130 131 // UpdateFields defines what parts of CL to update. 132 // 133 // At least one field must be specified. 134 type UpdateFields struct { 135 // Snapshot overwrites existing CL snapshot if newer according to its 136 // .ExternalUpdateTime. 137 Snapshot *Snapshot 138 139 // ApplicableConfig overwrites existing CL ApplicableConfig if semantically 140 // different from existing one. 141 ApplicableConfig *ApplicableConfig 142 143 // AddDependentMeta adds or overwrites metadata per LUCI project in CL AsDepMeta. 144 // Doesn't affect metadata stored for projects not referenced here. 145 AddDependentMeta *Access 146 147 // DelAccess deletes Access records for the given projects. 148 DelAccess []string 149 } 150 151 // IsEmpty returns true if no updates are necessary. 
152 func (u UpdateFields) IsEmpty() bool { 153 return (u.Snapshot == nil && 154 u.ApplicableConfig == nil && 155 len(u.AddDependentMeta.GetByProject()) == 0 && 156 len(u.DelAccess) == 0) 157 } 158 159 func (u UpdateFields) shouldUpdateSnapshot(cl *CL, backend UpdaterBackend) bool { 160 switch { 161 case u.Snapshot == nil: 162 return false 163 case cl.Snapshot == nil: 164 return true 165 case cl.Snapshot.GetOutdated() != nil: 166 return true 167 case cl.Snapshot.GetLuciProject() != u.Snapshot.GetLuciProject(): 168 return true 169 case backend.HasChanged(cl.Snapshot, u.Snapshot): 170 return true 171 default: 172 return false 173 } 174 } 175 176 // Apply applies the UpdatedFields to a given CL. 177 func (u UpdateFields) Apply(cl *CL, backend UpdaterBackend) (changed, changedSnapshot bool) { 178 if u.ApplicableConfig != nil && !cl.ApplicableConfig.SemanticallyEqual(u.ApplicableConfig) { 179 cl.ApplicableConfig = u.ApplicableConfig 180 changed = true 181 } 182 183 if u.shouldUpdateSnapshot(cl, backend) { 184 cl.Snapshot = u.Snapshot 185 changed, changedSnapshot = true, true 186 } 187 188 switch { 189 case u.AddDependentMeta == nil: 190 case cl.Access == nil || cl.Access.GetByProject() == nil: 191 cl.Access = u.AddDependentMeta 192 changed = true 193 default: 194 e := cl.Access.GetByProject() 195 for lProject, v := range u.AddDependentMeta.GetByProject() { 196 if v.GetNoAccessTime() == nil { 197 panic("NoAccessTime must be set") 198 } 199 old, exists := e[lProject] 200 if !exists || old.GetUpdateTime().AsTime().Before(v.GetUpdateTime().AsTime()) { 201 if old.GetNoAccessTime() != nil && old.GetNoAccessTime().AsTime().Before(v.GetNoAccessTime().AsTime()) { 202 v.NoAccessTime = old.NoAccessTime 203 } 204 e[lProject] = v 205 changed = true 206 } 207 } 208 } 209 210 if len(u.DelAccess) > 0 && len(cl.Access.GetByProject()) > 0 { 211 for _, p := range u.DelAccess { 212 if _, exists := cl.Access.GetByProject()[p]; exists { 213 changed = true 214 delete(cl.Access.ByProject, p) 
215 if len(cl.Access.GetByProject()) == 0 { 216 cl.Access = nil 217 break 218 } 219 } 220 } 221 } 222 223 return 224 } 225 226 // Updater knows how to update CLs from relevant backend (e.g. Gerrit), 227 // notifying other CV parts as needed. 228 type Updater struct { 229 tqd *tq.Dispatcher 230 mutator *Mutator 231 232 rwmutex sync.RWMutex // guards `backends` 233 backends map[string]UpdaterBackend 234 } 235 236 // NewUpdater creates a new Updater. 237 // 238 // Starts without backends, but they ought to be added via RegisterBackend(). 239 func NewUpdater(tqd *tq.Dispatcher, m *Mutator) *Updater { 240 u := &Updater{ 241 tqd: tqd, 242 mutator: m, 243 backends: make(map[string]UpdaterBackend, 1), 244 } 245 tqd.RegisterTaskClass(tq.TaskClass{ 246 ID: BatchUpdateCLTaskClass, 247 Prototype: &BatchUpdateCLTask{}, 248 Queue: "update-cl", 249 Quiet: true, 250 QuietOnError: true, 251 Kind: tq.Transactional, 252 Handler: func(ctx context.Context, payload proto.Message) error { 253 t := payload.(*BatchUpdateCLTask) 254 err := u.handleBatch(ctx, t) 255 return common.TQifyError(ctx, err) 256 }, 257 }) 258 tqd.RegisterTaskClass(tq.TaskClass{ 259 ID: UpdateCLTaskClass, 260 Prototype: &UpdateCLTask{}, 261 Queue: "update-cl", 262 Quiet: true, 263 QuietOnError: true, 264 Kind: tq.FollowsContext, 265 Handler: func(ctx context.Context, payload proto.Message) error { 266 t := payload.(*UpdateCLTask) 267 // NOTE: unlike other TQ handlers code in CV, the common.TQifyError is 268 // done inside the handler to allow per-backend definition of which errors 269 // are retriable. 270 return u.handleCL(ctx, t) 271 }, 272 }) 273 return u 274 } 275 276 // RegisterBackend registers a backend. 277 // 278 // Panics if backend for the same kind is already registered. 
279 func (u *Updater) RegisterBackend(b UpdaterBackend) { 280 kind := b.Kind() 281 if strings.ContainsRune(kind, '/') { 282 panic(fmt.Errorf("backend %T of kind %q must not contain '/'", b, kind)) 283 } 284 u.rwmutex.Lock() 285 defer u.rwmutex.Unlock() 286 if _, exists := u.backends[kind]; exists { 287 panic(fmt.Errorf("backend %q is already registered", kind)) 288 } 289 u.backends[kind] = b 290 } 291 292 // ScheduleBatch schedules update of several CLs. 293 // 294 // If called in a transaction, enqueues exactly one TQ task transactionally. 295 // This allows to write 1 Datastore entity during a transaction instead of N 296 // entities if Schedule() was used for each CL. 297 // 298 // Otherwise, enqueues 1 TQ task per CL non-transactionally and in parallel. 299 func (u *Updater) ScheduleBatch(ctx context.Context, luciProject string, cls []*CL, requester UpdateCLTask_Requester) error { 300 tasks := make([]*UpdateCLTask, len(cls)) 301 for i, cl := range cls { 302 tasks[i] = &UpdateCLTask{ 303 LuciProject: luciProject, 304 ExternalId: string(cl.ExternalID), 305 Id: int64(cl.ID), 306 Requester: requester, 307 } 308 } 309 310 switch { 311 case len(tasks) == 1: 312 // Optimization for the most frequent use-case of single-CL Runs. 313 return u.Schedule(ctx, tasks[0]) 314 case datastore.CurrentTransaction(ctx) == nil: 315 return u.handleBatch(ctx, &BatchUpdateCLTask{Tasks: tasks}) 316 default: 317 return u.tqd.AddTask(ctx, &tq.Task{ 318 Payload: &BatchUpdateCLTask{Tasks: tasks}, 319 Title: fmt.Sprintf("batch-%s-%d", luciProject, len(tasks)), 320 }) 321 } 322 } 323 324 // Schedule dispatches a TQ task. It should be used instead of the direct 325 // tq.AddTask to allow for consistent de-duplication. 326 func (u *Updater) Schedule(ctx context.Context, payload *UpdateCLTask) error { 327 return u.ScheduleDelayed(ctx, payload, 0) 328 } 329 330 // ScheduleDelayed is the same as Schedule but with a delay. 
331 func (u *Updater) ScheduleDelayed(ctx context.Context, payload *UpdateCLTask, delay time.Duration) error { 332 task := &tq.Task{ 333 Payload: payload, 334 Delay: delay, 335 Title: makeTQTitleForHumans(payload), 336 } 337 if payload.Requester == UpdateCLTask_REQUESTER_CLASS_UNSPECIFIED { 338 panic(fmt.Errorf("BUG: UpdateCLTask.Requester unspecified: %s", payload)) 339 } 340 if datastore.CurrentTransaction(ctx) == nil { 341 task.DeduplicationKey = makeTaskDeduplicationKey(ctx, payload, delay) 342 } 343 return u.tqd.AddTask(ctx, task) 344 } 345 346 // ResolveAndScheduleDepsUpdate resolves deps, creating new CL entities as 347 // necessary, and schedules an update task for each dep which needs an update. 348 // 349 // It's meant to be used by the Updater backends. 350 // 351 // Returns a sorted slice of Deps by their CL ID, ready to be stored as 352 // CL.Snapshot.Deps. 353 func (u *Updater) ResolveAndScheduleDepsUpdate(ctx context.Context, luciProject string, deps map[ExternalID]DepKind, requester UpdateCLTask_Requester) ([]*Dep, error) { 354 // Optimize for the most frequent case whereby deps are already known to CV 355 // and were updated recently enough that no task scheduling is even necessary. 356 357 // Batch-resolve external IDs to CLIDs, and load all existing CLs. 358 resolvingDeps, err := resolveDeps(ctx, luciProject, deps) 359 if err != nil { 360 return nil, err 361 } 362 // Identify indexes of deps which need to have an update task scheduled. 363 ret := make([]*Dep, len(deps)) 364 var toSchedule []int // indexes 365 for i, d := range resolvingDeps { 366 if d.ready { 367 ret[i] = d.resolvedDep 368 } else { 369 // Also covers the case of a dep not yet having a CL entity. 370 toSchedule = append(toSchedule, i) 371 } 372 } 373 if len(toSchedule) == 0 { 374 // Quick path exit. 
375 return sortDeps(ret), nil 376 } 377 378 errs := parallel.WorkPool(min(10, len(toSchedule)), func(work chan<- func() error) { 379 for _, i := range toSchedule { 380 i, d := i, resolvingDeps[i] 381 work <- func() error { 382 if err := d.createIfNotExists(ctx, u.mutator, luciProject); err != nil { 383 return err 384 } 385 if err := d.schedule(ctx, u, luciProject, requester); err != nil { 386 return err 387 } 388 ret[i] = d.resolvedDep 389 return nil 390 } 391 } 392 }) 393 if errs != nil { 394 return nil, common.MostSevereError(err) 395 } 396 return sortDeps(ret), nil 397 } 398 399 /////////////////////////////////////////////////////////////////////////////// 400 // implementation details. 401 402 func (u *Updater) handleBatch(ctx context.Context, batch *BatchUpdateCLTask) error { 403 total := len(batch.GetTasks()) 404 err := parallel.WorkPool(min(16, total), func(work chan<- func() error) { 405 for _, task := range batch.GetTasks() { 406 task := task 407 work <- func() error { return u.Schedule(ctx, task) } 408 } 409 }) 410 switch merrs, ok := err.(errors.MultiError); { 411 case err == nil: 412 return nil 413 case !ok: 414 return err 415 default: 416 failed, _ := merrs.Summary() 417 err = common.MostSevereError(merrs) 418 return errors.Annotate(err, "failed to schedule UpdateCLTask for %d out of %d CLs, keeping the most severe error", failed, total).Err() 419 } 420 } 421 422 // TestingForceUpdate runs the CL Updater synchronously. 423 // 424 // For use in tests only. Production code should use Schedule() to benefit from 425 // task de-duplication. 426 // 427 // TODO(crbug/1284393): revisit the usefulness of the sync refresh after 428 // consistency-on-demand is provided by Gerrit. 
func (u *Updater) TestingForceUpdate(ctx context.Context, task *UpdateCLTask) error {
	return u.handleCL(ctx, task)
}

// handleCL processes a single UpdateCLTask.
//
// It loads (or stubs out) the CL named by the task, finds the backend for the
// CL's ExternalID and delegates to handleCLWithBackend.
//
// Errors from preloading and backend lookup go through common.TQifyError,
// while errors from the update itself go through the backend's TQErrorSpec.
func (u *Updater) handleCL(ctx context.Context, task *UpdateCLTask) error {
	cl, err := u.preload(ctx, task)
	if err != nil {
		return common.TQifyError(ctx, err)
	}
	// cl.ID == 0 means CL doesn't yet exist.
	ctx = logging.SetFields(ctx, logging.Fields{
		"project": task.GetLuciProject(),
		"id":      cl.ID,
		"eid":     cl.ExternalID,
	})

	backend, err := u.backendFor(cl)
	if err != nil {
		return common.TQifyError(ctx, err)
	}

	switch err := u.handleCLWithBackend(ctx, task, cl, backend); {
	case err == errHackRetryForOutOfQuota:
		// A delayed replacement task has already been scheduled; see
		// handleCLWithBackend.
		return tq.Ignore.Apply(err)
	case err != nil:
		return backend.TQErrorSpec().Error(ctx, err)
	}
	return nil
}

// errHackRetryForOutOfQuota is returned by handleCLWithBackend after it has
// scheduled a delayed task in response to an out of quota error.
var errHackRetryForOutOfQuota = errors.New("hack retry for out of quota")

// handleCLWithBackend updates the CL using the given backend.
//
// It fetches from the backend unless trySkippingFetch says the fetch can be
// skipped, applies the resulting UpdateFields to the CL via the mutator and
// reports ingestion metrics.
func (u *Updater) handleCLWithBackend(ctx context.Context, task *UpdateCLTask, cl *CL, backend UpdaterBackend) error {
	// Save ID and ExternalID before giving CL to backend to avoid accidental corruption.
	id, eid := cl.ID, cl.ExternalID
	skip, updateFields, err := u.trySkippingFetch(ctx, task, cl, backend)
	var fetchDuration time.Duration
	switch {
	case err != nil:
		return err
	case !skip:
		now := clock.Now(ctx)
		updateFields, err = backend.Fetch(ctx, NewFetchInput(cl, task))
		fetchDuration = clock.Since(ctx, now)
		switch {
		case err != nil && errors.Unwrap(err) == gerrit.ErrOutOfQuota && task.GetLuciProject() == "chromeos":
			// HACK: don't retry on out of quota error, instead schedule another task
			// with delay so that it will be deduplicated in cloud task with any
			// subsequent tasks.
			if scheduleErr := u.ScheduleDelayed(ctx, task, blindRefreshInterval); scheduleErr != nil {
				// Scheduling failed: report the original Fetch error instead.
				return errors.Annotate(err, "%T.Fetch failed", backend).Err()
			}
			return errHackRetryForOutOfQuota
		case err != nil:
			return errors.Annotate(err, "%T.Fetch failed", backend).Err()
		}
	}

	if updateFields.IsEmpty() {
		logging.Debugf(ctx, "No update is necessary")
		return nil
	}

	// Transactionally update the CL.
	var changed, changedSnapshot bool
	transClbk := func(latest *CL) error {
		if changed, changedSnapshot = updateFields.Apply(latest, backend); !changed {
			// Someone, possibly even us in case of Datastore transaction retry, has
			// already updated this CL.
			return ErrStopMutation
		}
		return nil
	}
	if cl.ID == 0 {
		// The CL entity doesn't exist yet, so it can only be addressed by its
		// ExternalID.
		_, err = u.mutator.Upsert(ctx, task.GetLuciProject(), eid, transClbk)
	} else {
		_, err = u.mutator.Update(ctx, task.GetLuciProject(), id, transClbk)
	}

	if err != nil {
		return err
	}

	switch {
	case updateFields.Snapshot == nil:
		// Skip reporting the fetch metrics. Either the fetch operation failed or
		// it was skipped.
	case skip:
		// Fetch was not performed; skip reporting the metrics.
	case changed:
		// Report the latency metrics only if the fetch actually returned
		// new data. If the data was the same as the existing snapshot,
		// the fetch wasn't needed, indeed.
		delay := clock.Now(ctx).Sub(updateFields.Snapshot.ExternalUpdateTime.AsTime())
		if delay < 0 {
			logging.Errorf(ctx, "negative CL fetch duration (%d) detected", delay)
			delay = 0
		}
		metrics.Internal.CLIngestionLatency.Add(
			ctx, delay.Seconds(), task.GetRequester().String(), task.GetIsForDep(),
			task.GetLuciProject(), changedSnapshot)
		metrics.Internal.CLIngestionLatencyWithoutFetch.Add(
			ctx, (delay - fetchDuration).Seconds(), task.GetRequester().String(),
			task.GetIsForDep(), task.GetLuciProject(), changedSnapshot)
		// The attempt counter below is reported for changed CLs, too.
		fallthrough
	default:
		metrics.Internal.CLIngestionAttempted.Add(
			ctx, 1, task.GetRequester().String(), changed, task.GetIsForDep(),
			task.GetLuciProject(), changedSnapshot)
	}
	return nil
}

// trySkippingFetch checks if a fetch from the backend can be skipped.
//
// Returns true if so.
// NOTE: UpdateFields may be set if fetch can be skipped, meaning CL entity
// should be updated in Datastore.
func (u *Updater) trySkippingFetch(ctx context.Context, task *UpdateCLTask, cl *CL, backend UpdaterBackend) (bool, UpdateFields, error) {
	// A CL without an entity or without a usable Snapshot must be fetched.
	if cl.ID == 0 || cl.Snapshot == nil || cl.Snapshot.GetOutdated() != nil {
		return false, UpdateFields{}, nil
	}

	hintedTS := task.GetHint().GetExternalUpdateTime()
	hintedRevID := task.GetHint().GetMetaRevId()
	switch {
	case hintedTS == nil && hintedRevID == "":
		// Always fetch if there is no hint available.
		return false, UpdateFields{}, nil
	case hintedRevID != "" && hintedRevID != cl.Snapshot.GetGerrit().GetInfo().GetMetaRevId():
		// Always fetch if the hinted meta rev ID differs from the rev ID of the
		// stored snapshot. If the fetched snapshot turns out to be older than
		// the stored one, the Datastore entity won't be updated with it.
		return false, UpdateFields{}, nil
	case hintedTS != nil && hintedTS.AsTime().After(cl.Snapshot.GetExternalUpdateTime().AsTime()):
		// There is no confidence that Snapshot is up-to-date, so proceed fetching
		// anyway.

		// NOTE: it's tempting to check first whether the LUCI project is watching
		// the CL given the existing Snapshot and skip the fetch if it's not the
		// case. However, for Gerrit CLs, the ref is mutable after the CL
		// creation and since ref is used to determine if CL is being watched,
		// we can't skip the fetch. For an example, see Gerrit move API
		// https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#move-change
		return false, UpdateFields{}, nil
	}

	// CL Snapshot is up to date, but does it belong to the right LUCI project?
	acfg, err := backend.LookupApplicableConfig(ctx, cl)
	if err != nil {
		err = errors.Annotate(err, "%T.LookupApplicableConfig failed", backend).Err()
		return false, UpdateFields{}, err
	}
	if acfg == nil {
		// Insufficient saved CL, need to fetch before deciding if CL is watched.
		// NOTE: err is nil here.
		return false, UpdateFields{}, err
	}

	// Update CL with the new set of watching projects if materially different,
	// which should be saved to Datastore even if the fetch from Gerrit itself is
	// skipped.
	var toUpdate UpdateFields
	if !cl.ApplicableConfig.SemanticallyEqual(acfg) {
		toUpdate.ApplicableConfig = acfg
	}

	if !acfg.HasProject(task.GetLuciProject()) {
		// This project isn't watching the CL, so no need to fetch.
		//
		// NOTE: even if the Snapshot was fetched in the context of this project before,
		// we don't have to erase the Snapshot from the CL immediately: the update
		// in cl.ApplicableConfig suffices to ensure that CV won't be using the
		// Snapshot.
		return true, toUpdate, nil
	}

	if !acfg.HasProject(cl.Snapshot.GetLuciProject()) {
		// The Snapshot was previously fetched in the context of a project which is
		// no longer watching the CL.
		//
		// This can happen in practice in case of e.g. newly created "chromium-mXXX"
		// project to watch for a specific ref which was previously watched by a
		// generic "chromium" project. A Snapshot of a CL on such a ref would have
		// been fetched in the context of "chromium" first, and now it must be re-fetched
		// under "chromium-mXXX" to verify that the new project hasn't lost access
		// to the Gerrit CL.
		logging.Warningf(ctx, "Detected switch from %q LUCI project", cl.Snapshot.GetLuciProject())
		return false, toUpdate, nil
	}

	// At this point, these must be true:
	// * the Snapshot is up-to-date to the best of CV knowledge;
	// * this project is watching the CL, but there may be other projects, too;
	// * the Snapshot was created by a project still watching the CL, but which may
	//   differ from this project.
	if len(acfg.GetProjects()) >= 2 {
		// When there are several watching projects, projects shouldn't race
		// re-fetching & saving Snapshot. No new Runs are going to be started on
		// such CLs, so skip fetching new snapshot.
		return true, toUpdate, nil
	}

	// There is just 1 project, so check the invariant.
	if task.GetLuciProject() != cl.Snapshot.GetLuciProject() {
		panic(fmt.Errorf("BUG: this project %q must have created the Snapshot, not %q", task.GetLuciProject(), cl.Snapshot.GetLuciProject()))
	}

	if restriction := cl.Access.GetByProject()[task.GetLuciProject()]; restriction != nil {
		// For example, Gerrit has responded HTTP 403/404 before.
		// Must fetch again to verify if restriction still holds.
		logging.Debugf(ctx, "Detected prior access restriction: %s", restriction)
		return false, toUpdate, nil
	}

	// Finally, do refresh if the CL entity is just really old and the meta rev
	// id is unset.
	switch {
	case hintedRevID != "":
		// Skip the fetch if the meta rev ID is the same as the rev ID of the
		// stored snapshot.
	case clock.Since(ctx, cl.UpdateTime) > autoRefreshAfter:
		// Strictly speaking, cl.UpdateTime isn't just changed on refresh, but
		// also whenever Run starts/ends. However, the start of a Run usually
		// happens right after a recent refresh, and the end of a Run is usually
		// followed by a refresh.
		return false, toUpdate, nil
	}

	// OK, skip the fetch.
	return true, toUpdate, nil
}

// preload validates the task and loads the CL it refers to.
//
// The CL is loaded by internal ID if the task has one, otherwise by its
// ExternalID. If no CL with the given ExternalID is found, returns a stub CL
// with just the ExternalID set and zero ID.
//
// Returns an error if the task has no LUCI project, has neither ID nor
// ExternalID, refers to a non-existing internal ID, or if its ID and
// ExternalID refer to different CLs.
func (*Updater) preload(ctx context.Context, task *UpdateCLTask) (*CL, error) {
	if task.GetLuciProject() == "" {
		return nil, errors.New("invalid task input: LUCI project must be given")
	}
	eid := ExternalID(task.GetExternalId())
	id := common.CLID(task.GetId())
	switch {
	case id != 0:
		cl := &CL{ID: common.CLID(id)}
		switch err := datastore.Get(ctx, cl); {
		case err == datastore.ErrNoSuchEntity:
			return nil, errors.Annotate(err, "CL %d %q doesn't exist in Datastore", id, task.GetExternalId()).Err()
		case err != nil:
			return nil, errors.Annotate(err, "failed to load CL %d", id).Tag(transient.Tag).Err()
		case eid != "" && eid != cl.ExternalID:
			return nil, errors.Reason("invalid task input: CL %d actually has %q ExternalID, not %q", id, cl.ExternalID, eid).Err()
		default:
			return cl, nil
		}
	case eid == "":
		return nil, errors.Reason("invalid task input: either internal ID or ExternalID must be given").Err()
	default:
		switch cl, err := eid.Load(ctx); {
		case err != nil:
			return nil, errors.Annotate(err, "failed to load CL %q", eid).Tag(transient.Tag).Err()
		case cl == nil:
			// New CL to be created.
			return &CL{
				ExternalID: eid,
				ID:         0, // will be populated later.
				EVersion:   0,
			}, nil
		default:
			return cl, nil
		}
	}
}

// backendFor returns the registered backend matching the kind of the CL's
// ExternalID.
//
// Returns an error if the kind can't be determined or no backend is
// registered for it.
func (u *Updater) backendFor(cl *CL) (UpdaterBackend, error) {
	kind, err := cl.ExternalID.kind()
	if err != nil {
		return nil, err
	}
	u.rwmutex.RLock()
	defer u.rwmutex.RUnlock()
	if b, exists := u.backends[kind]; exists {
		return b, nil
	}
	return nil, errors.Reason("%q backend is not supported", kind).Err()
}

// makeTaskDeduplicationKey returns TQ task deduplication key.
//
// The key is made of newline-separated parts: a version prefix, the LUCI
// project and the CL's ExternalID (or hex-formatted internal ID if the
// ExternalID isn't set), followed by either the hinted meta rev ID or the
// de-duplication epoch with the optional hinted update time.
func makeTaskDeduplicationKey(ctx context.Context, t *UpdateCLTask, delay time.Duration) string {
	var sb strings.Builder
	sb.WriteString("v0")
	sb.WriteRune('\n')
	sb.WriteString(t.GetLuciProject())
	sb.WriteRune('\n')

	// Prefer ExternalID if both ID and ExternalID are known, as the most frequent
	// use-case for update via PubSub/Polling, which specifies ExternalID and may
	// not resolve it to internal ID just yet.
	uniqArg := t.GetExternalId()
	if uniqArg == "" {
		uniqArg = strconv.FormatInt(t.GetId(), 16)
	}
	sb.WriteString(uniqArg)

	// If the meta rev ID is set, dedup with a time window isn't necessary.
	// 1) Gerrit guarantees one publish for each of CL update events.
	// 2) # of redelivered messages should be low enough to ignore.
	// 3) If the same message is redelivered multiple times, the backend
	//    will skip fetching the snapshot after the first message.
	// 4) If there is a concern that retries can quickly burn out Gerrit quota,
	//    the PubSub retry config should be tuned instead.
	if revID := t.GetHint().GetMetaRevId(); revID != "" {
		_, _ = fmt.Fprintf(&sb, "\n%s", revID)
		return sb.String()
	}

	// Dedup in the short term to avoid excessive number of refreshes,
	// but ensure eventually calling Schedule with the same payload results in a
	// new task. This is done by de-duping only within a single "epoch" window,
	// which differs by CL to avoid synchronized herd of requests hitting
	// a backend (e.g. Gerrit).
	//
	// +----------------------------------------------------------------------+
	// |                   ... -> time goes forward -> ....                   |
	// +----------------------------------------------------------------------+
	// |                                                                      |
	// | ... | epoch (N-1, CL-A) | epoch (N, CL-A) | epoch (N+1, CL-A) | ...  |
	// |                                                                      |
	// |            ... | epoch (N-1, CL-B) | epoch (N, CL-B) | ...           |
	// +----------------------------------------------------------------------+
	//
	// Furthermore, de-dup window differs based on whether updatedHint is given
	// or it's a blind refresh.
	interval := blindRefreshInterval
	if t.GetHint().GetExternalUpdateTime() != nil {
		interval = knownRefreshInterval
	}
	epochOffset := common.DistributeOffset(interval, "update-cl", t.GetLuciProject(), uniqArg)
	epochTS := clock.Now(ctx).Add(delay).Truncate(interval).Add(interval + epochOffset)
	_, _ = fmt.Fprintf(&sb, "\n%x", epochTS.UnixNano())
	if h := t.GetHint().GetExternalUpdateTime(); h != nil {
		_, _ = fmt.Fprintf(&sb, "\n%x", h.AsTime().UnixNano())
	}
	return sb.String()
}

// makeTQTitleForHumans makes human-readable TQ task title.
//
// WARNING: do not use for anything else. Doesn't guarantee uniqueness.
//
// It will be visible in logs as the suffix of URL in Cloud Tasks console and
// in the GAE requests log.
//
// The primary purpose is to allow a quick search for a specific CL in the GAE
// request log alone, as opposed to searching through the much larger and
// separate stderr log of the process (which is where logging.Logf calls go).
781 // 782 // For example, 783 // 784 // "proj/gerrit/chromium/1111111/u2016-02-03T04:05:06Z/deadbeef" 785 // "proj/gerrit/chromium/1111111/u2016-02-03T04:05:06Z" 786 // "proj/gerrit/chromium/1111111/deadbeef" 787 func makeTQTitleForHumans(t *UpdateCLTask) string { 788 var sb strings.Builder 789 sb.WriteString(t.GetLuciProject()) 790 if id := t.GetId(); id != 0 { 791 _, _ = fmt.Fprintf(&sb, "/%d", id) 792 } 793 if eid := t.GetExternalId(); eid != "" { 794 sb.WriteRune('/') 795 // Reduce verbosity in common case of Gerrit on googlesource. 796 // Although it's possible to delegate this to backend, the additional 797 // boilerplate isn't yet justified. 798 if kind, err := ExternalID(eid).kind(); err == nil && kind == "gerrit" { 799 eid = strings.Replace(eid, "-review.googlesource.com/", "/", 1) 800 } 801 sb.WriteString(eid) 802 } 803 if hintedTS := t.GetHint().GetExternalUpdateTime(); hintedTS != nil { 804 sb.WriteString("/u") 805 sb.WriteString(hintedTS.AsTime().UTC().Format(time.RFC3339)) 806 } 807 if hintedRevID := t.GetHint().GetMetaRevId(); hintedRevID != "" { 808 sb.WriteString("/") 809 sb.WriteString(hintedRevID) 810 } 811 return sb.String() 812 } 813 814 const maxDepsLoadingBatchSize = 100 815 816 func resolveDeps(ctx context.Context, luciProject string, deps map[ExternalID]DepKind) ([]resolvingDep, error) { 817 eids := make([]ExternalID, 0, len(deps)) 818 ret := make([]resolvingDep, 0, len(deps)) 819 for eid, kind := range deps { 820 eids = append(eids, eid) 821 ret = append(ret, resolvingDep{eid: eid, kind: kind}) 822 } 823 824 ids, err := Lookup(ctx, eids) 825 if err != nil { 826 return nil, err 827 } 828 depCLs := make([]CL, 0, maxDepsLoadingBatchSize) 829 depCLIndices := make([]int, 0, maxDepsLoadingBatchSize) 830 for i, id := range ids { 831 if id > 0 { 832 cl := CL{ID: id} 833 depCLs = append(depCLs, cl) 834 depCLIndices = append(depCLIndices, i) 835 ret[i].resolvedDep = &Dep{Clid: int64(id), Kind: ret[i].kind} 836 } 837 if len(depCLs) == 
maxDepsLoadingBatchSize || (len(depCLs) > 0 && i == len(ids)-1) { 838 // cut a batch if max is reached or end of ids. 839 if err := datastore.Get(ctx, depCLs); err != nil { 840 // Mark error as transient because by this time, all CLIDs should have 841 // corresponding CL entities in datastore. 842 return nil, errors.Annotate(err, "failed to load %d CLs", len(depCLs)).Tag(transient.Tag).Err() 843 } 844 for j, depCL := range depCLs { 845 ret[depCLIndices[j]].ready = !depNeedsRefresh(ctx, depCL, luciProject) 846 } 847 depCLs = depCLs[:0] 848 depCLIndices = depCLIndices[:0] 849 } 850 } 851 return ret, nil 852 } 853 854 // resolvingDep represents a dependency known by its external ID only being 855 // resolved. 856 // 857 // Helper struct for the Updater.ResolveAndScheduleDeps. 858 type resolvingDep struct { 859 eid ExternalID 860 kind DepKind 861 ready bool // true if already up to date and .dep is populated. 862 resolvedDep *Dep // if nil, use createIfNotExists() to populate 863 } 864 865 func (d *resolvingDep) createIfNotExists(ctx context.Context, m *Mutator, luciProject string) error { 866 if d.resolvedDep != nil { 867 return nil // already exists 868 } 869 cl, err := m.Upsert(ctx, luciProject, d.eid, func(cl *CL) error { 870 // TODO: somehow record when CL was inserted to put a boundary on how long 871 // Project Manager should be waiting for the dep to be actually fetched & 872 // its entity updated in Datastore. 873 if cl.EVersion > 0 { 874 // If CL already exists, we don't need to modify it % above comment. 
875 return ErrStopMutation 876 } 877 return nil 878 }) 879 if err != nil { 880 return err 881 } 882 d.resolvedDep = &Dep{Clid: int64(cl.ID), Kind: d.kind} 883 return nil 884 } 885 886 func (d *resolvingDep) schedule(ctx context.Context, u *Updater, luciProject string, requester UpdateCLTask_Requester) error { 887 return u.Schedule(ctx, &UpdateCLTask{ 888 ExternalId: string(d.eid), 889 Id: d.resolvedDep.GetClid(), 890 LuciProject: luciProject, 891 Requester: requester, 892 IsForDep: true, 893 }) 894 } 895 896 // sortDeps sorts given slice by CLID ASC in place and returns it. 897 func sortDeps(deps []*Dep) []*Dep { 898 sort.Slice(deps, func(i, j int) bool { 899 return deps[i].GetClid() < deps[j].GetClid() 900 }) 901 return deps 902 } 903 904 // depNeedsRefresh returns true if the dependency CL needs a refresh in the 905 // context of a specific LUCI project. 906 func depNeedsRefresh(ctx context.Context, dep CL, luciProject string) bool { 907 switch { 908 case dep.Snapshot == nil: 909 return true 910 case dep.Snapshot.GetOutdated() != nil: 911 return true 912 case dep.Snapshot.GetLuciProject() != luciProject: 913 return true 914 default: 915 return false 916 } 917 }