go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/changelist/updater.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package changelist
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"strconv"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"google.golang.org/protobuf/proto"
    27  
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/common/errors"
    30  	"go.chromium.org/luci/common/logging"
    31  	"go.chromium.org/luci/common/retry/transient"
    32  	"go.chromium.org/luci/common/sync/parallel"
    33  	"go.chromium.org/luci/gae/service/datastore"
    34  	"go.chromium.org/luci/server/tq"
    35  
    36  	"go.chromium.org/luci/cv/internal/common"
    37  	"go.chromium.org/luci/cv/internal/gerrit"
    38  	"go.chromium.org/luci/cv/internal/metrics"
    39  )
    40  
const (
	// BatchUpdateCLTaskClass is the Task Class ID of the BatchUpdateCLTask,
	// which is enqueued only during a transaction.
	BatchUpdateCLTaskClass = "batch-update-cl"
	// UpdateCLTaskClass is the Task Class ID of the UpdateCLTask.
	UpdateCLTaskClass = "update-cl"

	// blindRefreshInterval sets the interval between blind refreshes of a CL.
	//
	// It is the task de-duplication window used when the task carries no
	// ExternalUpdateTime hint, and also the delay of the task re-scheduled
	// after an out-of-quota Fetch error.
	blindRefreshInterval = time.Minute

	// knownRefreshInterval sets the interval between refreshes of a CL when
	// updatedHint is known.
	knownRefreshInterval = 15 * time.Minute

	// autoRefreshAfter makes CLs worthy of "blind" refresh.
	//
	// "blind" refresh means that the CL is already stored in Datastore and is
	// up to date to the best of CV's knowledge.
	autoRefreshAfter = 2 * time.Hour
)
    61  
// UpdaterBackend abstracts out fetching CL details from code review backend.
//
// A backend is registered with an Updater via RegisterBackend() and is looked
// up by the kind of the CL's ExternalID.
type UpdaterBackend interface {
	// Kind identifies the backend.
	//
	// It's also the first part of the CL's ExternalID, e.g. "gerrit".
	// Must not contain a slash.
	Kind() string

	// LookupApplicableConfig returns the latest ApplicableConfig for the previously
	// saved CL.
	//
	// See CL.ApplicableConfig field doc for more details. Roughly, it finds which
	// LUCI projects are configured to watch this CL.
	//
	// Updater calls LookupApplicableConfig() before Fetch() in order to avoid
	// the unnecessary Fetch() call entirely, e.g. if the CL is up to date or if
	// the CL is definitely not watched by a specific LUCI project.
	//
	// Returns non-nil ApplicableConfig normally.
	// Returns nil ApplicableConfig if the previously saved CL state isn't
	// sufficient to confidently determine the ApplicableConfig.
	LookupApplicableConfig(ctx context.Context, saved *CL) (*ApplicableConfig, error)

	// Fetch fetches the CL in the context of a given project.
	//
	// The returned UpdateFields describe what should be changed in the stored
	// CL; empty UpdateFields mean that no update is necessary.
	Fetch(ctx context.Context, input *FetchInput) (UpdateFields, error)

	// HasChanged decides whether the CL in the backend has changed from existing
	// snapshot in LUCI CV.
	//
	// cvCurrent is the snapshot currently stored by CV and backendCurrent is
	// the candidate snapshot to replace it with.
	HasChanged(cvCurrent, backendCurrent *Snapshot) bool

	// TQErrorSpec allows customizing logging and TQ-specific error handling.
	//
	// For example, Gerrit backend may wish to retry out of quota errors without
	// logging detailed stacktrace.
	TQErrorSpec() common.TQIfy
}
    98  
// FetchInput is an input for UpdaterBackend.Fetch.
//
// It contains fields for what to fetch with meta information.
type FetchInput struct {
	// CL of the ChangeList to fetch a snapshot of.
	//
	// If CL.ID in the input is 0, it means the CL entity doesn't exist in
	// Datastore. The cl.ExternalID is always set.
	CL *CL
	// Project is the LUCI project to use the scoped account of for the fetch
	// operation to be performed.
	Project string
	// UpdatedHint, if not zero time, is the backend-originating timestamp of
	// the most recent CL update time. It's sourced by CV by e.g. polling or
	// PubSub subscription. It is useful to detect and work around backend's
	// eventual consistency.
	//
	// NOTE: NewFetchInput does not populate this field.
	UpdatedHint time.Time
	// Requester identifies various scenarios that issued the Fetch invocation.
	Requester UpdateCLTask_Requester
	// Hint is the hint carried by the UpdateCLTask which triggered the fetch.
	//
	// May be nil if the task has no hint.
	Hint *UpdateCLTask_Hint
}
   120  
   121  // NewFetchInput returns FetchInput for a given CL and UpdateCLTask.
   122  func NewFetchInput(cl *CL, task *UpdateCLTask) *FetchInput {
   123  	return &FetchInput{
   124  		CL:        cl,
   125  		Project:   task.GetLuciProject(),
   126  		Hint:      task.GetHint(),
   127  		Requester: task.GetRequester(),
   128  	}
   129  }
   130  
// UpdateFields defines what parts of CL to update.
//
// At least one field must be specified.
type UpdateFields struct {
	// Snapshot overwrites existing CL snapshot if newer according to its
	// .ExternalUpdateTime.
	//
	// NOTE(review): the actual "has it changed" decision is delegated to
	// UpdaterBackend.HasChanged; confirm against the backend implementation.
	Snapshot *Snapshot

	// ApplicableConfig overwrites existing CL ApplicableConfig if semantically
	// different from existing one.
	ApplicableConfig *ApplicableConfig

	// AddDependentMeta adds or overwrites per LUCI project records in the CL's
	// Access.
	// Doesn't affect metadata stored for projects not referenced here.
	AddDependentMeta *Access

	// DelAccess deletes Access records for the given projects.
	DelAccess []string
}
   150  
   151  // IsEmpty returns true if no updates are necessary.
   152  func (u UpdateFields) IsEmpty() bool {
   153  	return (u.Snapshot == nil &&
   154  		u.ApplicableConfig == nil &&
   155  		len(u.AddDependentMeta.GetByProject()) == 0 &&
   156  		len(u.DelAccess) == 0)
   157  }
   158  
   159  func (u UpdateFields) shouldUpdateSnapshot(cl *CL, backend UpdaterBackend) bool {
   160  	switch {
   161  	case u.Snapshot == nil:
   162  		return false
   163  	case cl.Snapshot == nil:
   164  		return true
   165  	case cl.Snapshot.GetOutdated() != nil:
   166  		return true
   167  	case cl.Snapshot.GetLuciProject() != u.Snapshot.GetLuciProject():
   168  		return true
   169  	case backend.HasChanged(cl.Snapshot, u.Snapshot):
   170  		return true
   171  	default:
   172  		return false
   173  	}
   174  }
   175  
// Apply applies the UpdateFields to a given CL, modifying the CL in place.
//
// Returns:
//   - changed: true if any part of the CL was modified;
//   - changedSnapshot: true if the CL's Snapshot was replaced.
//
// Panics if an AddDependentMeta record being merged into existing Access
// records has no NoAccessTime set.
func (u UpdateFields) Apply(cl *CL, backend UpdaterBackend) (changed, changedSnapshot bool) {
	// Replace ApplicableConfig only if it is materially different.
	if u.ApplicableConfig != nil && !cl.ApplicableConfig.SemanticallyEqual(u.ApplicableConfig) {
		cl.ApplicableConfig = u.ApplicableConfig
		changed = true
	}

	if u.shouldUpdateSnapshot(cl, backend) {
		cl.Snapshot = u.Snapshot
		changed, changedSnapshot = true, true
	}

	switch {
	case u.AddDependentMeta == nil:
		// No access records to add.
	case cl.Access == nil || cl.Access.GetByProject() == nil:
		// No existing records: take the new ones wholesale.
		cl.Access = u.AddDependentMeta
		changed = true
	default:
		// Merge the new records into the existing ones, per LUCI project.
		e := cl.Access.GetByProject()
		for lProject, v := range u.AddDependentMeta.GetByProject() {
			if v.GetNoAccessTime() == nil {
				panic("NoAccessTime must be set")
			}
			old, exists := e[lProject]
			// Overwrite only if there is no record yet or the new one is newer.
			if !exists || old.GetUpdateTime().AsTime().Before(v.GetUpdateTime().AsTime()) {
				// Keep the earlier of the two NoAccessTime values.
				if old.GetNoAccessTime() != nil && old.GetNoAccessTime().AsTime().Before(v.GetNoAccessTime().AsTime()) {
					v.NoAccessTime = old.NoAccessTime
				}
				e[lProject] = v
				changed = true
			}
		}
	}

	if len(u.DelAccess) > 0 && len(cl.Access.GetByProject()) > 0 {
		for _, p := range u.DelAccess {
			if _, exists := cl.Access.GetByProject()[p]; exists {
				changed = true
				delete(cl.Access.ByProject, p)
				// Drop the Access entirely once the last record is removed;
				// nothing remains to delete after that.
				if len(cl.Access.GetByProject()) == 0 {
					cl.Access = nil
					break
				}
			}
		}
	}

	return
}
   225  
// Updater knows how to update CLs from relevant backend (e.g. Gerrit),
// notifying other CV parts as needed.
//
// Create it with NewUpdater() and add backends via RegisterBackend().
type Updater struct {
	// tqd is the TQ dispatcher used to enqueue CL update tasks.
	tqd *tq.Dispatcher
	// mutator is used to create and update CL entities.
	mutator *Mutator

	rwmutex sync.RWMutex // guards `backends`
	// backends maps the backend kind to the registered backend.
	backends map[string]UpdaterBackend
}
   235  
   236  // NewUpdater creates a new Updater.
   237  //
   238  // Starts without backends, but they ought to be added via RegisterBackend().
   239  func NewUpdater(tqd *tq.Dispatcher, m *Mutator) *Updater {
   240  	u := &Updater{
   241  		tqd:      tqd,
   242  		mutator:  m,
   243  		backends: make(map[string]UpdaterBackend, 1),
   244  	}
   245  	tqd.RegisterTaskClass(tq.TaskClass{
   246  		ID:           BatchUpdateCLTaskClass,
   247  		Prototype:    &BatchUpdateCLTask{},
   248  		Queue:        "update-cl",
   249  		Quiet:        true,
   250  		QuietOnError: true,
   251  		Kind:         tq.Transactional,
   252  		Handler: func(ctx context.Context, payload proto.Message) error {
   253  			t := payload.(*BatchUpdateCLTask)
   254  			err := u.handleBatch(ctx, t)
   255  			return common.TQifyError(ctx, err)
   256  		},
   257  	})
   258  	tqd.RegisterTaskClass(tq.TaskClass{
   259  		ID:           UpdateCLTaskClass,
   260  		Prototype:    &UpdateCLTask{},
   261  		Queue:        "update-cl",
   262  		Quiet:        true,
   263  		QuietOnError: true,
   264  		Kind:         tq.FollowsContext,
   265  		Handler: func(ctx context.Context, payload proto.Message) error {
   266  			t := payload.(*UpdateCLTask)
   267  			// NOTE: unlike other TQ handlers code in CV, the common.TQifyError is
   268  			// done inside the handler to allow per-backend definition of which errors
   269  			// are retriable.
   270  			return u.handleCL(ctx, t)
   271  		},
   272  	})
   273  	return u
   274  }
   275  
   276  // RegisterBackend registers a backend.
   277  //
   278  // Panics if backend for the same kind is already registered.
   279  func (u *Updater) RegisterBackend(b UpdaterBackend) {
   280  	kind := b.Kind()
   281  	if strings.ContainsRune(kind, '/') {
   282  		panic(fmt.Errorf("backend %T of kind %q must not contain '/'", b, kind))
   283  	}
   284  	u.rwmutex.Lock()
   285  	defer u.rwmutex.Unlock()
   286  	if _, exists := u.backends[kind]; exists {
   287  		panic(fmt.Errorf("backend %q is already registered", kind))
   288  	}
   289  	u.backends[kind] = b
   290  }
   291  
   292  // ScheduleBatch schedules update of several CLs.
   293  //
   294  // If called in a transaction, enqueues exactly one TQ task transactionally.
   295  // This allows to write 1 Datastore entity during a transaction instead of N
   296  // entities if Schedule() was used for each CL.
   297  //
   298  // Otherwise, enqueues 1 TQ task per CL non-transactionally and in parallel.
   299  func (u *Updater) ScheduleBatch(ctx context.Context, luciProject string, cls []*CL, requester UpdateCLTask_Requester) error {
   300  	tasks := make([]*UpdateCLTask, len(cls))
   301  	for i, cl := range cls {
   302  		tasks[i] = &UpdateCLTask{
   303  			LuciProject: luciProject,
   304  			ExternalId:  string(cl.ExternalID),
   305  			Id:          int64(cl.ID),
   306  			Requester:   requester,
   307  		}
   308  	}
   309  
   310  	switch {
   311  	case len(tasks) == 1:
   312  		// Optimization for the most frequent use-case of single-CL Runs.
   313  		return u.Schedule(ctx, tasks[0])
   314  	case datastore.CurrentTransaction(ctx) == nil:
   315  		return u.handleBatch(ctx, &BatchUpdateCLTask{Tasks: tasks})
   316  	default:
   317  		return u.tqd.AddTask(ctx, &tq.Task{
   318  			Payload: &BatchUpdateCLTask{Tasks: tasks},
   319  			Title:   fmt.Sprintf("batch-%s-%d", luciProject, len(tasks)),
   320  		})
   321  	}
   322  }
   323  
   324  // Schedule dispatches a TQ task. It should be used instead of the direct
   325  // tq.AddTask to allow for consistent de-duplication.
   326  func (u *Updater) Schedule(ctx context.Context, payload *UpdateCLTask) error {
   327  	return u.ScheduleDelayed(ctx, payload, 0)
   328  }
   329  
   330  // ScheduleDelayed is the same as Schedule but with a delay.
   331  func (u *Updater) ScheduleDelayed(ctx context.Context, payload *UpdateCLTask, delay time.Duration) error {
   332  	task := &tq.Task{
   333  		Payload: payload,
   334  		Delay:   delay,
   335  		Title:   makeTQTitleForHumans(payload),
   336  	}
   337  	if payload.Requester == UpdateCLTask_REQUESTER_CLASS_UNSPECIFIED {
   338  		panic(fmt.Errorf("BUG: UpdateCLTask.Requester unspecified: %s", payload))
   339  	}
   340  	if datastore.CurrentTransaction(ctx) == nil {
   341  		task.DeduplicationKey = makeTaskDeduplicationKey(ctx, payload, delay)
   342  	}
   343  	return u.tqd.AddTask(ctx, task)
   344  }
   345  
   346  // ResolveAndScheduleDepsUpdate resolves deps, creating new CL entities as
   347  // necessary, and schedules an update task for each dep which needs an update.
   348  //
   349  // It's meant to be used by the Updater backends.
   350  //
   351  // Returns a sorted slice of Deps by their CL ID, ready to be stored as
   352  // CL.Snapshot.Deps.
   353  func (u *Updater) ResolveAndScheduleDepsUpdate(ctx context.Context, luciProject string, deps map[ExternalID]DepKind, requester UpdateCLTask_Requester) ([]*Dep, error) {
   354  	// Optimize for the most frequent case whereby deps are already known to CV
   355  	// and were updated recently enough that no task scheduling is even necessary.
   356  
   357  	// Batch-resolve external IDs to CLIDs, and load all existing CLs.
   358  	resolvingDeps, err := resolveDeps(ctx, luciProject, deps)
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  	// Identify indexes of deps which need to have an update task scheduled.
   363  	ret := make([]*Dep, len(deps))
   364  	var toSchedule []int // indexes
   365  	for i, d := range resolvingDeps {
   366  		if d.ready {
   367  			ret[i] = d.resolvedDep
   368  		} else {
   369  			// Also covers the case of a dep not yet having a CL entity.
   370  			toSchedule = append(toSchedule, i)
   371  		}
   372  	}
   373  	if len(toSchedule) == 0 {
   374  		// Quick path exit.
   375  		return sortDeps(ret), nil
   376  	}
   377  
   378  	errs := parallel.WorkPool(min(10, len(toSchedule)), func(work chan<- func() error) {
   379  		for _, i := range toSchedule {
   380  			i, d := i, resolvingDeps[i]
   381  			work <- func() error {
   382  				if err := d.createIfNotExists(ctx, u.mutator, luciProject); err != nil {
   383  					return err
   384  				}
   385  				if err := d.schedule(ctx, u, luciProject, requester); err != nil {
   386  					return err
   387  				}
   388  				ret[i] = d.resolvedDep
   389  				return nil
   390  			}
   391  		}
   392  	})
   393  	if errs != nil {
   394  		return nil, common.MostSevereError(err)
   395  	}
   396  	return sortDeps(ret), nil
   397  }
   398  
   399  ///////////////////////////////////////////////////////////////////////////////
   400  // implementation details.
   401  
   402  func (u *Updater) handleBatch(ctx context.Context, batch *BatchUpdateCLTask) error {
   403  	total := len(batch.GetTasks())
   404  	err := parallel.WorkPool(min(16, total), func(work chan<- func() error) {
   405  		for _, task := range batch.GetTasks() {
   406  			task := task
   407  			work <- func() error { return u.Schedule(ctx, task) }
   408  		}
   409  	})
   410  	switch merrs, ok := err.(errors.MultiError); {
   411  	case err == nil:
   412  		return nil
   413  	case !ok:
   414  		return err
   415  	default:
   416  		failed, _ := merrs.Summary()
   417  		err = common.MostSevereError(merrs)
   418  		return errors.Annotate(err, "failed to schedule UpdateCLTask for %d out of %d CLs, keeping the most severe error", failed, total).Err()
   419  	}
   420  }
   421  
   422  // TestingForceUpdate runs the CL Updater synchronously.
   423  //
   424  // For use in tests only. Production code should use Schedule() to benefit from
   425  // task de-duplication.
   426  //
   427  // TODO(crbug/1284393): revisit the usefulness of the sync refresh after
   428  // consistency-on-demand is provided by Gerrit.
   429  func (u *Updater) TestingForceUpdate(ctx context.Context, task *UpdateCLTask) error {
   430  	return u.handleCL(ctx, task)
   431  }
   432  
   433  func (u *Updater) handleCL(ctx context.Context, task *UpdateCLTask) error {
   434  	cl, err := u.preload(ctx, task)
   435  	if err != nil {
   436  		return common.TQifyError(ctx, err)
   437  	}
   438  	// cl.ID == 0 means CL doesn't yet exist.
   439  	ctx = logging.SetFields(ctx, logging.Fields{
   440  		"project": task.GetLuciProject(),
   441  		"id":      cl.ID,
   442  		"eid":     cl.ExternalID,
   443  	})
   444  
   445  	backend, err := u.backendFor(cl)
   446  	if err != nil {
   447  		return common.TQifyError(ctx, err)
   448  	}
   449  
   450  	switch err := u.handleCLWithBackend(ctx, task, cl, backend); {
   451  	case err == errHackRetryForOutOfQuota:
   452  		return tq.Ignore.Apply(err)
   453  	case err != nil:
   454  		return backend.TQErrorSpec().Error(ctx, err)
   455  	}
   456  	return nil
   457  }
   458  
// errHackRetryForOutOfQuota is returned by handleCLWithBackend after it has
// scheduled a delayed replacement task in response to an out-of-quota Fetch
// error. handleCL marks it with tq.Ignore instead of handing it to the
// backend's TQErrorSpec.
var errHackRetryForOutOfQuota = errors.New("hack retry for out of quota")
   460  
   461  func (u *Updater) handleCLWithBackend(ctx context.Context, task *UpdateCLTask, cl *CL, backend UpdaterBackend) error {
   462  	// Save ID and ExternalID before giving CL to backend to avoid accidental corruption.
   463  	id, eid := cl.ID, cl.ExternalID
   464  	skip, updateFields, err := u.trySkippingFetch(ctx, task, cl, backend)
   465  	var fetchDuration time.Duration
   466  	switch {
   467  	case err != nil:
   468  		return err
   469  	case !skip:
   470  		now := clock.Now(ctx)
   471  		updateFields, err = backend.Fetch(ctx, NewFetchInput(cl, task))
   472  		fetchDuration = clock.Since(ctx, now)
   473  		switch {
   474  		case err != nil && errors.Unwrap(err) == gerrit.ErrOutOfQuota && task.GetLuciProject() == "chromeos":
   475  			// HACK: don't retry on out of quota error, instead schedule another task
   476  			// with delay so that it will be deduplicated in cloud task with any
   477  			// subsequent tasks.
   478  			if scheduleErr := u.ScheduleDelayed(ctx, task, blindRefreshInterval); scheduleErr != nil {
   479  				return errors.Annotate(err, "%T.Fetch failed", backend).Err()
   480  			}
   481  			return errHackRetryForOutOfQuota
   482  		case err != nil:
   483  			return errors.Annotate(err, "%T.Fetch failed", backend).Err()
   484  		}
   485  	}
   486  
   487  	if updateFields.IsEmpty() {
   488  		logging.Debugf(ctx, "No update is necessary")
   489  		return nil
   490  	}
   491  
   492  	// Transactionally update the CL.
   493  	var changed, changedSnapshot bool
   494  	transClbk := func(latest *CL) error {
   495  		if changed, changedSnapshot = updateFields.Apply(latest, backend); !changed {
   496  			// Someone, possibly even us in case of Datastore transaction retry, has
   497  			// already updated this CL.
   498  			return ErrStopMutation
   499  		}
   500  		return nil
   501  	}
   502  	if cl.ID == 0 {
   503  		_, err = u.mutator.Upsert(ctx, task.GetLuciProject(), eid, transClbk)
   504  	} else {
   505  		_, err = u.mutator.Update(ctx, task.GetLuciProject(), id, transClbk)
   506  	}
   507  
   508  	if err != nil {
   509  		return err
   510  	}
   511  
   512  	switch {
   513  	case updateFields.Snapshot == nil:
   514  		// Skip reporting the fetch metrics. It's either the fetch operation
   515  		// failed or skipped.
   516  	case skip:
   517  		// Fetch was not performed; skip reporting the metrics.
   518  	case changed:
   519  		// Report the latency metrics only if the fetch actually returned
   520  		// new data. If the data was the same as the existing snapshot,
   521  		// the fetch wasn't needed, indeed.
   522  		delay := clock.Now(ctx).Sub(updateFields.Snapshot.ExternalUpdateTime.AsTime())
   523  		if delay < 0 {
   524  			logging.Errorf(ctx, "negative CL fetch duration (%d) detected", delay)
   525  			delay = 0
   526  		}
   527  		metrics.Internal.CLIngestionLatency.Add(
   528  			ctx, delay.Seconds(), task.GetRequester().String(), task.GetIsForDep(),
   529  			task.GetLuciProject(), changedSnapshot)
   530  		metrics.Internal.CLIngestionLatencyWithoutFetch.Add(
   531  			ctx, (delay - fetchDuration).Seconds(), task.GetRequester().String(),
   532  			task.GetIsForDep(), task.GetLuciProject(), changedSnapshot)
   533  		fallthrough
   534  	default:
   535  		metrics.Internal.CLIngestionAttempted.Add(
   536  			ctx, 1, task.GetRequester().String(), changed, task.GetIsForDep(),
   537  			task.GetLuciProject(), changedSnapshot)
   538  	}
   539  	return nil
   540  }
   541  
// trySkippingFetch checks if a fetch from the backend can be skipped.
//
// Returns true if so.
// NOTE: UpdateFields may be set if fetch can be skipped, meaning CL entity
// should be updated in Datastore.
//
// Returns an error only if the backend fails to look up the applicable config.
func (u *Updater) trySkippingFetch(ctx context.Context, task *UpdateCLTask, cl *CL, backend UpdaterBackend) (bool, UpdateFields, error) {
	// A CL which doesn't exist yet, has no snapshot or has an outdated snapshot
	// must always be fetched.
	if cl.ID == 0 || cl.Snapshot == nil || cl.Snapshot.GetOutdated() != nil {
		return false, UpdateFields{}, nil
	}

	hintedTS := task.GetHint().GetExternalUpdateTime()
	hintedRevID := task.GetHint().GetMetaRevId()
	switch {
	case hintedTS == nil && hintedRevID == "":
		// Always fetch if there is no hint available.
		return false, UpdateFields{}, nil
	case hintedRevID != "" && hintedRevID != cl.Snapshot.GetGerrit().GetInfo().GetMetaRevId():
		// Always fetch if the hinted meta rev ID differs from the rev ID of the
		// stored snapshot. If the fetched snapshot turns out to be older than
		// the stored one, the Datastore entity won't be updated with it.
		return false, UpdateFields{}, nil
	case hintedTS != nil && hintedTS.AsTime().After(cl.Snapshot.GetExternalUpdateTime().AsTime()):
		// There is no confidence that Snapshot is up-to-date, so proceed fetching
		// anyway.

		// NOTE: it's tempting to check first whether the LUCI project is watching
		// the CL given the existing Snapshot and skip the fetch if it's not the
		// case. However, for Gerrit CLs, the ref is mutable after the CL
		// creation and since ref is used to determine if CL is being watched,
		// we can't skip the fetch. For an example, see Gerrit move API
		// https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#move-change
		return false, UpdateFields{}, nil
	}

	// CL Snapshot is up to date, but does it belong to the right LUCI project?
	acfg, err := backend.LookupApplicableConfig(ctx, cl)
	if err != nil {
		err = errors.Annotate(err, "%T.LookupApplicableConfig failed", backend).Err()
		return false, UpdateFields{}, err
	}
	if acfg == nil {
		// Insufficient saved CL, need to fetch before deciding if CL is watched.
		// NOTE: err is nil here.
		return false, UpdateFields{}, err
	}

	// Update CL with the new set of watching projects if materially different,
	// which should be saved to Datastore even if the fetch from Gerrit itself is
	// skipped.
	var toUpdate UpdateFields
	if !cl.ApplicableConfig.SemanticallyEqual(acfg) {
		toUpdate.ApplicableConfig = acfg
	}

	if !acfg.HasProject(task.GetLuciProject()) {
		// This project isn't watching the CL, so no need to fetch.
		//
		// NOTE: even if the Snapshot was fetched in the context of this project before,
		// we don't have to erase the Snapshot from the CL immediately: the update
		// in cl.ApplicableConfig suffices to ensure that CV won't be using the
		// Snapshot.
		return true, toUpdate, nil
	}

	if !acfg.HasProject(cl.Snapshot.GetLuciProject()) {
		// The Snapshot was previously fetched in the context of a project which is
		// no longer watching the CL.
		//
		// This can happen in practice in case of e.g. newly created "chromium-mXXX"
		// project to watch for a specific ref which was previously watched by a
		// generic "chromium" project. A Snapshot of a CL on such a ref would have
		// been fetched in the context of "chromium" first, and now it must be re-fetched
		// under "chromium-mXXX" to verify that the new project hasn't lost access
		// to the Gerrit CL.
		logging.Warningf(ctx, "Detected switch from %q LUCI project", cl.Snapshot.GetLuciProject())
		return false, toUpdate, nil
	}

	// At this point, these must be true:
	// * the Snapshot is up-to-date to the best of CV knowledge;
	// * this project is watching the CL, but there may be other projects, too;
	// * the Snapshot was created by a project still watching the CL, but which may
	//   differ from this project.
	if len(acfg.GetProjects()) >= 2 {
		// When there are several watching projects, projects shouldn't race
		// re-fetching & saving Snapshot. No new Runs are going to be started on
		// such CLs, so skip fetching new snapshot.
		return true, toUpdate, nil
	}

	// There is just 1 project, so check the invariant.
	if task.GetLuciProject() != cl.Snapshot.GetLuciProject() {
		panic(fmt.Errorf("BUG: this project %q must have created the Snapshot, not %q", task.GetLuciProject(), cl.Snapshot.GetLuciProject()))
	}

	if restriction := cl.Access.GetByProject()[task.GetLuciProject()]; restriction != nil {
		// For example, Gerrit has responded HTTP 403/404 before.
		// Must fetch again to verify if restriction still holds.
		logging.Debugf(ctx, "Detected prior access restriction: %s", restriction)
		return false, toUpdate, nil
	}

	// Finally, do refresh if the CL entity is just really old and the meta rev
	// id is unset.
	switch {
	case hintedRevID != "":
		// Skip the fetch: at this point the hinted meta rev ID is the same as
		// the rev ID of the stored snapshot.
	case clock.Since(ctx, cl.UpdateTime) > autoRefreshAfter:
		// Strictly speaking, cl.UpdateTime isn't just changed on refresh, but
		// also whenever Run starts/ends. However, the start of a Run usually
		// happens right after a recent refresh, and the end of a Run is usually
		// followed by a refresh.
		return false, toUpdate, nil
	}

	// OK, skip the fetch.
	return true, toUpdate, nil
}
   660  
   661  func (*Updater) preload(ctx context.Context, task *UpdateCLTask) (*CL, error) {
   662  	if task.GetLuciProject() == "" {
   663  		return nil, errors.New("invalid task input: LUCI project must be given")
   664  	}
   665  	eid := ExternalID(task.GetExternalId())
   666  	id := common.CLID(task.GetId())
   667  	switch {
   668  	case id != 0:
   669  		cl := &CL{ID: common.CLID(id)}
   670  		switch err := datastore.Get(ctx, cl); {
   671  		case err == datastore.ErrNoSuchEntity:
   672  			return nil, errors.Annotate(err, "CL %d %q doesn't exist in Datastore", id, task.GetExternalId()).Err()
   673  		case err != nil:
   674  			return nil, errors.Annotate(err, "failed to load CL %d", id).Tag(transient.Tag).Err()
   675  		case eid != "" && eid != cl.ExternalID:
   676  			return nil, errors.Reason("invalid task input: CL %d actually has %q ExternalID, not %q", id, cl.ExternalID, eid).Err()
   677  		default:
   678  			return cl, nil
   679  		}
   680  	case eid == "":
   681  		return nil, errors.Reason("invalid task input: either internal ID or ExternalID must be given").Err()
   682  	default:
   683  		switch cl, err := eid.Load(ctx); {
   684  		case err != nil:
   685  			return nil, errors.Annotate(err, "failed to load CL %q", eid).Tag(transient.Tag).Err()
   686  		case cl == nil:
   687  			// New CL to be created.
   688  			return &CL{
   689  				ExternalID: eid,
   690  				ID:         0, // will be populated later.
   691  				EVersion:   0,
   692  			}, nil
   693  		default:
   694  			return cl, nil
   695  		}
   696  	}
   697  }
   698  
   699  func (u *Updater) backendFor(cl *CL) (UpdaterBackend, error) {
   700  	kind, err := cl.ExternalID.kind()
   701  	if err != nil {
   702  		return nil, err
   703  	}
   704  	u.rwmutex.RLock()
   705  	defer u.rwmutex.RUnlock()
   706  	if b, exists := u.backends[kind]; exists {
   707  		return b, nil
   708  	}
   709  	return nil, errors.Reason("%q backend is not supported", kind).Err()
   710  }
   711  
   712  // makeTaskDeduplicationKey returns TQ task deduplication key.
   713  func makeTaskDeduplicationKey(ctx context.Context, t *UpdateCLTask, delay time.Duration) string {
   714  	var sb strings.Builder
   715  	sb.WriteString("v0")
   716  	sb.WriteRune('\n')
   717  	sb.WriteString(t.GetLuciProject())
   718  	sb.WriteRune('\n')
   719  
   720  	// Prefer ExternalID if both ID and ExternalID are known, as the most frequent
   721  	// use-case for update via PubSub/Polling, which specifies ExternalID and may
   722  	// not resolve it to internal ID just yet.
   723  	uniqArg := t.GetExternalId()
   724  	if uniqArg == "" {
   725  		uniqArg = strconv.FormatInt(t.GetId(), 16)
   726  	}
   727  	sb.WriteString(uniqArg)
   728  
   729  	// If the meta rev ID is set, dedup with a time window isn't necessary.
   730  	// 1) Gerrit guarantees one publish for each of CL update events.
   731  	// 2) # of redelivered messages should be low enough to ignore.
   732  	// 3) If the same message is redelivered multiple times, the backend
   733  	// will skip fetching the snapshot after the first message.
   734  	// 4) If it's concerned that retries can fast burn out Gerrit quota,
   735  	// pubsub retry config should be tuned, instead.
   736  	if revID := t.GetHint().GetMetaRevId(); revID != "" {
   737  		_, _ = fmt.Fprintf(&sb, "\n%s", revID)
   738  		return sb.String()
   739  	}
   740  
   741  	// Dedup in the short term to avoid excessive number of refreshes,
   742  	// but ensure eventually calling Schedule with the same payload results in a
   743  	// new task. This is done by de-duping only within a single "epoch" window,
   744  	// which differs by CL to avoid synchronized herd of requests hitting
   745  	// a backend (e.g. Gerrit).
   746  	//
   747  	// +----------------------------------------------------------------------+
   748  	// |                 ... -> time goes forward -> ....                     |
   749  	// +----------------------------------------------------------------------+
   750  	// |                                                                      |
   751  	// | ... | epoch (N-1, CL-A) | epoch (N, CL-A) | epoch (N+1, CL-A) | ...  |
   752  	// |                                                                      |
   753  	// |            ... | epoch (N-1, CL-B) | epoch (N, CL-B) | ...           |
   754  	// +----------------------------------------------------------------------+
   755  	//
   756  	// Furthermore, de-dup window differs based on whether updatedHint is given
   757  	// or it's a blind refresh.
   758  	interval := blindRefreshInterval
   759  	if t.GetHint().GetExternalUpdateTime() != nil {
   760  		interval = knownRefreshInterval
   761  	}
   762  	epochOffset := common.DistributeOffset(interval, "update-cl", t.GetLuciProject(), uniqArg)
   763  	epochTS := clock.Now(ctx).Add(delay).Truncate(interval).Add(interval + epochOffset)
   764  	_, _ = fmt.Fprintf(&sb, "\n%x", epochTS.UnixNano())
   765  	if h := t.GetHint().GetExternalUpdateTime(); h != nil {
   766  		_, _ = fmt.Fprintf(&sb, "\n%x", h.AsTime().UnixNano())
   767  	}
   768  	return sb.String()
   769  }
   770  
   771  // makeTQTitleForHumans makes human-readable TQ task title.
   772  //
   773  // WARNING: do not use for anything else. Doesn't guarantee uniqueness.
   774  //
   775  // It will be visible in logs as the suffix of URL in Cloud Tasks console and
   776  // in the GAE requests log.
   777  //
   778  // The primary purpose is that quick search for specific CL in the GAE request
   779  // log alone, as opposed to searching through much larger and separate stderr
   780  // log of the process (which is where logging.Logf calls go into).
   781  //
   782  // For example,
   783  //
   784  //	"proj/gerrit/chromium/1111111/u2016-02-03T04:05:06Z/deadbeef"
   785  //	"proj/gerrit/chromium/1111111/u2016-02-03T04:05:06Z"
   786  //	"proj/gerrit/chromium/1111111/deadbeef"
   787  func makeTQTitleForHumans(t *UpdateCLTask) string {
   788  	var sb strings.Builder
   789  	sb.WriteString(t.GetLuciProject())
   790  	if id := t.GetId(); id != 0 {
   791  		_, _ = fmt.Fprintf(&sb, "/%d", id)
   792  	}
   793  	if eid := t.GetExternalId(); eid != "" {
   794  		sb.WriteRune('/')
   795  		// Reduce verbosity in common case of Gerrit on googlesource.
   796  		// Although it's possible to delegate this to backend, the additional
   797  		// boilerplate isn't yet justified.
   798  		if kind, err := ExternalID(eid).kind(); err == nil && kind == "gerrit" {
   799  			eid = strings.Replace(eid, "-review.googlesource.com/", "/", 1)
   800  		}
   801  		sb.WriteString(eid)
   802  	}
   803  	if hintedTS := t.GetHint().GetExternalUpdateTime(); hintedTS != nil {
   804  		sb.WriteString("/u")
   805  		sb.WriteString(hintedTS.AsTime().UTC().Format(time.RFC3339))
   806  	}
   807  	if hintedRevID := t.GetHint().GetMetaRevId(); hintedRevID != "" {
   808  		sb.WriteString("/")
   809  		sb.WriteString(hintedRevID)
   810  	}
   811  	return sb.String()
   812  }
   813  
// maxDepsLoadingBatchSize caps how many dependency CL entities resolveDeps
// loads with a single datastore.Get call.
const maxDepsLoadingBatchSize = 100
   815  
   816  func resolveDeps(ctx context.Context, luciProject string, deps map[ExternalID]DepKind) ([]resolvingDep, error) {
   817  	eids := make([]ExternalID, 0, len(deps))
   818  	ret := make([]resolvingDep, 0, len(deps))
   819  	for eid, kind := range deps {
   820  		eids = append(eids, eid)
   821  		ret = append(ret, resolvingDep{eid: eid, kind: kind})
   822  	}
   823  
   824  	ids, err := Lookup(ctx, eids)
   825  	if err != nil {
   826  		return nil, err
   827  	}
   828  	depCLs := make([]CL, 0, maxDepsLoadingBatchSize)
   829  	depCLIndices := make([]int, 0, maxDepsLoadingBatchSize)
   830  	for i, id := range ids {
   831  		if id > 0 {
   832  			cl := CL{ID: id}
   833  			depCLs = append(depCLs, cl)
   834  			depCLIndices = append(depCLIndices, i)
   835  			ret[i].resolvedDep = &Dep{Clid: int64(id), Kind: ret[i].kind}
   836  		}
   837  		if len(depCLs) == maxDepsLoadingBatchSize || (len(depCLs) > 0 && i == len(ids)-1) {
   838  			// cut a batch if max is reached or end of ids.
   839  			if err := datastore.Get(ctx, depCLs); err != nil {
   840  				// Mark error as transient because by this time, all CLIDs should have
   841  				// corresponding CL entities in datastore.
   842  				return nil, errors.Annotate(err, "failed to load %d CLs", len(depCLs)).Tag(transient.Tag).Err()
   843  			}
   844  			for j, depCL := range depCLs {
   845  				ret[depCLIndices[j]].ready = !depNeedsRefresh(ctx, depCL, luciProject)
   846  			}
   847  			depCLs = depCLs[:0]
   848  			depCLIndices = depCLIndices[:0]
   849  		}
   850  	}
   851  	return ret, nil
   852  }
   853  
// resolvingDep represents a dependency known by its external ID only being
// resolved.
//
// Helper struct for the Updater.ResolveAndScheduleDeps.
type resolvingDep struct {
	// eid is the external ID the dependency is known by.
	eid ExternalID
	// kind is the kind of the dependency; it is copied into resolvedDep.
	kind DepKind
	// ready is true if the dep's CL is already up to date, in which case
	// resolvedDep is populated, too.
	ready bool
	// resolvedDep is the dependency expressed via its internal CL ID.
	// If nil, use createIfNotExists() to populate.
	resolvedDep *Dep
}
   864  
   865  func (d *resolvingDep) createIfNotExists(ctx context.Context, m *Mutator, luciProject string) error {
   866  	if d.resolvedDep != nil {
   867  		return nil // already exists
   868  	}
   869  	cl, err := m.Upsert(ctx, luciProject, d.eid, func(cl *CL) error {
   870  		// TODO: somehow record when CL was inserted to put a boundary on how long
   871  		// Project Manager should be waiting for the dep to be actually fetched &
   872  		// its entity updated in Datastore.
   873  		if cl.EVersion > 0 {
   874  			// If CL already exists, we don't need to modify it % above comment.
   875  			return ErrStopMutation
   876  		}
   877  		return nil
   878  	})
   879  	if err != nil {
   880  		return err
   881  	}
   882  	d.resolvedDep = &Dep{Clid: int64(cl.ID), Kind: d.kind}
   883  	return nil
   884  }
   885  
   886  func (d *resolvingDep) schedule(ctx context.Context, u *Updater, luciProject string, requester UpdateCLTask_Requester) error {
   887  	return u.Schedule(ctx, &UpdateCLTask{
   888  		ExternalId:  string(d.eid),
   889  		Id:          d.resolvedDep.GetClid(),
   890  		LuciProject: luciProject,
   891  		Requester:   requester,
   892  		IsForDep:    true,
   893  	})
   894  }
   895  
   896  // sortDeps sorts given slice by CLID ASC in place and returns it.
   897  func sortDeps(deps []*Dep) []*Dep {
   898  	sort.Slice(deps, func(i, j int) bool {
   899  		return deps[i].GetClid() < deps[j].GetClid()
   900  	})
   901  	return deps
   902  }
   903  
   904  // depNeedsRefresh returns true if the dependency CL needs a refresh in the
   905  // context of a specific LUCI project.
   906  func depNeedsRefresh(ctx context.Context, dep CL, luciProject string) bool {
   907  	switch {
   908  	case dep.Snapshot == nil:
   909  		return true
   910  	case dep.Snapshot.GetOutdated() != nil:
   911  		return true
   912  	case dep.Snapshot.GetLuciProject() != luciProject:
   913  		return true
   914  	default:
   915  		return false
   916  	}
   917  }