go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/gerrit/updater/fetcher.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package updater
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"strings"
    22  	"time"
    23  
    24  	"golang.org/x/sync/errgroup"
    25  	"google.golang.org/grpc"
    26  	"google.golang.org/grpc/codes"
    27  	"google.golang.org/protobuf/types/known/timestamppb"
    28  
    29  	"go.chromium.org/luci/common/clock"
    30  	"go.chromium.org/luci/common/data/stringset"
    31  	"go.chromium.org/luci/common/errors"
    32  	"go.chromium.org/luci/common/logging"
    33  	gerritpb "go.chromium.org/luci/common/proto/gerrit"
    34  	"go.chromium.org/luci/grpc/grpcutil"
    35  
    36  	"go.chromium.org/luci/cv/internal/changelist"
    37  	"go.chromium.org/luci/cv/internal/common"
    38  	"go.chromium.org/luci/cv/internal/configs/prjcfg"
    39  	"go.chromium.org/luci/cv/internal/gerrit"
    40  	"go.chromium.org/luci/cv/internal/gerrit/cqdepend"
    41  	"go.chromium.org/luci/cv/internal/gerrit/gobmap"
    42  	"go.chromium.org/luci/cv/internal/gerrit/metadata"
    43  )
    44  
const (
	// noAccessGraceDuration works around eventually consistent Gerrit,
	// whereby Gerrit can temporarily return 404 for a CL that actually exists.
	noAccessGraceDuration = 1 * time.Minute

	// noAccessGraceRetryDelay determines when to schedule the next retry task.
	//
	// It is a third of noAccessGraceDuration, so that roughly 2 retries happen
	// before noAccessGraceDuration expires.
	noAccessGraceRetryDelay = noAccessGraceDuration / 3
)

// errStaleOrNoAccess is the sentinel returned by the GetChange retry callback
// when Gerrit responds with NotFound or PermissionDenied. It annotates
// gerrit.ErrStaleData.
var errStaleOrNoAccess = errors.Annotate(gerrit.ErrStaleData, "either no access or deleted or stale").Err()
    57  
// fetcher efficiently computes new snapshot by fetching data from Gerrit.
//
// It ensures each dependency is resolved to an existing CLID,
// creating CLs in the Datastore as needed. Schedules tasks to update
// dependencies but doesn't wait for them to complete.
//
// fetcher is a single-use object:
//
//	f := fetcher{...}
//	if err := f.fetch(ctx); err != nil {...}
//	// Do something with `f.toUpdate`.
type fetcher struct {
	// Dependencies & input. Must be set.
	gFactory                     gerrit.Factory
	g                            gerrit.Client
	scheduleRefresh              func(context.Context, *changelist.UpdateCLTask, time.Duration) error
	resolveAndScheduleDepsUpdate func(ctx context.Context, project string, deps map[changelist.ExternalID]changelist.DepKind, requester changelist.UpdateCLTask_Requester) ([]*changelist.Dep, error)
	project                      string
	host                         string
	change                       int64
	hint                         *changelist.UpdateCLTask_Hint
	requester                    changelist.UpdateCLTask_Requester
	externalID                   changelist.ExternalID
	priorCL                      *changelist.CL // not-nil, if CL already exists in Datastore.

	// Result is stored here.
	toUpdate changelist.UpdateFields
}
    86  
    87  func (f *fetcher) fetch(ctx context.Context) error {
    88  	ci, err := f.fetchChangeInfo(ctx,
    89  		// These are expensive to compute for Gerrit,
    90  		// CV should not do this needlessly.
    91  		gerritpb.QueryOption_ALL_REVISIONS,
    92  		gerritpb.QueryOption_CURRENT_COMMIT,
    93  		gerritpb.QueryOption_DETAILED_LABELS,
    94  		gerritpb.QueryOption_DETAILED_ACCOUNTS,
    95  		gerritpb.QueryOption_MESSAGES,
    96  		gerritpb.QueryOption_SUBMITTABLE,
    97  		gerritpb.QueryOption_SUBMIT_REQUIREMENTS,
    98  		// Avoid asking Gerrit to perform expensive operation.
    99  		gerritpb.QueryOption_SKIP_MERGEABLE,
   100  	)
   101  	switch {
   102  	case err != nil:
   103  		return err
   104  	case ci == nil:
   105  		// Don't proceed to fetching the other details.
   106  		// It's likely due to one of the following.
   107  		// - CV lacks access to the CL
   108  		// - this LUCI project is no longer watching the CL
   109  		// - the task was hinted with an old MetaRevID due to a pubsub message
   110  		// delivered out of order
   111  		return nil
   112  	}
   113  
   114  	f.toUpdate.Snapshot = &changelist.Snapshot{
   115  		LuciProject:        f.project,
   116  		ExternalUpdateTime: ci.GetUpdated(),
   117  		Metadata: metadata.Extract(
   118  			ci.GetRevisions()[ci.GetCurrentRevision()].GetCommit().GetMessage(),
   119  		),
   120  		Kind: &changelist.Snapshot_Gerrit{
   121  			Gerrit: &changelist.Gerrit{
   122  				Host: f.host,
   123  				Info: ci,
   124  			},
   125  		},
   126  	}
   127  	f.toUpdate.DelAccess = []string{f.project}
   128  	if err := f.fetchPostChangeInfo(ctx, ci); err != nil {
   129  		return err
   130  	}
   131  	// Finally, remove all info no longer necessary for CV.
   132  	changelist.RemoveUnusedGerritInfo(ci)
   133  	return nil
   134  }
   135  
   136  func (f *fetcher) fetchPostChangeInfo(ctx context.Context, ci *gerritpb.ChangeInfo) error {
   137  	min, cur, err := gerrit.EquivalentPatchsetRange(ci)
   138  	if err != nil {
   139  		return errors.Annotate(err, "failed to compute equivalent patchset range on %s", f).Err()
   140  	}
   141  	f.toUpdate.Snapshot.MinEquivalentPatchset = int32(min)
   142  	f.toUpdate.Snapshot.Patchset = int32(cur)
   143  
   144  	switch ci.GetStatus() {
   145  	case gerritpb.ChangeStatus_NEW:
   146  		// OK, proceed.
   147  	case gerritpb.ChangeStatus_ABANDONED, gerritpb.ChangeStatus_MERGED:
   148  		// CV doesn't care about such CLs beyond their status, so don't fetch
   149  		// additional details to avoid stumbiling into edge cases with how Gerrit
   150  		// treats abandoned and submitted CLs.
   151  		logging.Debugf(ctx, "%s is %s", f, ci.GetStatus())
   152  		return nil
   153  	default:
   154  		logging.Warningf(ctx, "%s has unknown status %d %s", f, ci.GetStatus().Number(), ci.GetStatus().String())
   155  		return nil
   156  	}
   157  
   158  	// Check if we can re-use info from previous snapshot.
   159  	reused := false
   160  	switch before := f.priorSnapshot().GetGerrit(); {
   161  	case before == nil:
   162  	case before.GetInfo().GetCurrentRevision() != f.mustHaveCurrentRevision():
   163  	case before.GetInfo().GetStatus() != gerritpb.ChangeStatus_NEW:
   164  	default:
   165  		reused = true
   166  		// Re-use past results since CurrentRevision is the same.
   167  		f.toUpdate.Snapshot.GetGerrit().Files = before.GetFiles()
   168  		// NOTE: CQ-Depend deps are fixed per revision. Once soft deps are accepted
   169  		// via hashtags or topics, the re-use won't be possible.
   170  		f.toUpdate.Snapshot.GetGerrit().SoftDeps = before.GetSoftDeps()
   171  	}
   172  
   173  	eg, ectx := errgroup.WithContext(ctx)
   174  	// Always fetch related changes here. It turns out in b/272828859 that
   175  	// Gerrit GetRelatedChange may return inconsistent response if it is called
   176  	// immediately after a CL is updated. Therefore, the first CL Update attempt
   177  	// may incorrectly set the dependencies. By always fetching related changes,
   178  	// we are hoping any subsequent CL Update attempt would receive up-to-date
   179  	// related change infos so that the Dep info is correctly populated.
   180  	eg.Go(func() error { return f.fetchRelated(ectx) })
   181  
   182  	if !reused {
   183  		eg.Go(func() error { return f.fetchFiles(ectx) })
   184  		// Meanwhile, compute soft deps. Currently, it's cheap operation.
   185  		// In the future, it may require sending another RPC to Gerrit,
   186  		// e.g. to fetch related CLs by topic.
   187  		if err = f.setSoftDeps(); err != nil {
   188  			return err
   189  		}
   190  	}
   191  
   192  	if err = eg.Wait(); err != nil {
   193  		return err
   194  	}
   195  	// Always run resolveDeps regardless of re-use of GitDeps/SoftDeps.
   196  	// CV retention policy deletes CLs not modified for a long time,
   197  	// which in some very rare case may affect a dep of this CL.
   198  	if err := f.resolveDeps(ctx); err != nil {
   199  		return err
   200  	}
   201  	return nil
   202  }
   203  
// fetchChangeInfo fetches newest ChangeInfo from Gerrit.
//
// * handles permission errors
// * verifies fetched data isn't definitely stale.
// * checks that current LUCI project is still watching the change.
//
// Returns nil ChangeInfo if no further fetching should proceed. In that case
// the returned error is whatever setCertainNoAccess / setLikelyNoAccess
// returned, or nil if the fetched data is older than the stored snapshot.
func (f *fetcher) fetchChangeInfo(ctx context.Context, opts ...gerritpb.QueryOption) (*gerritpb.ChangeInfo, error) {
	// Avoid querying Gerrit iff the current project doesn't watch the given host,
	// which should be treated as PermissionDenied.
	switch watched, err := f.isHostWatched(ctx); {
	case err != nil:
		return nil, err
	case !watched:
		logging.Warningf(ctx, "Gerrit host %q is not watched by project %q [%s]", f.host, f.project, f)
		return nil, f.setCertainNoAccess(ctx)
	}

	// ci is assigned by the callback below on each attempt.
	var ci *gerritpb.ChangeInfo
	err := f.gFactory.MakeMirrorIterator(ctx).RetryIfStale(func(opt grpc.CallOption) error {
		var err error
		ci, err = f.g.GetChange(ctx, &gerritpb.GetChangeRequest{
			Number:  f.change,
			Project: f.gerritProjectIfKnown(),
			Options: opts,
			Meta:    f.hint.GetMetaRevId(),
		}, opt)
		switch grpcutil.Code(err) {
		case codes.OK:
			if f.isStale(ctx, ci.GetUpdated()) {
				return gerrit.ErrStaleData
			}
			return nil
		case codes.NotFound, codes.PermissionDenied:
			return errStaleOrNoAccess
		// GetChange() returns codes.FailedPrecondition if meta is given
		// but the SHA-1 is not reachable from the serving Gerrit replica.
		//
		// Return gerrit.ErrStaleData to retry on it.
		case codes.FailedPrecondition:
			return gerrit.ErrStaleData
		default:
			return gerrit.UnhandledError(ctx, err, "failed to fetch %s", f)
		}
	})
	switch {
	case err == errStaleOrNoAccess:
		// Chances are it's not due to eventual consistency, but be conservative.
		return nil, f.setLikelyNoAccess(ctx)
	case err != nil:
		return nil, err
	}

	// Record which configs apply to the CL's (host, repo, ref), then check
	// that this LUCI project is among them.
	f.toUpdate.ApplicableConfig, err = gobmap.Lookup(ctx, f.host, ci.GetProject(), ci.GetRef())
	switch storedTS := f.priorSnapshot().GetExternalUpdateTime(); {
	case err != nil:
		return nil, err
	case !f.toUpdate.ApplicableConfig.HasProject(f.project):
		logging.Debugf(ctx, "%s is not watched by the %q project", f, f.project)
		return nil, f.setCertainNoAccess(ctx)
	case storedTS != nil && storedTS.AsTime().After(ci.GetUpdated().AsTime()):
		// The fetched snapshot is fresh, but older than the prior snapshot.
		// Then, skip updating the snapshot.
		//
		// It can happen pubsub messages were delivered out of order.
		return nil, nil
	}

	return ci, nil
}
   274  
   275  func (f *fetcher) setCertainNoAccess(ctx context.Context) error {
   276  	now := clock.Now(ctx)
   277  	noAccessAt := now
   278  	if prior := f.priorNoAccessTime(); !prior.IsZero() && prior.Before(now) {
   279  		// Keep noAccessAt as is.
   280  		noAccessAt = prior
   281  	}
   282  	f.setNoAccessAt(now, noAccessAt)
   283  	return nil
   284  }
   285  
   286  func (f *fetcher) setLikelyNoAccess(ctx context.Context) error {
   287  	now := clock.Now(ctx)
   288  	var noAccessAt time.Time
   289  	var err error
   290  	switch prior := f.priorNoAccessTime(); {
   291  	case prior.IsZero():
   292  		// This is the first time CL.
   293  		noAccessAt = now.Add(noAccessGraceDuration)
   294  		err = f.reschedule(ctx, noAccessGraceRetryDelay)
   295  	case prior.Before(now):
   296  		// Keep noAccessAt as is, it's now considered certain.
   297  		noAccessAt = prior
   298  	default:
   299  		// Keep noAccessAt as is, but schedule yet another refresh.
   300  		noAccessAt = prior
   301  		err = f.reschedule(ctx, noAccessGraceRetryDelay)
   302  	}
   303  	f.setNoAccessAt(now, noAccessAt)
   304  	return err
   305  }
   306  
   307  func (f *fetcher) setNoAccessAt(now, noAccessAt time.Time) {
   308  	f.toUpdate.AddDependentMeta = &changelist.Access{
   309  		ByProject: map[string]*changelist.Access_Project{
   310  			f.project: {
   311  				UpdateTime:   timestamppb.New(now),
   312  				NoAccessTime: timestamppb.New(noAccessAt),
   313  				NoAccess:     true,
   314  			},
   315  		},
   316  	}
   317  }
   318  
   319  // reschedule reschedules the same task with a delay.
   320  func (f *fetcher) reschedule(ctx context.Context, delay time.Duration) error {
   321  	t := &changelist.UpdateCLTask{
   322  		LuciProject: f.project,
   323  		ExternalId:  string(f.externalID),
   324  		Id:          int64(f.clidIfKnown()),
   325  		Hint:        f.hint,
   326  		Requester:   f.requester,
   327  	}
   328  	return f.scheduleRefresh(ctx, t, delay)
   329  }
   330  
   331  // fetchRelated fetches related changes and computes GerritGitDeps.
   332  func (f *fetcher) fetchRelated(ctx context.Context) error {
   333  	return f.gFactory.MakeMirrorIterator(ctx).RetryIfStale(func(opt grpc.CallOption) error {
   334  		resp, err := f.g.GetRelatedChanges(ctx, &gerritpb.GetRelatedChangesRequest{
   335  			Number:     f.change,
   336  			Project:    f.gerritProjectIfKnown(),
   337  			RevisionId: f.mustHaveCurrentRevision(),
   338  		}, opt)
   339  		switch code := grpcutil.Code(err); code {
   340  		case codes.OK:
   341  			f.setGitDeps(ctx, resp.GetChanges())
   342  			return nil
   343  		case codes.PermissionDenied, codes.NotFound:
   344  			// Getting this right after successfully fetching ChangeInfo should
   345  			// typically be due to eventual consistency of Gerrit, and rarely due to
   346  			// change of ACLs.
   347  			return gerrit.ErrStaleData
   348  		default:
   349  			return gerrit.UnhandledError(ctx, err, "failed to fetch related changes for %s", f)
   350  		}
   351  	})
   352  }
   353  
// setGitDeps sets GerritGitDeps based on list of related changes provided by
// Gerrit.GetRelatedChanges RPC.
//
// If GetRelatedChanges output is invalid, doesn't set GerritGitDep and adds an
// appropriate CLError to Snapshot.Errors.
//
// GitDeps is left untouched if `related` is empty or if none of the parents
// of this change's commit is among the related changes.
func (f *fetcher) setGitDeps(ctx context.Context, related []*gerritpb.GetRelatedChangesResponse_ChangeAndCommit) {
	// Gerrit does not provide API that returns just the changes which a given
	// change depends on, but has the API call that returns the following changes:
	//   (1) those on which this change depends, transitively. Among these,
	//       some CLs may have been already merged.
	//   (2) this change itself, with its commit and parent(s) hashes
	//   (3) changes which depend on this change transitively
	// We need (1).
	if len(related) == 0 {
		// Gerrit may not bother to return the CL itself if there are no related
		// changes.
		return
	}
	this, clErr := f.matchCurrentAmongRelated(ctx, related)
	if clErr != nil {
		f.toUpdate.Snapshot.Errors = append(f.toUpdate.Snapshot.Errors, clErr)
		return
	}

	// Construct a map from revision to a list of changes that it represents.
	// One may think that list is not necessary:
	//   two CLs with the same revision should (% sha1 collision) have equal
	//   commit messages, and hence Change-Id, so should be really the same CL.
	// However, many Gerrit projects do not require Change-Id in commit message at
	// upload time, instead generating new Change-Id on the fly.
	byRevision := make(map[string][]*gerritpb.GetRelatedChangesResponse_ChangeAndCommit, len(related))
	for _, r := range related {
		rev := r.GetCommit().GetId()
		byRevision[rev] = append(byRevision[rev], r)
	}

	thisParentsCount := f.countRelatedWhichAreParents(this, byRevision)
	if thisParentsCount == 0 {
		// Quick exit if there are no dependencies of this change (1), only changes
		// depending on this change (3).
		return
	}

	// Now starting from `this` change and following parents relation,
	// find all issues that we can reach via breadth first traversal ordered by
	// distance from this CL.
	// Note that diamond-shaped child->[parent1, parent2]->grandparent are
	// probably possible, so keeping track of visited commits is required.
	// Furthermore, the same CL number may appear multiple times in the chain
	// under different revisions (patchsets).
	visitedRevs := stringset.New(len(related))
	ordered := make([]*gerritpb.GetRelatedChangesResponse_ChangeAndCommit, 0, len(related))
	curLevel := make([]*gerritpb.GetRelatedChangesResponse_ChangeAndCommit, 0, len(related))
	nextLevel := make([]*gerritpb.GetRelatedChangesResponse_ChangeAndCommit, 1, len(related))
	nextLevel[0] = this
	for len(nextLevel) > 0 {
		// Swap the two level buffers: the old curLevel storage is truncated and
		// re-used to collect the next level.
		curLevel, nextLevel = nextLevel, curLevel[:0]
		// For determinism of the output.
		sort.SliceStable(curLevel, func(i, j int) bool {
			return curLevel[i].GetNumber() < curLevel[j].GetNumber()
		})
		ordered = append(ordered, curLevel...)
		for _, r := range curLevel {
			for _, p := range r.GetCommit().GetParents() {
				switch prs := byRevision[p.GetId()]; {
				case len(prs) == 0:
					// The parent isn't among related changes.
					continue
				case len(prs) > 1:
					logging.Warningf(
						ctx,
						"Gerrit.GetRelatedChanges returned rev %q %d times for %s (ALL Related %s)",
						p.GetId(), len(prs), f, related)
					// Avoid borking. Take the first CL by number.
					// The loop moves the lowest-numbered change into prs[0].
					for i, x := range prs[1:] {
						if prs[0].GetNumber() > x.GetNumber() {
							prs[i+1], prs[0] = prs[0], prs[i+1]
						}
					}
					fallthrough
				default:
					// Add returns true only the first time a revision is seen.
					if visitedRevs.Add(prs[0].GetCommit().GetId()) {
						nextLevel = append(nextLevel, prs[0])
					}
				}
			}
		}
	}

	deps := make([]*changelist.GerritGitDep, 0, len(ordered)-1)
	// Specific revision doesn't matter, CV always looks at latest revision,
	// but since the same CL may have >1 revision, the CL number may be added
	// several times into `ordered`.
	// TODO(tandrii): after CQDaemon is removed, consider paying attention to
	// specific revision of the dependency to notice when parent dep has been
	// substantially modified such that tryjobs of this change alone ought to be
	// invalidated (see https://crbug.com/686115).
	added := make(map[int64]bool, len(ordered))
	// ordered[0] is `this` change itself, hence skipped.
	for i, r := range ordered[1:] {
		n := r.GetNumber()
		if added[n] {
			continue
		}
		added[n] = true
		deps = append(deps, &changelist.GerritGitDep{
			Change: n,
			// By construction of ordered, immediate dependencies must be located at
			// ordered[1:1+thisParentsCount], but we are iterating over [1:] subslice.
			Immediate: i < thisParentsCount,
		})
	}
	f.toUpdate.Snapshot.GetGerrit().GitDeps = deps
}
   466  
   467  func (f *fetcher) matchCurrentAmongRelated(
   468  	ctx context.Context, related []*gerritpb.GetRelatedChangesResponse_ChangeAndCommit,
   469  ) (*gerritpb.GetRelatedChangesResponse_ChangeAndCommit, *changelist.CLError) {
   470  
   471  	var this *gerritpb.GetRelatedChangesResponse_ChangeAndCommit
   472  	matched := 0
   473  	for _, r := range related {
   474  		if r.GetNumber() == f.change {
   475  			matched++
   476  			this = r
   477  		}
   478  	}
   479  	if matched != 1 {
   480  		// Apparently in rare cases, Gerrit may get confused and substitute this CL
   481  		// for some other CL in the output (see https://crbug.com/1199471).
   482  		msg := fmt.Sprintf(
   483  			("Gerrit related changes should return the %s/%d CL itself exactly once, but got %d." +
   484  				" Maybe https://crbug.com/1199471 is affecting you?"), f.host, f.change, matched)
   485  		logging.Errorf(ctx, "%s Related output: %s", msg, related)
   486  		return nil, &changelist.CLError{
   487  			Kind: &changelist.CLError_CorruptGerritMetadata{
   488  				CorruptGerritMetadata: msg,
   489  			},
   490  		}
   491  	}
   492  	return this, nil
   493  }
   494  
   495  func (f *fetcher) countRelatedWhichAreParents(this *gerritpb.GetRelatedChangesResponse_ChangeAndCommit, byRevision map[string][]*gerritpb.GetRelatedChangesResponse_ChangeAndCommit) int {
   496  	cnt := 0
   497  	for _, p := range this.GetCommit().GetParents() {
   498  		// Not all parents may be represented by related CLs.
   499  		// OTOH, if there are several CLs matching parent revision,
   500  		// CV will choose just one.
   501  		if _, ok := byRevision[p.GetId()]; ok {
   502  			cnt++
   503  		}
   504  	}
   505  	return cnt
   506  }
   507  
   508  // fetchFiles fetches files for the current revision of the new Snapshot.
   509  func (f *fetcher) fetchFiles(ctx context.Context) error {
   510  	return f.gFactory.MakeMirrorIterator(ctx).RetryIfStale(func(opt grpc.CallOption) error {
   511  		curRev := f.mustHaveCurrentRevision()
   512  		req := &gerritpb.ListFilesRequest{
   513  			Number:     f.change,
   514  			Project:    f.gerritProjectIfKnown(),
   515  			RevisionId: curRev,
   516  		}
   517  		switch revInfo, ok := f.toUpdate.Snapshot.GetGerrit().GetInfo().GetRevisions()[curRev]; {
   518  		case !ok:
   519  			return errors.Reason("missing RevisionInfo for current revision: %s", curRev).Err()
   520  		case len(revInfo.GetCommit().GetParents()) == 0:
   521  			// Occasionally, CL doesn't have parent commit. See: crbug.com/1295817.
   522  		default:
   523  			// For CLs with >1 parent commit (aka merge commits), this relies on
   524  			// Gerrit ensuring that such a CL always has first parent from the
   525  			// target branch.
   526  			req.Parent = 1 // Request a diff against the first parent.
   527  		}
   528  		resp, err := f.g.ListFiles(ctx, req, opt)
   529  		switch code := grpcutil.Code(err); code {
   530  		case codes.OK:
   531  			// Iterate files map and take keys only. CV treats all files "touched" in
   532  			// a Change to be interesting, including chmods. Skip special /COMMIT_MSG
   533  			// and /MERGE_LIST entries, which aren't files. For example output, see
   534  			// https://chromium-review.googlesource.com/changes/1817639/revisions/1/files?parent=1
   535  			fs := make([]string, 0, len(resp.GetFiles()))
   536  			for f := range resp.GetFiles() {
   537  				if !strings.HasPrefix(f, "/") {
   538  					fs = append(fs, f)
   539  				}
   540  			}
   541  			sort.Strings(fs)
   542  			f.toUpdate.Snapshot.GetGerrit().Files = fs
   543  			return nil
   544  
   545  		case codes.PermissionDenied, codes.NotFound:
   546  			return gerrit.ErrStaleData
   547  		default:
   548  			return gerrit.UnhandledError(ctx, err, "failed to fetch files for %s", f)
   549  		}
   550  	})
   551  }
   552  
   553  // setSoftDeps parses CL description and sets soft deps.
   554  func (f *fetcher) setSoftDeps() error {
   555  	ci := f.toUpdate.Snapshot.GetGerrit().GetInfo()
   556  	msg := ci.GetRevisions()[ci.GetCurrentRevision()].GetCommit().GetMessage()
   557  	deps := cqdepend.Parse(msg)
   558  	if len(deps) == 0 {
   559  		return nil
   560  	}
   561  
   562  	// Given f.host like "sub-review.x.y.z", compute "-review.x.y.z" suffix.
   563  	dot := strings.IndexRune(f.host, '.')
   564  	if dot == -1 || !strings.HasSuffix(f.host[:dot], "-review") {
   565  		return errors.Reason("Host %s doesn't support Cq-Depend (%s)", f.host, f).Err()
   566  	}
   567  	hostSuffix := f.host[dot-len("-review"):]
   568  
   569  	softDeps := make([]*changelist.GerritSoftDep, len(deps))
   570  	for i, d := range deps {
   571  		depHost := f.host
   572  		if d.Subdomain != "" {
   573  			depHost = d.Subdomain + hostSuffix
   574  		}
   575  		softDeps[i] = &changelist.GerritSoftDep{Host: depHost, Change: int64(d.Change)}
   576  	}
   577  	f.toUpdate.Snapshot.GetGerrit().SoftDeps = softDeps
   578  	return nil
   579  }
   580  
   581  // resolveDeps resolves to CLID and triggers tasks for each of the soft and GerritGit dep.
   582  func (f *fetcher) resolveDeps(ctx context.Context) error {
   583  	depsMap, err := f.depsToExternalIDs()
   584  	if err != nil {
   585  		return err
   586  	}
   587  
   588  	if depsCnt := len(depsMap); depsCnt > 500 {
   589  		logging.Warningf(ctx, "CL has high number of dependency CLs. Deps count: %d", depsCnt)
   590  	}
   591  	resolved, err := f.resolveAndScheduleDepsUpdate(ctx, f.project, depsMap, f.requester)
   592  	if err != nil {
   593  		return err
   594  	}
   595  	f.toUpdate.Snapshot.Deps = resolved
   596  	return nil
   597  }
   598  
   599  func (f *fetcher) depsToExternalIDs() (map[changelist.ExternalID]changelist.DepKind, error) {
   600  	cqdeps := f.toUpdate.Snapshot.GetGerrit().GetSoftDeps()
   601  	gitdeps := f.toUpdate.Snapshot.GetGerrit().GetGitDeps()
   602  	// Git deps are HARD deps. Since arbitrary Cq-Depend deps may duplicate those
   603  	// of Git, avoid accidental downgrading from HARD to SOFT dep by processing
   604  	// Cq-Depend first and Git deps second.
   605  	eids := make(map[changelist.ExternalID]changelist.DepKind, len(cqdeps)+len(gitdeps))
   606  	for _, dep := range cqdeps {
   607  		eid, err := changelist.GobID(dep.Host, dep.Change)
   608  		if err != nil {
   609  			return nil, err
   610  		}
   611  		eids[eid] = changelist.DepKind_SOFT
   612  	}
   613  	for _, dep := range gitdeps {
   614  		eid, err := changelist.GobID(f.host, dep.Change)
   615  		if err != nil {
   616  			return nil, err
   617  		}
   618  		eids[eid] = changelist.DepKind_HARD
   619  	}
   620  	return eids, nil
   621  }
   622  
   623  // isStale returns true if given Gerrit updated timestamp is older than
   624  // the updateHint or the existing CL state.
   625  func (f *fetcher) isStale(ctx context.Context, externalUpdateTime *timestamppb.Timestamp) bool {
   626  	if f.hint.GetMetaRevId() != "" {
   627  		// If meta was set, it's always the fresh data of the snapshot.
   628  		//
   629  		// If it is an older snapshot than the stored snapshot,
   630  		// the snapshot update will be skipped.
   631  		return false
   632  	}
   633  	t := externalUpdateTime.AsTime()
   634  	storedTS := f.priorSnapshot().GetExternalUpdateTime()
   635  	hintedTS := f.hint.GetExternalUpdateTime()
   636  	switch {
   637  	case hintedTS != nil && hintedTS.AsTime().After(t):
   638  		logging.Debugf(ctx, "Fetched last Gerrit update of %s, but %s expected (%s)", t, hintedTS.AsTime(), f)
   639  	case storedTS != nil && storedTS.AsTime().After(t):
   640  		logging.Debugf(ctx, "Fetched last Gerrit update of %s, but %s was already seen & stored (%s)", t, storedTS.AsTime(), f)
   641  	default:
   642  		return false
   643  	}
   644  	return true
   645  }
   646  
   647  // Checks whether this LUCI project watches any repo on this Gerrit host.
   648  func (f *fetcher) isHostWatched(ctx context.Context) (bool, error) {
   649  	meta, err := prjcfg.GetLatestMeta(ctx, f.project)
   650  	if err != nil {
   651  		return false, err
   652  	}
   653  	cgs, err := meta.GetConfigGroups(ctx)
   654  	if err != nil {
   655  		return false, err
   656  	}
   657  	for _, cg := range cgs {
   658  		for _, g := range cg.Content.GetGerrit() {
   659  			if prjcfg.GerritHost(g) == f.host {
   660  				return true, nil
   661  			}
   662  		}
   663  	}
   664  	return false, nil
   665  }
   666  
   667  func (f *fetcher) gerritProjectIfKnown() string {
   668  	if project := f.priorSnapshot().GetGerrit().GetInfo().GetProject(); project != "" {
   669  		return project
   670  	}
   671  	if project := f.toUpdate.Snapshot.GetGerrit().GetInfo().GetProject(); project != "" {
   672  		return project
   673  	}
   674  	return ""
   675  }
   676  
   677  func (f *fetcher) clidIfKnown() common.CLID {
   678  	if f.priorCL != nil {
   679  		return f.priorCL.ID
   680  	}
   681  	return 0
   682  }
   683  
   684  func (f *fetcher) priorSnapshot() *changelist.Snapshot {
   685  	if f.priorCL != nil {
   686  		return f.priorCL.Snapshot
   687  	}
   688  	return nil
   689  }
   690  
   691  func (f *fetcher) priorNoAccessTime() time.Time {
   692  	if f.priorCL == nil {
   693  		return time.Time{}
   694  	}
   695  	t := f.priorCL.Access.GetByProject()[f.project].GetNoAccessTime()
   696  	if t == nil {
   697  		return time.Time{}
   698  	}
   699  	return t.AsTime()
   700  }
   701  
   702  func (f *fetcher) mustHaveCurrentRevision() string {
   703  	switch ci := f.toUpdate.Snapshot.GetGerrit().GetInfo(); {
   704  	case ci == nil:
   705  		panic("ChangeInfo must be already fetched into toUpdate.Snapshot")
   706  	case ci.GetCurrentRevision() == "":
   707  		panic("ChangeInfo must have CurrentRevision populated.")
   708  	default:
   709  		return ci.GetCurrentRevision()
   710  	}
   711  }
   712  
   713  // String is used for debug identification of a fetch in errors and logs.
   714  func (f *fetcher) String() string {
   715  	if f.priorCL == nil {
   716  		return fmt.Sprintf("CL(%s/%d)", f.host, f.change)
   717  	}
   718  	return fmt.Sprintf("CL(%s/%d [%d])", f.host, f.change, f.priorCL.ID)
   719  }