go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/gerrit/poller/query.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package poller
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  	"time"
    22  
    23  	"google.golang.org/grpc/codes"
    24  	"google.golang.org/grpc/status"
    25  	"google.golang.org/protobuf/types/known/timestamppb"
    26  
    27  	gerritutil "go.chromium.org/luci/common/api/gerrit"
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/common/errors"
    30  	"go.chromium.org/luci/common/logging"
    31  	gerritpb "go.chromium.org/luci/common/proto/gerrit"
    32  	"go.chromium.org/luci/common/retry/transient"
    33  
    34  	"go.chromium.org/luci/cv/internal/changelist"
    35  	"go.chromium.org/luci/cv/internal/common"
    36  	"go.chromium.org/luci/cv/internal/configs/srvcfg"
    37  	"go.chromium.org/luci/cv/internal/gerrit"
    38  )
    39  
    40  const (
    41  	// fullPollInterval is between querying Gerrit for all changes relevant to CV as
    42  	// if from scratch.
    43  	fullPollInterval = 30 * time.Minute
    44  
    45  	// incrementalPollOverlap is safety overlap of time range of Change.Updated
    46  	// between two successive polls.
    47  	//
    48  	// While this doesn't guarantee that CV won't miss changes in-between
    49  	// incremental polls, it should mitigate the most common reasons:
    50  	//   * time skew between CV and Gerrit clocks,
    51  	//   * hopping between potentially out of sync Gerrit mirrors.
    52  	incrementalPollOverlap = time.Minute
    53  
    54  	// changesPerPoll is how many changes CV will process per poll.
    55  	//
    56  	// A value that's too low here will first affect full polls, since they have
    57  	// to (re-)process all interesting changes watched by a LUCI project.
    58  	//
    59  	// 10k is OK to fetch sequentially and keep in RAM without OOM-ing,
    60  	// and currently enough for each of the LUCI projects.
    61  	//
    62  	// Higher values may need smarter full polling techniques.
    63  	changesPerPoll = 10000
    64  
    65  	// pageSize is how many changes to request in a single ListChangesRequest.
    66  	pageSize = 1000
    67  
    68  	// moreChangesTrustFactor controls when CV must not trust false value of
    69  	// ListChangesResponse.MoreChanges.
    70  	//
    71  	// Value of 0.5 combined with pageSize of 1000 means that CV will trust
    72  	// MoreChanges iff Gerrit returns <= 500 CLs.
    73  	//
    74  	// For more info, see corresponding field in
    75  	// https://godoc.org/go.chromium.org/luci/common/api/gerrit#PagingListChangesOptions
    76  	moreChangesTrustFactor = 0.5
    77  )
    78  
    79  // doOneQuery queries Gerrit and updates the query's state.
    80  func (p *Poller) doOneQuery(ctx context.Context, luciProject string, qs *QueryState) error {
    81  	q := singleQuery{
    82  		luciProject: luciProject,
    83  		qs:          qs,
    84  	}
    85  	var err error
    86  	if q.client, err = p.gFactory.MakeClient(ctx, qs.GetHost(), luciProject); err != nil {
    87  		return err
    88  	}
    89  
    90  	// Time to trigger a full-poll?
    91  	now, lastFull := clock.Now(ctx), qs.GetLastFullTime()
    92  	if lastFull == nil || now.After(lastFull.AsTime().Add(fullPollInterval)) {
    93  		return p.doFullQuery(ctx, q)
    94  	}
    95  
    96  	// If pub/sub is enabled for the project, skip incremental-poll.
    97  	switch yes, err := srvcfg.IsProjectEnabledInListener(ctx, luciProject); {
    98  	case err != nil:
    99  		return errors.Annotate(err, "srvcfg.IsProjectEnabledInListener").Err()
   100  	case yes:
   101  		return nil
   102  	}
   103  
   104  	return p.doIncrementalQuery(ctx, q)
   105  }
   106  
   107  func (p *Poller) doFullQuery(ctx context.Context, q singleQuery) error {
   108  	ctx = logging.SetField(ctx, "poll", "full")
   109  	started := clock.Now(ctx)
   110  	after := started.Add(-common.MaxTriggerAge)
   111  	changes, err := q.fetch(ctx, after, q.qs.gerritString(queryLimited))
   112  	// There can be partial result even if err != nil.
   113  	switch err2 := p.notifyOnMatchedCLs(ctx, q.luciProject, q.qs.GetHost(), changes, true, changelist.UpdateCLTask_FULL_POLL_MATCHED); {
   114  	case err != nil:
   115  		return err
   116  	case err2 != nil:
   117  		return err2
   118  	}
   119  
   120  	cur := uniqueSortedIDsOf(changes)
   121  	if diff := common.DifferenceSorted(q.qs.Changes, cur); len(diff) != 0 {
   122  		// `diff` changes are no longer matching the limited query,
   123  		// so they were probably updated since.
   124  		if err := p.notifyOnUnmatchedCLs(ctx, q.luciProject, q.qs.GetHost(), diff, changelist.UpdateCLTask_FULL_POLL_UNMATCHED); err != nil {
   125  			return err
   126  		}
   127  	}
   128  
   129  	q.qs.Changes = cur
   130  	q.qs.LastFullTime = timestamppb.New(started)
   131  	q.qs.LastIncrTime = nil
   132  	return nil
   133  }
   134  
   135  func (p *Poller) doIncrementalQuery(ctx context.Context, q singleQuery) error {
   136  	ctx = logging.SetField(ctx, "poll", "incremental")
   137  	started := clock.Now(ctx)
   138  
   139  	lastInc := q.qs.GetLastIncrTime()
   140  	if lastInc == nil {
   141  		if lastInc = q.qs.GetLastFullTime(); lastInc == nil {
   142  			panic("must have been a full poll")
   143  		}
   144  	}
   145  	after := lastInc.AsTime().Add(-incrementalPollOverlap)
   146  	// Unlike the full poll, query for all changes regardless of status or CQ
   147  	// vote. This ensures that CV notices quickly when previously NEW & CQ-ed
   148  	// change has either CQ vote removed OR status changed (e.g. submitted or
   149  	// abandoned).
   150  	changes, err := q.fetch(ctx, after, q.qs.gerritString(queryAll))
   151  	// There can be partial result even if err != nil.
   152  	switch err2 := p.notifyOnMatchedCLs(ctx, q.luciProject, q.qs.GetHost(), changes, false, changelist.UpdateCLTask_INCR_POLL_MATCHED); {
   153  	case err != nil:
   154  		return err
   155  	case err2 != nil:
   156  		return err2
   157  	}
   158  
   159  	q.qs.Changes = common.UnionSorted(q.qs.Changes, uniqueSortedIDsOf(changes))
   160  	q.qs.LastIncrTime = timestamppb.New(started)
   161  	return nil
   162  }
   163  
   164  type singleQuery struct {
   165  	luciProject string
   166  	qs          *QueryState
   167  	client      gerrit.Client
   168  }
   169  
   170  func (q singleQuery) fetch(ctx context.Context, after time.Time, query string) ([]*gerritpb.ChangeInfo, error) {
   171  	opts := gerritutil.PagingListChangesOptions{
   172  		Limit:                  changesPerPoll,
   173  		PageSize:               pageSize,
   174  		MoreChangesTrustFactor: moreChangesTrustFactor,
   175  		UpdatedAfter:           after,
   176  	}
   177  	req := gerritpb.ListChangesRequest{
   178  		Options: []gerritpb.QueryOption{
   179  			gerritpb.QueryOption_SKIP_MERGEABLE,
   180  		},
   181  		Query: query,
   182  	}
   183  	resp, err := gerritutil.PagingListChanges(ctx, q.client, &req, opts)
   184  	grpcStatus, _ := status.FromError(errors.Unwrap(err))
   185  	switch grpcCode := grpcStatus.Code(); {
   186  	case grpcCode == codes.OK:
   187  		if resp.GetMoreChanges() {
   188  			logging.Errorf(ctx, "Ignoring oldest changes because reached max (%d) allowed to process per poll", changesPerPoll)
   189  		}
   190  		return resp.GetChanges(), nil
   191  	case grpcCode == codes.InvalidArgument && strings.Contains(grpcStatus.Message(), "Invalid authentication credentials. Please generate a new identifier:"):
   192  		logging.Errorf(ctx, "crbug/1286454: got invalid authentication credential"+
   193  			" error when paging changes. Mark it as transient so that it will be"+
   194  			" retried.")
   195  		return nil, transient.Tag.Apply(err)
   196  	// TODO(tandrii): handle 403 and 404 if CV lacks access to entire host.
   197  	default:
   198  		// NOTE: resp may be set if there was partial success in fetching changes
   199  		// followed by a typically transient error.
   200  		return resp.GetChanges(), gerrit.UnhandledError(ctx, err, "PagingListChanges failed")
   201  	}
   202  }
   203  
   204  type queryKind int
   205  
   206  const (
   207  	queryLimited queryKind = iota
   208  	queryAll
   209  )
   210  
   211  // gerritString encodes query for Gerrit.
   212  //
   213  // If queryLimited, unlike queryAll, searches for NEW CLs with CQ vote.
   214  func (qs *QueryState) gerritString(kind queryKind) string {
   215  	buf := strings.Builder{}
   216  	switch kind {
   217  	case queryLimited:
   218  		buf.WriteString("status:NEW ")
   219  		// TODO(tandrii): make label optional to support Tricium use-case.
   220  		buf.WriteString("label:Commit-Queue>0 ")
   221  	case queryAll:
   222  	default:
   223  		panic(fmt.Errorf("unknown queryKind %d", kind))
   224  	}
   225  	// TODO(crbug/1163177): specify `branch:` search term to restrict search to
   226  	// specific refs. For projects watching a single ref, this will provide more
   227  	// targeted queries, reducing load on CV & Gerrit, but care must be taken to
   228  	// to avoid excessive number of queries when multiple refs are watched.
   229  
   230  	emitProjectValue := func(p string) {
   231  		// Even though it appears to work without, Gerrit doc says project names
   232  		// containing / must be surrounded by "" or {}:
   233  		// https://gerrit-review.googlesource.com/Documentation/user-search.html#_argument_quoting
   234  		buf.WriteRune('"')
   235  		buf.WriteString(p)
   236  		buf.WriteRune('"')
   237  	}
   238  
   239  	// One of .OrProjects or .CommonProjectPrefix must be set.
   240  	switch prs := qs.GetOrProjects(); len(prs) {
   241  	case 0:
   242  		if qs.GetCommonProjectPrefix() == "" {
   243  			panic("partitionConfig function should have ensured this")
   244  		}
   245  		// project*s* means find matching projects by prefix
   246  		buf.WriteString("projects:")
   247  		emitProjectValue(qs.GetCommonProjectPrefix())
   248  	case 1:
   249  		buf.WriteString("project:")
   250  		emitProjectValue(prs[0])
   251  	default:
   252  		buf.WriteRune('(')
   253  		for i, p := range prs {
   254  			if i > 0 {
   255  				buf.WriteString(" OR ")
   256  			}
   257  			buf.WriteString("project:")
   258  			emitProjectValue(p)
   259  		}
   260  		buf.WriteRune(')')
   261  	}
   262  	return buf.String()
   263  }
   264  
   265  func uniqueSortedIDsOf(changes []*gerritpb.ChangeInfo) []int64 {
   266  	if len(changes) == 0 {
   267  		return nil
   268  	}
   269  
   270  	out := make([]int64, len(changes))
   271  	for i, c := range changes {
   272  		out[i] = c.GetNumber()
   273  	}
   274  	return common.UniqueSorted(out)
   275  }