go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/buildbucket/appengine/internal/search/query.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package search
    16  
    17  import (
    18  	"container/heap"
    19  	"context"
    20  	"fmt"
    21  	"regexp"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  
    26  	"google.golang.org/grpc/codes"
    27  	"google.golang.org/protobuf/types/known/timestamppb"
    28  
    29  	"go.chromium.org/luci/auth/identity"
    30  	"go.chromium.org/luci/common/data/stringset"
    31  	"go.chromium.org/luci/common/data/strpair"
    32  	"go.chromium.org/luci/common/errors"
    33  	"go.chromium.org/luci/common/logging"
    34  	"go.chromium.org/luci/common/sync/parallel"
    35  	"go.chromium.org/luci/gae/service/datastore"
    36  	"go.chromium.org/luci/grpc/appstatus"
    37  
    38  	bb "go.chromium.org/luci/buildbucket"
    39  	"go.chromium.org/luci/buildbucket/appengine/internal/buildid"
    40  	"go.chromium.org/luci/buildbucket/appengine/internal/perm"
    41  	"go.chromium.org/luci/buildbucket/appengine/model"
    42  	"go.chromium.org/luci/buildbucket/bbperms"
    43  	pb "go.chromium.org/luci/buildbucket/proto"
    44  	"go.chromium.org/luci/buildbucket/protoutil"
    45  )
    46  
    47  const (
    48  	defaultPageSize = 100
    49  	maxPageSize     = 1000
    50  )
    51  
    52  var (
    53  	PageTokenRegex = regexp.MustCompile(`^id>\d+$`)
    54  )
    55  
    56  // Query is the intermediate to store the arguments for ds search query.
    57  type Query struct {
    58  	Builder           *pb.BuilderID
    59  	Tags              strpair.Map
    60  	Status            pb.Status
    61  	CreatedBy         identity.Identity
    62  	StartTime         time.Time
    63  	EndTime           time.Time
    64  	ExperimentFilters stringset.Set
    65  	BuildIDHigh       int64
    66  	BuildIDLow        int64
    67  	DescendantOf      int64
    68  	ChildOf           int64
    69  	PageSize          int32
    70  	PageToken         string
    71  }
    72  
    73  // NewQuery builds a Query from a pb.SearchBuildsRequest.
    74  // It assumes req is valid, otherwise may panic.
    75  func NewQuery(req *pb.SearchBuildsRequest) *Query {
    76  	if req.GetPredicate() == nil {
    77  		return &Query{
    78  			PageSize:  fixPageSize(req.GetPageSize()),
    79  			PageToken: req.GetPageToken(),
    80  		}
    81  	}
    82  
    83  	p := req.Predicate
    84  	q := &Query{
    85  		Builder:           p.GetBuilder(),
    86  		Tags:              protoutil.StringPairMap(p.Tags),
    87  		Status:            p.Status,
    88  		CreatedBy:         identity.Identity(fixCreatedBy(p.CreatedBy)),
    89  		StartTime:         mustTimestamp(p.CreateTime.GetStartTime()),
    90  		EndTime:           mustTimestamp(p.CreateTime.GetEndTime()),
    91  		ExperimentFilters: stringset.NewFromSlice(p.Experiments...),
    92  		DescendantOf:      p.DescendantOf,
    93  		ChildOf:           p.ChildOf,
    94  		PageSize:          fixPageSize(req.PageSize),
    95  		PageToken:         req.PageToken,
    96  	}
    97  
    98  	// Filter by gerrit changes.
    99  	for _, change := range p.GerritChanges {
   100  		q.Tags.Add("buildset", protoutil.GerritBuildSet(change))
   101  	}
   102  
   103  	// Filter by build range.
   104  	// BuildIds less or equal to 0 means no boundary.
   105  	// Convert BuildRange to [buildLow, buildHigh).
   106  	// Note that unlike buildLow/buildHigh, BuildRange in req encapsulates the fact
   107  	// that build ids are decreasing. So we need to reverse the order.
   108  	if p.Build.GetStartBuildId() > 0 {
   109  		// Add 1 because startBuildId is inclusive and buildHigh is exclusive.
   110  		q.BuildIDHigh = p.Build.GetStartBuildId() + 1
   111  	}
   112  	if p.Build.GetEndBuildId() > 0 {
   113  		// Subtract 1 because endBuildId is exclusive and buildLow is inclusive.
   114  		q.BuildIDLow = p.Build.GetEndBuildId() - 1
   115  	}
   116  
   117  	// Filter by canary. Note that validateExperiment has already verified that
   118  	// p.Experiments doesn't contain a filter for ExperimentBBCanarySoftware.
   119  	if c := p.GetCanary(); c == pb.Trinary_YES {
   120  		q.ExperimentFilters.Add("+" + bb.ExperimentBBCanarySoftware)
   121  	} else if c == pb.Trinary_NO {
   122  		q.ExperimentFilters.Add("-" + bb.ExperimentBBCanarySoftware)
   123  	}
   124  
   125  	// Apply IncludeExperimental.
   126  	//
   127  	// If the user explicitly requested builds which were non_production, then we
   128  	// ignore this.
   129  	if !p.IncludeExperimental && !q.ExperimentFilters.Has("+"+bb.ExperimentNonProduction) {
   130  		q.ExperimentFilters.Add("-" + bb.ExperimentNonProduction)
   131  	}
   132  
   133  	return q
   134  }
   135  
   136  // IndexedTags returns the indexed tags.
   137  func IndexedTags(tags strpair.Map) []string {
   138  	set := make(stringset.Set)
   139  	for k, vals := range tags {
   140  		if k != "buildset" && k != "build_address" {
   141  			continue
   142  		}
   143  		for _, val := range vals {
   144  			set.Add(strpair.Format(k, val))
   145  		}
   146  	}
   147  	return set.ToSortedSlice()
   148  }
   149  
   150  // UpdateTagIndex updates the tag index for the given builds. Panics if any
   151  // build.Proto.Builder is unspecified.
   152  func UpdateTagIndex(ctx context.Context, builds []*model.Build) errors.MultiError {
   153  	merr := make(errors.MultiError, len(builds))
   154  	tagToBldIdx := make(map[string][]int)                  // tag -> builds indexes
   155  	indexEntries := make(map[string][]model.TagIndexEntry) // tag -> entries
   156  
   157  	for i, b := range builds {
   158  		for _, t := range IndexedTags(strpair.ParseMap(b.Tags)) {
   159  			indexEntries[t] = append(indexEntries[t], model.TagIndexEntry{
   160  				BuildID:     b.ID,
   161  				BucketID:    protoutil.FormatBucketID(b.Proto.Builder.Project, b.Proto.Builder.Bucket),
   162  				CreatedTime: mustTimestamp(b.Proto.CreateTime),
   163  			})
   164  			tagToBldIdx[t] = append(tagToBldIdx[t], i)
   165  		}
   166  	}
   167  	_ = parallel.WorkPool(64, func(work chan<- func() error) {
   168  		for tag, ents := range indexEntries {
   169  			tag := tag
   170  			ents := ents
   171  			work <- func() error {
   172  				if err := model.UpdateTagIndex(ctx, tag, ents); err != nil {
   173  					for _, i := range tagToBldIdx[tag] {
   174  						merr[i] = err
   175  					}
   176  				}
   177  				return nil
   178  			}
   179  		}
   180  	})
   181  
   182  	if merr.First() != nil {
   183  		return merr
   184  	}
   185  	return nil
   186  }
   187  
   188  // Fetch performs main build search logic.
   189  func (q *Query) Fetch(ctx context.Context) (*pb.SearchBuildsResponse, error) {
   190  	if !buildid.MayContainBuilds(q.StartTime, q.EndTime) {
   191  		return &pb.SearchBuildsResponse{}, nil
   192  	}
   193  
   194  	// Verify bucket ACL permission.
   195  	if q.Builder != nil && q.Builder.Bucket != "" {
   196  		if err := perm.HasInBuilder(ctx, bbperms.BuildsList, q.Builder); err != nil {
   197  			return nil, err
   198  		}
   199  	}
   200  
   201  	cpy := *q
   202  	q = &cpy
   203  	q.PageSize = fixPageSize(q.PageSize)
   204  	// Determine which subflow - directly query on Builds or on TagIndex.
   205  	if len(IndexedTags(q.Tags)) != 0 {
   206  		switch res, err := q.fetchOnTagIndex(ctx); {
   207  		case model.TagIndexIncomplete.In(err):
   208  			logging.Warningf(ctx, "Falling back to querying search on builds")
   209  		case err != nil:
   210  			return nil, err
   211  		default:
   212  			return res, nil
   213  		}
   214  	}
   215  
   216  	return q.fetchOnBuild(ctx)
   217  }
   218  
   219  // fetchOnBuild fetches directly on Build entity.
   220  func (q *Query) fetchOnBuild(ctx context.Context) (*pb.SearchBuildsResponse, error) {
   221  	dq := datastore.NewQuery(model.BuildKind)
   222  
   223  	for _, tag := range q.Tags.Format() {
   224  		dq = dq.Eq("tags", tag)
   225  	}
   226  
   227  	switch {
   228  	case q.Status == pb.Status_ENDED_MASK:
   229  		dq = dq.Eq("incomplete", false)
   230  	case q.Status != pb.Status_STATUS_UNSPECIFIED:
   231  		dq = dq.Eq("status_v2", q.Status)
   232  	}
   233  
   234  	if q.CreatedBy != "" {
   235  		dq = dq.Eq("created_by", q.CreatedBy)
   236  	}
   237  
   238  	var dropExperimental bool
   239  	q.ExperimentFilters.Iter(func(filter string) bool {
   240  		if filter[0] == '-' && filter[1:] == bb.ExperimentNonProduction {
   241  			// filter these in post
   242  			dropExperimental = true
   243  		} else {
   244  			dq = dq.Eq("experiments", filter)
   245  		}
   246  		return true
   247  	})
   248  
   249  	idLow, idHigh := q.idRange()
   250  	if idLow != 0 {
   251  		dq = dq.Gte("__key__", datastore.KeyForObj(ctx, &model.Build{ID: idLow}))
   252  	}
   253  	if idHigh != 0 {
   254  		dq = dq.Lt("__key__", datastore.KeyForObj(ctx, &model.Build{ID: idHigh}))
   255  	}
   256  	if idLow != 0 && idHigh != 0 && idLow >= idHigh {
   257  		return &pb.SearchBuildsResponse{}, nil
   258  	}
   259  
   260  	if q.DescendantOf != 0 {
   261  		dq = dq.Eq("ancestor_ids", q.DescendantOf)
   262  	}
   263  
   264  	if q.ChildOf != 0 {
   265  		dq = dq.Eq("parent_id", q.ChildOf)
   266  	}
   267  
   268  	var queries []*datastore.Query
   269  	var buckets []string
   270  	var err error
   271  	switch {
   272  	case q.Builder.GetBuilder() != "":
   273  		queries = append(queries, dq.Eq("builder_id", protoutil.FormatBuilderID(q.Builder)))
   274  	case q.Builder.GetBucket() != "":
   275  		buckets = []string{protoutil.FormatBucketID(q.Builder.Project, q.Builder.Bucket)}
   276  	default:
   277  		switch buckets, err = perm.BucketsByPerm(ctx, bbperms.BuildersList, q.Builder.GetProject()); {
   278  		case err != nil:
   279  			return nil, errors.Annotate(err, "error fetching accessible buckets").Err()
   280  		case len(buckets) == 0:
   281  			return &pb.SearchBuildsResponse{}, nil
   282  		}
   283  	}
   284  
   285  	for _, bucket := range buckets {
   286  		queries = append(queries, dq.Eq("bucket_id", bucket))
   287  	}
   288  
   289  	rsp := &pb.SearchBuildsResponse{}
   290  	logging.Debugf(ctx, "datastore query for FetchOnBuild: %v", queries)
   291  	err = datastore.RunMulti(ctx, queries, func(b *model.Build) error {
   292  		if len(rsp.Builds) >= int(q.PageSize) {
   293  			return datastore.Stop
   294  		}
   295  
   296  		// Check the build status again, as the index might be stale.
   297  		if q.Status != pb.Status_STATUS_UNSPECIFIED &&
   298  			q.Status != pb.Status_ENDED_MASK &&
   299  			q.Status != b.Status {
   300  			return nil
   301  		}
   302  
   303  		// Filter non-production builds here instead of at the datastore level to
   304  		// reduce the zigzag merge in index scans as the majority of builds are
   305  		// production.
   306  		if dropExperimental && b.ExperimentStatus(bb.ExperimentNonProduction) == pb.Trinary_YES {
   307  			return nil
   308  		}
   309  
   310  		rsp.Builds = append(rsp.Builds, b.ToSimpleBuildProto(ctx))
   311  		return nil
   312  	})
   313  	if err != nil {
   314  		return nil, err
   315  	}
   316  
   317  	if len(rsp.Builds) == int(q.PageSize) {
   318  		rsp.NextPageToken = fmt.Sprintf("id>%d", rsp.Builds[q.PageSize-1].Id)
   319  	}
   320  
   321  	return rsp, nil
   322  }
   323  
   324  // fetchOnTagIndex searches for builds using the TagIndex entities.
   325  func (q *Query) fetchOnTagIndex(ctx context.Context) (*pb.SearchBuildsResponse, error) {
   326  	// Have checked earlier that len(IndexedTags) > 0.
   327  	// Choose the most selective tag to search by.
   328  	indexedTag := IndexedTags(q.Tags)[0]
   329  	k, v := strpair.Parse(indexedTag)
   330  
   331  	// Load tag index entries and put them to a min-heap, sorted by build ID.
   332  	entries, err := model.SearchTagIndex(ctx, k, v)
   333  	if err != nil {
   334  		return nil, err
   335  	}
   336  
   337  	var eHeap minHeap
   338  	switch filteredEntries, err := q.filterEntries(ctx, entries); {
   339  	case err != nil:
   340  		return nil, err
   341  	case len(filteredEntries) == 0:
   342  		return &pb.SearchBuildsResponse{}, nil
   343  	default:
   344  		eHeap = filteredEntries
   345  	}
   346  	heap.Init(&eHeap)
   347  
   348  	// Find the builds.
   349  	results := make([]*pb.Build, 0, q.PageSize) // Ordered by build id by ascending.
   350  	var lastConsideredEntry *model.TagIndexEntry
   351  	inconsistentEntries := 0
   352  	var entriesToFetch []*model.TagIndexEntry
   353  	tags := q.Tags.Format()
   354  
   355  	// We don't record "-luci.non_production" on every build, so when the user
   356  	// asked for this filter, we replace it with a negated filter for the opposite
   357  	// experiment (i.e. `"+luci.non_production" not in b.experiments`).
   358  	//
   359  	// We could use b.ExperimentStatus here, but since we have to convert
   360  	// b.Experiments to a stringset anyway, we avoid looping twice by checking
   361  	// if nonProdFilter is in that stringset.
   362  	expFilter := q.ExperimentFilters.Dup()
   363  	nonProdFilter := ""
   364  	if expFilter.Del("-" + bb.ExperimentNonProduction) {
   365  		nonProdFilter = "+" + bb.ExperimentNonProduction
   366  	}
   367  
   368  	for len(results) < int(q.PageSize) {
   369  		toFetchCount := int(q.PageSize) - len(results)
   370  		entriesToFetch = entriesToFetch[:0]
   371  		for eHeap.Len() > 0 && len(entriesToFetch) < toFetchCount {
   372  			entry := heap.Pop(&eHeap).(*model.TagIndexEntry)
   373  			prev := lastConsideredEntry
   374  			lastConsideredEntry = entry
   375  			// Tolerate duplicates.
   376  			if prev != nil && prev.BuildID == entry.BuildID {
   377  				continue
   378  			}
   379  			entriesToFetch = append(entriesToFetch, entry)
   380  		}
   381  
   382  		if len(entriesToFetch) == 0 {
   383  			break
   384  		}
   385  
   386  		// Fetch builds
   387  		builds := make([]*model.Build, len(entriesToFetch))
   388  		for i, e := range entriesToFetch {
   389  			builds[i] = &model.Build{ID: e.BuildID}
   390  		}
   391  		// The non-existent builds will be filtered out in the filtering builds for-loop as they have no tags.
   392  		if err := model.GetIgnoreMissing(ctx, builds); err != nil {
   393  			logging.Errorf(ctx, "error fetching builds on fetchOnTagIndex code path : %s", err)
   394  			return nil, errors.Annotate(err, "error fetching builds").Err()
   395  		}
   396  
   397  		// Filter builds
   398  		for i, b := range builds {
   399  			buildTags := stringset.NewFromSlice(b.Tags...)
   400  			// Check for inconsistent entries.
   401  			if b.BucketID != entriesToFetch[i].BucketID || !buildTags.Has(indexedTag) {
   402  				logging.Warningf(ctx, "entry with build_id %d is inconsistent", b.ID)
   403  				inconsistentEntries++
   404  				continue
   405  			}
   406  			// Check user-supplied filters.
   407  			if !buildTags.HasAll(tags...) ||
   408  				(q.Status == pb.Status_ENDED_MASK && b.Incomplete) ||
   409  				(q.Status != pb.Status_STATUS_UNSPECIFIED && q.Status != pb.Status_ENDED_MASK && q.Status != b.Status) ||
   410  				(q.CreatedBy != "" && q.CreatedBy != b.CreatedBy) ||
   411  				(q.Builder.GetBuilder() != "" && b.Proto.Builder.Builder != q.Builder.Builder) ||
   412  				(q.Builder.GetProject() != "" && b.Proto.Builder.Project != q.Builder.Project) {
   413  				continue
   414  			}
   415  
   416  			bExps := stringset.NewFromSlice(b.Experiments...)
   417  			if !bExps.Contains(expFilter) {
   418  				continue
   419  			}
   420  			if nonProdFilter != "" && bExps.Has(nonProdFilter) {
   421  				continue
   422  			}
   423  
   424  			results = append(results, b.ToSimpleBuildProto(ctx))
   425  		}
   426  	}
   427  	// TODO(crbug/1090540): add metrics for inconsistentEntries.
   428  	rsp := &pb.SearchBuildsResponse{
   429  		Builds: results,
   430  	}
   431  	if len(results) == int(q.PageSize) && lastConsideredEntry != nil {
   432  		rsp.NextPageToken = fmt.Sprintf("id>%d", lastConsideredEntry.BuildID)
   433  	}
   434  	return rsp, nil
   435  }
   436  
   437  // filterEntries filters tag index entries by the build ID ranges and buckets
   438  // conditions in the Query.
   439  func (q *Query) filterEntries(ctx context.Context, entries []*model.TagIndexEntry) ([]*model.TagIndexEntry, error) {
   440  	idLow, idHigh := q.idRange()
   441  	if idHigh == 0 {
   442  		idHigh = int64(uint64(1)<<63 - 1)
   443  	}
   444  	if idLow >= idHigh {
   445  		return nil, nil
   446  	}
   447  
   448  	bucketID := protoutil.FormatBucketID(q.Builder.GetProject(), q.Builder.GetBucket())
   449  	preprocessed := make([]*model.TagIndexEntry, 0, len(entries))
   450  	// A cache whether the user has the access permission to buckets.
   451  	hasAccessCache := map[string]bool{}
   452  	for _, e := range entries {
   453  		if e.BuildID < idLow || e.BuildID >= idHigh {
   454  			continue
   455  		}
   456  		// If the bucket in query is not specified, the permission was not checked earlier.
   457  		// In this case, check the permission.
   458  		if q.Builder.GetBucket() == "" {
   459  			has, ok := hasAccessCache[e.BucketID]
   460  			if !ok {
   461  				proj, bkt, _ := protoutil.ParseBucketID(e.BucketID)
   462  				if err := perm.HasInBucket(ctx, bbperms.BuildsList, proj, bkt); err == nil {
   463  					has = true
   464  				} else {
   465  					status, ok := appstatus.Get(err)
   466  					if !ok || (status.Code() != codes.PermissionDenied && status.Code() != codes.NotFound) {
   467  						return nil, err
   468  					}
   469  				}
   470  				hasAccessCache[e.BucketID] = has
   471  			}
   472  			if !has {
   473  				continue
   474  			}
   475  		} else if bucketID != e.BucketID {
   476  			continue
   477  		}
   478  		preprocessed = append(preprocessed, e)
   479  	}
   480  	return preprocessed, nil
   481  }
   482  
   483  // idRange computes the id range from q.BuildIdLow/q.BuildIdHigh, q.StartTime/q.EndTime and q.StartCursor.
   484  // Returning 0 means no boundary.
   485  func (q *Query) idRange() (idLow, idHigh int64) {
   486  	if q.BuildIDLow != 0 || q.BuildIDHigh != 0 {
   487  		idLow, idHigh = q.BuildIDLow, q.BuildIDHigh
   488  	} else {
   489  		idLow, idHigh = buildid.IDRange(q.StartTime, q.EndTime)
   490  	}
   491  
   492  	if q.PageToken != "" {
   493  		if minExclusiveID, _ := strconv.ParseInt(q.PageToken[len("id>"):], 10, 64); minExclusiveID+1 > idLow {
   494  			idLow = minExclusiveID + 1
   495  		}
   496  	}
   497  	return
   498  }
   499  
   500  // fixPageSize ensures the size is positive and less than or equal to maxPageSize.
   501  func fixPageSize(size int32) int32 {
   502  	switch {
   503  	case size <= 0:
   504  		return defaultPageSize
   505  	case size > maxPageSize:
   506  		return maxPageSize
   507  	default:
   508  		return size
   509  	}
   510  }
   511  
   512  // fixPageSize ensures the createdBy identity string is the format "kind:value".
   513  func fixCreatedBy(createdBy string) string {
   514  	if createdBy != "" && !strings.Contains(createdBy, ":") {
   515  		createdBy = fmt.Sprintf("user:%s", createdBy)
   516  	}
   517  	return createdBy
   518  }
   519  
   520  // mustTimestamp converts a protobuf timestamp to a time.Time and panics on failures.
   521  // It returns zero time for nil timestamp.
   522  func mustTimestamp(ts *timestamppb.Timestamp) time.Time {
   523  	if ts == nil {
   524  		return time.Time{}
   525  	}
   526  
   527  	if err := ts.CheckValid(); err != nil {
   528  		panic(err)
   529  	}
   530  	t := ts.AsTime()
   531  	return t
   532  }
   533  
   534  // minHeap holds a slice of TagIndexEntry and implements heap.Interface.
   535  type minHeap []*model.TagIndexEntry
   536  
   537  var _ heap.Interface = &minHeap{}
   538  
   539  func (m minHeap) Len() int { return len(m) }
   540  
   541  func (m minHeap) Less(i, j int) bool { return m[i].BuildID < m[j].BuildID }
   542  
   543  func (m minHeap) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
   544  
   545  func (m *minHeap) Push(x any) {
   546  	*m = append(*m, x.(*model.TagIndexEntry))
   547  }
   548  
   549  func (m *minHeap) Pop() any {
   550  	old := *m
   551  	n := len(old)
   552  	item := old[n-1]
   553  	*m = old[0 : n-1]
   554  	return item
   555  }