// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package search

import (
	"container/heap"
	"context"
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"time"

	"google.golang.org/grpc/codes"
	"google.golang.org/protobuf/types/known/timestamppb"

	"go.chromium.org/luci/auth/identity"
	"go.chromium.org/luci/common/data/stringset"
	"go.chromium.org/luci/common/data/strpair"
	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/sync/parallel"
	"go.chromium.org/luci/gae/service/datastore"
	"go.chromium.org/luci/grpc/appstatus"

	bb "go.chromium.org/luci/buildbucket"
	"go.chromium.org/luci/buildbucket/appengine/internal/buildid"
	"go.chromium.org/luci/buildbucket/appengine/internal/perm"
	"go.chromium.org/luci/buildbucket/appengine/model"
	"go.chromium.org/luci/buildbucket/bbperms"
	pb "go.chromium.org/luci/buildbucket/proto"
	"go.chromium.org/luci/buildbucket/protoutil"
)

const (
	// defaultPageSize is the page size fixPageSize substitutes when the
	// requested size is zero or negative.
	defaultPageSize = 100
	// maxPageSize is the largest page size fixPageSize allows; larger requests
	// are clamped to it.
	maxPageSize = 1000
)

var (
	// PageTokenRegex matches well-formed page tokens: the literal "id>"
	// followed by a decimal build ID. This is the shape of the NextPageToken
	// values produced by the fetch functions in this file.
	PageTokenRegex = regexp.MustCompile(`^id>\d+$`)
)

// Query is the intermediate to store the arguments for ds search query.
57 type Query struct { 58 Builder *pb.BuilderID 59 Tags strpair.Map 60 Status pb.Status 61 CreatedBy identity.Identity 62 StartTime time.Time 63 EndTime time.Time 64 ExperimentFilters stringset.Set 65 BuildIDHigh int64 66 BuildIDLow int64 67 DescendantOf int64 68 ChildOf int64 69 PageSize int32 70 PageToken string 71 } 72 73 // NewQuery builds a Query from a pb.SearchBuildsRequest. 74 // It assumes req is valid, otherwise may panic. 75 func NewQuery(req *pb.SearchBuildsRequest) *Query { 76 if req.GetPredicate() == nil { 77 return &Query{ 78 PageSize: fixPageSize(req.GetPageSize()), 79 PageToken: req.GetPageToken(), 80 } 81 } 82 83 p := req.Predicate 84 q := &Query{ 85 Builder: p.GetBuilder(), 86 Tags: protoutil.StringPairMap(p.Tags), 87 Status: p.Status, 88 CreatedBy: identity.Identity(fixCreatedBy(p.CreatedBy)), 89 StartTime: mustTimestamp(p.CreateTime.GetStartTime()), 90 EndTime: mustTimestamp(p.CreateTime.GetEndTime()), 91 ExperimentFilters: stringset.NewFromSlice(p.Experiments...), 92 DescendantOf: p.DescendantOf, 93 ChildOf: p.ChildOf, 94 PageSize: fixPageSize(req.PageSize), 95 PageToken: req.PageToken, 96 } 97 98 // Filter by gerrit changes. 99 for _, change := range p.GerritChanges { 100 q.Tags.Add("buildset", protoutil.GerritBuildSet(change)) 101 } 102 103 // Filter by build range. 104 // BuildIds less or equal to 0 means no boundary. 105 // Convert BuildRange to [buildLow, buildHigh). 106 // Note that unlike buildLow/buildHigh, BuildRange in req encapsulates the fact 107 // that build ids are decreasing. So we need to reverse the order. 108 if p.Build.GetStartBuildId() > 0 { 109 // Add 1 because startBuildId is inclusive and buildHigh is exclusive. 110 q.BuildIDHigh = p.Build.GetStartBuildId() + 1 111 } 112 if p.Build.GetEndBuildId() > 0 { 113 // Subtract 1 because endBuildId is exclusive and buildLow is inclusive. 114 q.BuildIDLow = p.Build.GetEndBuildId() - 1 115 } 116 117 // Filter by canary. 
Note that validateExperiment has already verified that 118 // p.Experiments doesn't contain a filter for ExperimentBBCanarySoftware. 119 if c := p.GetCanary(); c == pb.Trinary_YES { 120 q.ExperimentFilters.Add("+" + bb.ExperimentBBCanarySoftware) 121 } else if c == pb.Trinary_NO { 122 q.ExperimentFilters.Add("-" + bb.ExperimentBBCanarySoftware) 123 } 124 125 // Apply IncludeExperimental. 126 // 127 // If the user explicitly requested builds which were non_production, then we 128 // ignore this. 129 if !p.IncludeExperimental && !q.ExperimentFilters.Has("+"+bb.ExperimentNonProduction) { 130 q.ExperimentFilters.Add("-" + bb.ExperimentNonProduction) 131 } 132 133 return q 134 } 135 136 // IndexedTags returns the indexed tags. 137 func IndexedTags(tags strpair.Map) []string { 138 set := make(stringset.Set) 139 for k, vals := range tags { 140 if k != "buildset" && k != "build_address" { 141 continue 142 } 143 for _, val := range vals { 144 set.Add(strpair.Format(k, val)) 145 } 146 } 147 return set.ToSortedSlice() 148 } 149 150 // UpdateTagIndex updates the tag index for the given builds. Panics if any 151 // build.Proto.Builder is unspecified. 
152 func UpdateTagIndex(ctx context.Context, builds []*model.Build) errors.MultiError { 153 merr := make(errors.MultiError, len(builds)) 154 tagToBldIdx := make(map[string][]int) // tag -> builds indexes 155 indexEntries := make(map[string][]model.TagIndexEntry) // tag -> entries 156 157 for i, b := range builds { 158 for _, t := range IndexedTags(strpair.ParseMap(b.Tags)) { 159 indexEntries[t] = append(indexEntries[t], model.TagIndexEntry{ 160 BuildID: b.ID, 161 BucketID: protoutil.FormatBucketID(b.Proto.Builder.Project, b.Proto.Builder.Bucket), 162 CreatedTime: mustTimestamp(b.Proto.CreateTime), 163 }) 164 tagToBldIdx[t] = append(tagToBldIdx[t], i) 165 } 166 } 167 _ = parallel.WorkPool(64, func(work chan<- func() error) { 168 for tag, ents := range indexEntries { 169 tag := tag 170 ents := ents 171 work <- func() error { 172 if err := model.UpdateTagIndex(ctx, tag, ents); err != nil { 173 for _, i := range tagToBldIdx[tag] { 174 merr[i] = err 175 } 176 } 177 return nil 178 } 179 } 180 }) 181 182 if merr.First() != nil { 183 return merr 184 } 185 return nil 186 } 187 188 // Fetch performs main build search logic. 189 func (q *Query) Fetch(ctx context.Context) (*pb.SearchBuildsResponse, error) { 190 if !buildid.MayContainBuilds(q.StartTime, q.EndTime) { 191 return &pb.SearchBuildsResponse{}, nil 192 } 193 194 // Verify bucket ACL permission. 195 if q.Builder != nil && q.Builder.Bucket != "" { 196 if err := perm.HasInBuilder(ctx, bbperms.BuildsList, q.Builder); err != nil { 197 return nil, err 198 } 199 } 200 201 cpy := *q 202 q = &cpy 203 q.PageSize = fixPageSize(q.PageSize) 204 // Determine which subflow - directly query on Builds or on TagIndex. 
205 if len(IndexedTags(q.Tags)) != 0 { 206 switch res, err := q.fetchOnTagIndex(ctx); { 207 case model.TagIndexIncomplete.In(err): 208 logging.Warningf(ctx, "Falling back to querying search on builds") 209 case err != nil: 210 return nil, err 211 default: 212 return res, nil 213 } 214 } 215 216 return q.fetchOnBuild(ctx) 217 } 218 219 // fetchOnBuild fetches directly on Build entity. 220 func (q *Query) fetchOnBuild(ctx context.Context) (*pb.SearchBuildsResponse, error) { 221 dq := datastore.NewQuery(model.BuildKind) 222 223 for _, tag := range q.Tags.Format() { 224 dq = dq.Eq("tags", tag) 225 } 226 227 switch { 228 case q.Status == pb.Status_ENDED_MASK: 229 dq = dq.Eq("incomplete", false) 230 case q.Status != pb.Status_STATUS_UNSPECIFIED: 231 dq = dq.Eq("status_v2", q.Status) 232 } 233 234 if q.CreatedBy != "" { 235 dq = dq.Eq("created_by", q.CreatedBy) 236 } 237 238 var dropExperimental bool 239 q.ExperimentFilters.Iter(func(filter string) bool { 240 if filter[0] == '-' && filter[1:] == bb.ExperimentNonProduction { 241 // filter these in post 242 dropExperimental = true 243 } else { 244 dq = dq.Eq("experiments", filter) 245 } 246 return true 247 }) 248 249 idLow, idHigh := q.idRange() 250 if idLow != 0 { 251 dq = dq.Gte("__key__", datastore.KeyForObj(ctx, &model.Build{ID: idLow})) 252 } 253 if idHigh != 0 { 254 dq = dq.Lt("__key__", datastore.KeyForObj(ctx, &model.Build{ID: idHigh})) 255 } 256 if idLow != 0 && idHigh != 0 && idLow >= idHigh { 257 return &pb.SearchBuildsResponse{}, nil 258 } 259 260 if q.DescendantOf != 0 { 261 dq = dq.Eq("ancestor_ids", q.DescendantOf) 262 } 263 264 if q.ChildOf != 0 { 265 dq = dq.Eq("parent_id", q.ChildOf) 266 } 267 268 var queries []*datastore.Query 269 var buckets []string 270 var err error 271 switch { 272 case q.Builder.GetBuilder() != "": 273 queries = append(queries, dq.Eq("builder_id", protoutil.FormatBuilderID(q.Builder))) 274 case q.Builder.GetBucket() != "": 275 buckets = 
[]string{protoutil.FormatBucketID(q.Builder.Project, q.Builder.Bucket)} 276 default: 277 switch buckets, err = perm.BucketsByPerm(ctx, bbperms.BuildersList, q.Builder.GetProject()); { 278 case err != nil: 279 return nil, errors.Annotate(err, "error fetching accessible buckets").Err() 280 case len(buckets) == 0: 281 return &pb.SearchBuildsResponse{}, nil 282 } 283 } 284 285 for _, bucket := range buckets { 286 queries = append(queries, dq.Eq("bucket_id", bucket)) 287 } 288 289 rsp := &pb.SearchBuildsResponse{} 290 logging.Debugf(ctx, "datastore query for FetchOnBuild: %v", queries) 291 err = datastore.RunMulti(ctx, queries, func(b *model.Build) error { 292 if len(rsp.Builds) >= int(q.PageSize) { 293 return datastore.Stop 294 } 295 296 // Check the build status again, as the index might be stale. 297 if q.Status != pb.Status_STATUS_UNSPECIFIED && 298 q.Status != pb.Status_ENDED_MASK && 299 q.Status != b.Status { 300 return nil 301 } 302 303 // Filter non-production builds here instead of at the datastore level to 304 // reduce the zigzag merge in index scans as the majority of builds are 305 // production. 306 if dropExperimental && b.ExperimentStatus(bb.ExperimentNonProduction) == pb.Trinary_YES { 307 return nil 308 } 309 310 rsp.Builds = append(rsp.Builds, b.ToSimpleBuildProto(ctx)) 311 return nil 312 }) 313 if err != nil { 314 return nil, err 315 } 316 317 if len(rsp.Builds) == int(q.PageSize) { 318 rsp.NextPageToken = fmt.Sprintf("id>%d", rsp.Builds[q.PageSize-1].Id) 319 } 320 321 return rsp, nil 322 } 323 324 // fetchOnTagIndex searches for builds using the TagIndex entities. 325 func (q *Query) fetchOnTagIndex(ctx context.Context) (*pb.SearchBuildsResponse, error) { 326 // Have checked earlier that len(IndexedTags) > 0. 327 // Choose the most selective tag to search by. 328 indexedTag := IndexedTags(q.Tags)[0] 329 k, v := strpair.Parse(indexedTag) 330 331 // Load tag index entries and put them to a min-heap, sorted by build ID. 
332 entries, err := model.SearchTagIndex(ctx, k, v) 333 if err != nil { 334 return nil, err 335 } 336 337 var eHeap minHeap 338 switch filteredEntries, err := q.filterEntries(ctx, entries); { 339 case err != nil: 340 return nil, err 341 case len(filteredEntries) == 0: 342 return &pb.SearchBuildsResponse{}, nil 343 default: 344 eHeap = filteredEntries 345 } 346 heap.Init(&eHeap) 347 348 // Find the builds. 349 results := make([]*pb.Build, 0, q.PageSize) // Ordered by build id by ascending. 350 var lastConsideredEntry *model.TagIndexEntry 351 inconsistentEntries := 0 352 var entriesToFetch []*model.TagIndexEntry 353 tags := q.Tags.Format() 354 355 // We don't record "-luci.non_production" on every build, so when the user 356 // asked for this filter, we replace it with a negated filter for the opposite 357 // experiment (i.e. `"+luci.non_production" not in b.experiments`). 358 // 359 // We could use b.ExperimentStatus here, but since we have to convert 360 // b.Experiments to a stringset anyway, we avoid looping twice by checking 361 // if nonProdFilter is in that stringset. 362 expFilter := q.ExperimentFilters.Dup() 363 nonProdFilter := "" 364 if expFilter.Del("-" + bb.ExperimentNonProduction) { 365 nonProdFilter = "+" + bb.ExperimentNonProduction 366 } 367 368 for len(results) < int(q.PageSize) { 369 toFetchCount := int(q.PageSize) - len(results) 370 entriesToFetch = entriesToFetch[:0] 371 for eHeap.Len() > 0 && len(entriesToFetch) < toFetchCount { 372 entry := heap.Pop(&eHeap).(*model.TagIndexEntry) 373 prev := lastConsideredEntry 374 lastConsideredEntry = entry 375 // Tolerate duplicates. 
376 if prev != nil && prev.BuildID == entry.BuildID { 377 continue 378 } 379 entriesToFetch = append(entriesToFetch, entry) 380 } 381 382 if len(entriesToFetch) == 0 { 383 break 384 } 385 386 // Fetch builds 387 builds := make([]*model.Build, len(entriesToFetch)) 388 for i, e := range entriesToFetch { 389 builds[i] = &model.Build{ID: e.BuildID} 390 } 391 // The non-existent builds will be filtered out in the filtering builds for-loop as they have no tags. 392 if err := model.GetIgnoreMissing(ctx, builds); err != nil { 393 logging.Errorf(ctx, "error fetching builds on fetchOnTagIndex code path : %s", err) 394 return nil, errors.Annotate(err, "error fetching builds").Err() 395 } 396 397 // Filter builds 398 for i, b := range builds { 399 buildTags := stringset.NewFromSlice(b.Tags...) 400 // Check for inconsistent entries. 401 if b.BucketID != entriesToFetch[i].BucketID || !buildTags.Has(indexedTag) { 402 logging.Warningf(ctx, "entry with build_id %d is inconsistent", b.ID) 403 inconsistentEntries++ 404 continue 405 } 406 // Check user-supplied filters. 407 if !buildTags.HasAll(tags...) || 408 (q.Status == pb.Status_ENDED_MASK && b.Incomplete) || 409 (q.Status != pb.Status_STATUS_UNSPECIFIED && q.Status != pb.Status_ENDED_MASK && q.Status != b.Status) || 410 (q.CreatedBy != "" && q.CreatedBy != b.CreatedBy) || 411 (q.Builder.GetBuilder() != "" && b.Proto.Builder.Builder != q.Builder.Builder) || 412 (q.Builder.GetProject() != "" && b.Proto.Builder.Project != q.Builder.Project) { 413 continue 414 } 415 416 bExps := stringset.NewFromSlice(b.Experiments...) 417 if !bExps.Contains(expFilter) { 418 continue 419 } 420 if nonProdFilter != "" && bExps.Has(nonProdFilter) { 421 continue 422 } 423 424 results = append(results, b.ToSimpleBuildProto(ctx)) 425 } 426 } 427 // TODO(crbug/1090540): add metrics for inconsistentEntries. 
428 rsp := &pb.SearchBuildsResponse{ 429 Builds: results, 430 } 431 if len(results) == int(q.PageSize) && lastConsideredEntry != nil { 432 rsp.NextPageToken = fmt.Sprintf("id>%d", lastConsideredEntry.BuildID) 433 } 434 return rsp, nil 435 } 436 437 // filterEntries filters tag index entries by the build ID ranges and buckets 438 // conditions in the Query. 439 func (q *Query) filterEntries(ctx context.Context, entries []*model.TagIndexEntry) ([]*model.TagIndexEntry, error) { 440 idLow, idHigh := q.idRange() 441 if idHigh == 0 { 442 idHigh = int64(uint64(1)<<63 - 1) 443 } 444 if idLow >= idHigh { 445 return nil, nil 446 } 447 448 bucketID := protoutil.FormatBucketID(q.Builder.GetProject(), q.Builder.GetBucket()) 449 preprocessed := make([]*model.TagIndexEntry, 0, len(entries)) 450 // A cache whether the user has the access permission to buckets. 451 hasAccessCache := map[string]bool{} 452 for _, e := range entries { 453 if e.BuildID < idLow || e.BuildID >= idHigh { 454 continue 455 } 456 // If the bucket in query is not specified, the permission was not checked earlier. 457 // In this case, check the permission. 458 if q.Builder.GetBucket() == "" { 459 has, ok := hasAccessCache[e.BucketID] 460 if !ok { 461 proj, bkt, _ := protoutil.ParseBucketID(e.BucketID) 462 if err := perm.HasInBucket(ctx, bbperms.BuildsList, proj, bkt); err == nil { 463 has = true 464 } else { 465 status, ok := appstatus.Get(err) 466 if !ok || (status.Code() != codes.PermissionDenied && status.Code() != codes.NotFound) { 467 return nil, err 468 } 469 } 470 hasAccessCache[e.BucketID] = has 471 } 472 if !has { 473 continue 474 } 475 } else if bucketID != e.BucketID { 476 continue 477 } 478 preprocessed = append(preprocessed, e) 479 } 480 return preprocessed, nil 481 } 482 483 // idRange computes the id range from q.BuildIdLow/q.BuildIdHigh, q.StartTime/q.EndTime and q.StartCursor. 484 // Returning 0 means no boundary. 
485 func (q *Query) idRange() (idLow, idHigh int64) { 486 if q.BuildIDLow != 0 || q.BuildIDHigh != 0 { 487 idLow, idHigh = q.BuildIDLow, q.BuildIDHigh 488 } else { 489 idLow, idHigh = buildid.IDRange(q.StartTime, q.EndTime) 490 } 491 492 if q.PageToken != "" { 493 if minExclusiveID, _ := strconv.ParseInt(q.PageToken[len("id>"):], 10, 64); minExclusiveID+1 > idLow { 494 idLow = minExclusiveID + 1 495 } 496 } 497 return 498 } 499 500 // fixPageSize ensures the size is positive and less than or equal to maxPageSize. 501 func fixPageSize(size int32) int32 { 502 switch { 503 case size <= 0: 504 return defaultPageSize 505 case size > maxPageSize: 506 return maxPageSize 507 default: 508 return size 509 } 510 } 511 512 // fixPageSize ensures the createdBy identity string is the format "kind:value". 513 func fixCreatedBy(createdBy string) string { 514 if createdBy != "" && !strings.Contains(createdBy, ":") { 515 createdBy = fmt.Sprintf("user:%s", createdBy) 516 } 517 return createdBy 518 } 519 520 // mustTimestamp converts a protobuf timestamp to a time.Time and panics on failures. 521 // It returns zero time for nil timestamp. 522 func mustTimestamp(ts *timestamppb.Timestamp) time.Time { 523 if ts == nil { 524 return time.Time{} 525 } 526 527 if err := ts.CheckValid(); err != nil { 528 panic(err) 529 } 530 t := ts.AsTime() 531 return t 532 } 533 534 // minHeap holds a slice of TagIndexEntry and implements heap.Interface. 535 type minHeap []*model.TagIndexEntry 536 537 var _ heap.Interface = &minHeap{} 538 539 func (m minHeap) Len() int { return len(m) } 540 541 func (m minHeap) Less(i, j int) bool { return m[i].BuildID < m[j].BuildID } 542 543 func (m minHeap) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 544 545 func (m *minHeap) Push(x any) { 546 *m = append(*m, x.(*model.TagIndexEntry)) 547 } 548 549 func (m *minHeap) Pop() any { 550 old := *m 551 n := len(old) 552 item := old[n-1] 553 *m = old[0 : n-1] 554 return item 555 }