go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/engine/invquery.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package engine
    16  
    17  import (
    18  	"context"
    19  	"encoding/base64"
    20  	"sort"
    21  
    22  	"google.golang.org/protobuf/proto"
    23  
    24  	"go.chromium.org/luci/gae/service/datastore"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/logging"
    29  	"go.chromium.org/luci/common/retry/transient"
    30  
    31  	"go.chromium.org/luci/scheduler/appengine/internal"
    32  )
    33  
// invQuery abstracts a query that fetches invocations in order of their IDs,
// smallest to largest.
//
// Think of it as a pointer to the head of the query that can be advanced on
// demand. mergeInvQueries consumes a set of such queries to produce one merged,
// de-duplicated stream.
type invQuery interface {
	// peek returns the invocation the query currently points to.
	//
	// It is fetched when 'advance' is called. A first call to 'peek' may also
	// initiate a fetch (to grab the first ever item).
	//
	// Returns nil if there are no more invocations to fetch. Returns an error if
	// the fetch failed.
	peek() (*Invocation, error)

	// advance fetches the next invocation to be returned by 'peek'.
	//
	// Returns an error if this operation fails. Reaching the end of the results
	// is not an error. If this happens, the next 'peek' returns nil, and keeps
	// returning nil forever.
	advance() error
}
    56  
    57  // mergeInvQueries merges results of multiple queries together.
    58  //
    59  // It picks smallest IDs first. In presence of duplicates, it favors queries
    60  // that are listed in 'qs' earlier.
    61  //
    62  // Places the results into 'out' slice, returning the extended slice at the end.
    63  //
    64  // Returns (..., true, nil) in case results of all queries has been exhausted,
    65  // and (..., false, nil) if it hit the limit on number of results.
    66  func mergeInvQueries(qs []invQuery, limit int, out []*Invocation) ([]*Invocation, bool, error) {
    67  	maxLen := len(out) + limit
    68  
    69  	for {
    70  		// Find the smallest invocation from heads of all queries. Do it even if
    71  		// already reached the limit, to check whether there are more items left.
    72  		var smallest *Invocation
    73  		for _, q := range qs {
    74  			inv, err := q.peek()
    75  			if err != nil {
    76  				return nil, false, err
    77  			}
    78  			if inv == nil {
    79  				continue // exhausted results of this query
    80  			}
    81  			if smallest == nil || inv.ID < smallest.ID {
    82  				smallest = inv
    83  			}
    84  		}
    85  
    86  		switch {
    87  		case smallest == nil:
    88  			return out, true, nil // exhausted results of all queries
    89  		case len(out) == maxLen:
    90  			return out, false, nil // actually filled up to the limit
    91  		}
    92  		out = append(out, smallest)
    93  
    94  		// There may be duplicates in the queries, so need to pop the consumed
    95  		// invocation from all queries.
    96  		for _, q := range qs {
    97  			for {
    98  				inv, err := q.peek()
    99  				if err != nil {
   100  					return nil, false, err
   101  				}
   102  				if inv == nil || inv.ID > smallest.ID {
   103  					break // found something larger at the head
   104  				}
   105  				if err := q.advance(); err != nil {
   106  					return nil, false, err
   107  				}
   108  			}
   109  		}
   110  	}
   111  }
   112  
   113  ////////////////////////////////////////////////////////////////////////////////
   114  // List based queries.
   115  
// invListQuery implements invQuery on top of a sorted list of Invocations.
//
// The list is assumed to be sorted by IDs in smallest-to-largest order. This is
// also the order in which invocations will be returned.
type invListQuery struct {
	// invs is the list of invocations to emit. It is not sorted by this type,
	// callers are expected to pass an already sorted list.
	invs []*Invocation
	// cur is the index in 'invs' of the item returned by 'peek'. It is equal to
	// len(invs) once all items have been consumed.
	cur  int
}
   124  
   125  func (q *invListQuery) peek() (*Invocation, error) {
   126  	if q.cur == len(q.invs) {
   127  		return nil, nil
   128  	}
   129  	return q.invs[q.cur], nil
   130  }
   131  
   132  func (q *invListQuery) advance() error {
   133  	if q.cur < len(q.invs) {
   134  		q.cur++
   135  	}
   136  	return nil
   137  }
   138  
   139  // activeInvQuery returns invQuery that emits active invocations, as fetched
   140  // from the job.ActiveInvocations field.
   141  //
   142  // Smallest IDs are returned first. IDs smaller than or equal to lastScanned are
   143  // skipped (this is used for pagination).
   144  func activeInvQuery(c context.Context, j *Job, lastScanned int64) *invListQuery {
   145  	var invs []*Invocation
   146  	for _, id := range j.ActiveInvocations {
   147  		if id > lastScanned {
   148  			invs = append(invs, &Invocation{ID: id})
   149  		}
   150  	}
   151  	sort.Slice(invs, func(l, r int) bool { return invs[l].ID < invs[r].ID })
   152  	return &invListQuery{invs: invs}
   153  }
   154  
   155  // recentInvQuery returns invQuery that emits recently finished invocations, as
   156  // fetched from the job.FinishedInvocationsRaw field.
   157  //
   158  // Smallest IDs are returned first. IDs smaller than or equal to lastScanned are
   159  // skipped (this is used for pagination).
   160  func recentInvQuery(c context.Context, j *Job, lastScanned int64) *invListQuery {
   161  	finished, err := filteredFinishedInvs(
   162  		j.FinishedInvocationsRaw, clock.Now(c).Add(-FinishedInvocationsHorizon))
   163  	if err != nil {
   164  		logging.WithError(err).Errorf(c, "Failed to unmarshal FinishedInvocationsRaw, skipping")
   165  		return &invListQuery{}
   166  	}
   167  
   168  	var invs []*Invocation
   169  	for _, inv := range finished {
   170  		if inv.InvocationId > lastScanned {
   171  			invs = append(invs, &Invocation{ID: inv.InvocationId})
   172  		}
   173  	}
   174  	sort.Slice(invs, func(l, r int) bool { return invs[l].ID < invs[r].ID })
   175  	return &invListQuery{invs: invs}
   176  }
   177  
   178  ////////////////////////////////////////////////////////////////////////////////
   179  // Datastore based queries.
   180  
// invDatastoreIter is a wrapper over datastore query that makes it look more
// like an iterator.
//
// The query runs in a goroutine launched by 'start'. It hands over fetched
// items one by one through an unbuffered channel, so it makes progress only
// when the consumer calls 'next'.
//
// Intended usage:
//
//	it.start(...)
//	defer it.stop()
//	for len(results) != pageSize {
//	  switch item, err := it.next(); {
//	  case err != nil:
//	    return nil, err // RPC error
//	  case item == nil:
//	    ...
//	    return results, nil // fetched all available results
//	  default:
//	    results = append(results, item)
//	  }
//	}
//	return results // fetched the full page
type invDatastoreIter struct {
	results chan *Invocation // receives results of the query, closed when the query goroutine exits
	done    chan struct{}    // closed when 'stop' is called
	err     error            // error status of the query, valid only after 'results' is closed
	stopped bool             // true if 'stop' was called
}
   206  
   207  // start initiates the query.
   208  //
   209  // The iterator is initially positioned before the first item, so that a call
   210  // to 'next' will return the first item.
   211  func (it *invDatastoreIter) start(c context.Context, query *datastore.Query) {
   212  	it.results = make(chan *Invocation)
   213  	it.done = make(chan struct{})
   214  	go func() {
   215  		defer close(it.results)
   216  		err := datastore.Run(c, query, func(obj *Invocation, cb datastore.CursorCB) error {
   217  			select {
   218  			case it.results <- obj:
   219  				return nil
   220  			case <-it.done:
   221  				return datastore.Stop
   222  			}
   223  		})
   224  		// Let 'next' and 'stop' know about the error. They look here if they
   225  		// receive 'nil' from the results channel (which happens if it is closed).
   226  		it.err = err
   227  	}()
   228  }
   229  
   230  // next fetches the next query item if there's one.
   231  //
   232  // Returns (nil, nil) if all items has been successfully fetched. If the query
   233  // failed, returns (nil, err).
   234  func (it *invDatastoreIter) next() (*Invocation, error) {
   235  	switch {
   236  	case it.results == nil:
   237  		panic("'next' is called before 'start'")
   238  	case it.stopped:
   239  		panic("'next' is called after 'stop'")
   240  	}
   241  	if inv, ok := <-it.results; ok {
   242  		return inv, nil
   243  	}
   244  	return nil, it.err // 'it.err' is valid only after the channel is closed
   245  }
   246  
   247  // stop finishes the query, killing the internal goroutine.
   248  //
   249  // Once 'stop' is called, calls to 'next' are forbidden. It is OK to call
   250  // 'stop' again though (it will return exact same value).
   251  func (it *invDatastoreIter) stop() error {
   252  	if !it.stopped {
   253  		it.stopped = true
   254  		close(it.done)         // signal the inner loop to wake up and exit
   255  		for range it.results { // wait for the results channel to close
   256  		}
   257  	}
   258  	return it.err
   259  }
   260  
// invDatastoreQuery implements invQuery on top of a datastore iterator.
//
// The datastore query results are expected to be sorted by IDs in
// smallest-to-largest order. This is also the order in which invocations will
// be returned.
//
// Once the underlying iterator is exhausted ('done' is true) or has failed
// ('err' is non-nil), the query stays in that state: 'peek' and 'advance' stop
// touching the iterator and keep returning the same result.
type invDatastoreQuery struct {
	iter invDatastoreIter // iterator positioned before the next result
	head *Invocation      // value to return in peek() or nil if haven't fetched yet
	err  error            // non-nil if the last fetch failed
	done bool             // true if fetched everything we could
}
   272  
   273  func (q *invDatastoreQuery) peek() (*Invocation, error) {
   274  	if q.done || q.err != nil {
   275  		return nil, q.err // in a final non-advancable state
   276  	}
   277  	if q.head == nil {
   278  		q.advance() // need to fetch the first item ever
   279  	}
   280  	return q.head, q.err
   281  }
   282  
   283  func (q *invDatastoreQuery) advance() error {
   284  	if q.done || q.err != nil {
   285  		return q.err // in a final non-advancable state
   286  	}
   287  	q.head, q.err = q.iter.next()
   288  	q.done = q.head == nil && q.err == nil
   289  	return q.err
   290  }
   291  
   292  func (q *invDatastoreQuery) close() {
   293  	q.iter.stop()
   294  }
   295  
   296  // finishedInvQuery returns invQuery that emits historical finished invocations,
   297  // of the given job.
   298  //
   299  // Smallest IDs are returned first. IDs smaller than or equal to lastScanned are
   300  // skipped (this is used for pagination).
   301  func finishedInvQuery(c context.Context, job *Job, lastScanned int64) *invDatastoreQuery {
   302  	q := datastore.NewQuery("Invocation")
   303  	q = q.Eq("IndexedJobID", job.JobID)
   304  	if lastScanned > 0 {
   305  		q = q.Gt("__key__", datastore.KeyForObj(c, &Invocation{ID: lastScanned}))
   306  	}
   307  	q = q.Order("__key__")
   308  	out := &invDatastoreQuery{}
   309  	out.iter.start(c, q)
   310  	return out
   311  }
   312  
   313  ////////////////////////////////////////////////////////////////////////////////
   314  // Cursor helpers.
   315  
   316  // decodeInvCursor deserializes a base64-encoded cursor.
   317  func decodeInvCursor(cursor string, cur *internal.InvocationsCursor) error {
   318  	if cursor == "" {
   319  		*cur = internal.InvocationsCursor{}
   320  		return nil
   321  	}
   322  
   323  	blob, err := base64.RawURLEncoding.DecodeString(cursor)
   324  	if err != nil {
   325  		return errors.Annotate(err, "failed to base64 decode the cursor").Err()
   326  	}
   327  
   328  	if err = proto.Unmarshal(blob, cur); err != nil {
   329  		return errors.Annotate(err, "failed to unmarshal the cursor").Err()
   330  	}
   331  
   332  	return nil
   333  }
   334  
   335  // encodeInvCursor serializes the cursor to base64-encoded string.
   336  func encodeInvCursor(cur *internal.InvocationsCursor) (string, error) {
   337  	if cur.LastScanned == 0 {
   338  		return "", nil
   339  	}
   340  
   341  	blob, err := proto.Marshal(cur)
   342  	if err != nil {
   343  		return "", err // must never actually happen
   344  	}
   345  
   346  	return base64.RawURLEncoding.EncodeToString(blob), nil
   347  }
   348  
   349  ////////////////////////////////////////////////////////////////////////////////
   350  // High level functions used by Engine.
   351  
// invsPage contains information about a page returned by fetchInvsPage.
//
// The zero value with 'final' set to true is what fetchInvsPage returns when
// there was nothing new to fetch at all.
type invsPage struct {
	count       int   // number of invocations in the page, after the filtering
	final       bool  // true if this is the final page
	lastScanned int64 // ID of the last scanned invocation (before the filtering), used to resume the query
}
   358  
   359  // fetchInvsPage fetches (perhaps incomplete or empty) page of invocations,
   360  // by merging results from multiple queries into the given 'out' slice.
   361  //
   362  // It is called (perhaps multiple times) by public ListInvocations to construct
   363  // a full page of results out of smaller incomplete pages.
   364  //
   365  // Returns the extended 'out' slice (that now contains fetched items) and
   366  // information about the fetched page.
   367  func fetchInvsPage(c context.Context, qs []invQuery, opts ListInvocationsOpts, out []*Invocation) ([]*Invocation, invsPage, error) {
   368  	prevSize := len(out)
   369  	out, final, err := mergeInvQueries(qs, opts.PageSize, out)
   370  	if err != nil {
   371  		return nil, invsPage{}, errors.Annotate(err, "failed to query invocations").Tag(transient.Tag).Err()
   372  	}
   373  
   374  	// Nothing new at all? We are done.
   375  	if len(out) == prevSize {
   376  		return out, invsPage{final: true}, nil
   377  	}
   378  
   379  	// Otherwise remember the last ID we looked at to resume our query from it. It
   380  	// is important to grab the ID before the filtering, otherwise we may end up
   381  	// stuck in an infinite loop that fetches an empty page (with all items
   382  	// filtered out) over and over again, not advancing the query.
   383  	lastScanned := out[len(out)-1].ID
   384  
   385  	// Inflate and filter (in-place) shallow entities resulted from queries over
   386  	// IDs list. Note that this may reduce the returned page size, in
   387  	// a pathological case to 0. 'ListInvocations' will compensate for that by
   388  	// calling 'fetchInvsPage' again to fetch more stuff until the full page is
   389  	// fetched.
   390  	filtered, err := fillShallowInvs(c, out[prevSize:], opts)
   391  	if err != nil {
   392  		return nil, invsPage{}, err
   393  	}
   394  
   395  	// 'filtered' points to a subslice of 'out' (located at the end), that has
   396  	// all filtered items now. Truncate 'out' to get rid of garbage left after
   397  	// the filtering.
   398  	out = out[:prevSize+len(filtered)]
   399  
   400  	return out, invsPage{len(out) - prevSize, final, lastScanned}, nil
   401  }
   402  
   403  // fillShallowInvs detects entities that do not have bodies fetched yet, fetches
   404  // them, and filters them based on ActiveOnly/FinishedOnly filter defined by
   405  // opts.
   406  //
   407  // This is needed for results of queries that use IDs inlined in the Job entity.
   408  // We detect such shallow entities by missing Status value, which is guaranteed
   409  // to be set for all Invocation entities.
   410  //
   411  // Filtering is required since the state of the entities fetched here may be
   412  // more up-to-date than the state used by queries. In particular, active
   413  // invocations may not be active anymore.
   414  //
   415  // Filters the given slice in-place and returns the filtered slice that shares
   416  // the same underlying array.
   417  func fillShallowInvs(c context.Context, invs []*Invocation, opts ListInvocationsOpts) ([]*Invocation, error) {
   418  	var shallow []*Invocation
   419  	for _, inv := range invs {
   420  		if inv.Status == "" {
   421  			shallow = append(shallow, inv)
   422  		}
   423  	}
   424  	if len(shallow) == 0 {
   425  		return invs, nil
   426  	}
   427  
   428  	if err := datastore.Get(c, shallow); err != nil {
   429  		return nil, errors.Annotate(err, "failed to fetch invocations").Tag(transient.Tag).Err()
   430  	}
   431  
   432  	filtered := invs[:0]
   433  	for _, inv := range invs {
   434  		if opts.ActiveOnly && inv.Status.Final() {
   435  			continue
   436  		}
   437  		if opts.FinishedOnly && !inv.Status.Final() {
   438  			continue
   439  		}
   440  		filtered = append(filtered, inv)
   441  	}
   442  	return filtered, nil
   443  }