go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/filter/txnBuf/query_merger.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package txnBuf
    16  
    17  import (
    18  	"bytes"
    19  	"sort"
    20  
    21  	"go.chromium.org/luci/common/data/cmpbin"
    22  	"go.chromium.org/luci/common/data/stringset"
    23  
    24  	"go.chromium.org/luci/gae/impl/memory"
    25  	ds "go.chromium.org/luci/gae/service/datastore"
    26  )
    27  
    28  // queryToIter takes a FinalizedQuery and returns an iterator function which
    29  // will produce either *items or errors.
    30  //
    31  //   - d is the raw datastore to run this query on
    32  //   - filter is a function which will return true if the given key should be
    33  //     excluded from the result set.
    34  func queryToIter(stopChan chan struct{}, fq *ds.FinalizedQuery, d ds.RawInterface) func() (*item, error) {
    35  	c := make(chan *item)
    36  
    37  	go func() {
    38  		defer close(c)
    39  
    40  		err := d.Run(fq, func(k *ds.Key, pm ds.PropertyMap, _ ds.CursorCB) error {
    41  			i := &item{key: k, data: pm}
    42  			select {
    43  			case c <- i:
    44  				return nil
    45  			case <-stopChan:
    46  				return ds.Stop
    47  			}
    48  		})
    49  		if err != nil {
    50  			c <- &item{err: err}
    51  		}
    52  	}()
    53  
    54  	return func() (*item, error) {
    55  		itm := <-c
    56  		if itm == nil {
    57  			return nil, nil
    58  		}
    59  		if itm.err != nil {
    60  			return nil, itm.err
    61  		}
    62  		return itm, nil
    63  	}
    64  }
    65  
    66  // adjustQuery applies various mutations to the query to make it suitable for
    67  // merging. In general, this removes limits and offsets the 'distinct' modifier,
    68  // and it ensures that if there are sort orders which won't appear in the
    69  // result data that the query is transformed into a projection query which
    70  // contains all of the data. A non-projection query will never be transformed
    71  // in this way.
    72  func adjustQuery(fq *ds.FinalizedQuery) (*ds.FinalizedQuery, error) {
    73  	q := fq.Original()
    74  
    75  	// The limit and offset must be done in-memory because otherwise we may
    76  	// request too few entities from the underlying store if many matching
    77  	// entities have been deleted in the buffered transaction.
    78  	q = q.Limit(-1)
    79  	q = q.Offset(-1)
    80  
    81  	// distinction must be done in-memory, because otherwise there's no way
    82  	// to merge in the effect of the in-flight changes (because there's no way
    83  	// to push back to the datastore "yeah, I know you told me that the (1, 2)
    84  	// result came from `/Bob,1`, but would you mind pretending that it didn't
    85  	// and tell me next the one instead?
    86  	q = q.Distinct(false)
    87  
    88  	// since we need to merge results, we must have all order-related fields
    89  	// in each result. The only time we wouldn't have all the data available would
    90  	// be for a keys-only or projection query. To fix this, we convert all
    91  	// Projection and KeysOnly queries to project on /all/ Orders.
    92  	//
    93  	// FinalizedQuery already guarantees that all projected fields show up in
    94  	// the Orders, but the projected fields could be a subset of the orders.
    95  	//
    96  	// Additionally on a keys-only query, any orders other than __key__ require
    97  	// conversion of this query to a projection query including those orders in
    98  	// order to merge the results correctly.
    99  	//
   100  	// In both cases, the resulting objects returned to the higher layers of the
   101  	// stack will only include the information requested by the user; keys-only
   102  	// queries will discard all PropertyMap data, and projection queries will
   103  	// discard any field data that the user didn't ask for.
   104  	orders := fq.Orders()
   105  	if len(fq.Project()) > 0 || (fq.KeysOnly() && len(orders) > 1) {
   106  		q = q.KeysOnly(false)
   107  
   108  		for _, o := range orders {
   109  			if o.Property == "__key__" {
   110  				continue
   111  			}
   112  			q = q.Project(o.Property)
   113  		}
   114  	}
   115  
   116  	return q.Finalize()
   117  }
   118  
   119  // runMergedQueries executes a user query `fq` against the parent datastore as
   120  // well as the in-memory datastore, calling `cb` with the merged result set.
   121  //
   122  // It's expected that the caller of this function will apply limit and offset
   123  // if the query contains those restrictions. This may convert the query to
   124  // an expanded projection query with more data than the user asked for. It's the
   125  // caller's responsibility to prune away the extra data.
   126  //
   127  // See also `dsTxnBuf.Run()`.
   128  func runMergedQueries(fq *ds.FinalizedQuery, sizes *sizeTracker,
   129  	memDS, parentDS ds.RawInterface, cb func(k *ds.Key, data ds.PropertyMap) error) error {
   130  
   131  	toRun, err := adjustQuery(fq)
   132  	if err != nil {
   133  		return err
   134  	}
   135  
   136  	cmpLower, cmpUpper := memory.GetBinaryBounds(fq)
   137  	cmpOrder := fq.Orders()
   138  	cmpFn := func(i *item) string {
   139  		return i.getCmpRow(cmpLower, cmpUpper, cmpOrder)
   140  	}
   141  
   142  	dedup := stringset.Set(nil)
   143  	distinct := stringset.Set(nil)
   144  	distinctOrder := []ds.IndexColumn(nil)
   145  	if len(fq.Project()) > 0 { // the original query was a projection query
   146  		if fq.Distinct() {
   147  			// it was a distinct projection query, so we need to dedup by distinct
   148  			// options.
   149  			distinct = stringset.New(0)
   150  			proj := fq.Project()
   151  			distinctOrder = make([]ds.IndexColumn, len(proj))
   152  			for i, p := range proj {
   153  				distinctOrder[i].Property = p
   154  			}
   155  		}
   156  	} else {
   157  		// the original was a normal or keys-only query, so we need to dedup by keys.
   158  		dedup = stringset.New(0)
   159  	}
   160  
   161  	stopChan := make(chan struct{})
   162  
   163  	parIter := queryToIter(stopChan, toRun, parentDS)
   164  	memIter := queryToIter(stopChan, toRun, memDS)
   165  
   166  	parItemGet := func() (*item, error) {
   167  		for {
   168  			itm, err := parIter()
   169  			if itm == nil || err != nil {
   170  				return nil, err
   171  			}
   172  			encKey := itm.getEncKey()
   173  			if sizes.has(encKey) || (dedup != nil && dedup.Has(encKey)) {
   174  				continue
   175  			}
   176  			return itm, nil
   177  		}
   178  	}
   179  	memItemGet := func() (*item, error) {
   180  		for {
   181  			itm, err := memIter()
   182  			if itm == nil || err != nil {
   183  				return nil, err
   184  			}
   185  			if dedup != nil && dedup.Has(itm.getEncKey()) {
   186  				continue
   187  			}
   188  			return itm, nil
   189  		}
   190  	}
   191  
   192  	defer func() {
   193  		close(stopChan)
   194  		parItemGet()
   195  		memItemGet()
   196  	}()
   197  
   198  	pitm, err := parItemGet()
   199  	if err != nil {
   200  		return err
   201  	}
   202  
   203  	mitm, err := memItemGet()
   204  	if err != nil {
   205  		return err
   206  	}
   207  
   208  	for {
   209  		// the err can be set during the loop below. If we come around the bend and
   210  		// it's set, then we need to return it. We don't check it immediately
   211  		// because it's set after we already have a good result to return to the
   212  		// user.
   213  		if err != nil {
   214  			return err
   215  		}
   216  
   217  		usePitm := pitm != nil
   218  		if pitm != nil && mitm != nil {
   219  			usePitm = cmpFn(pitm) < cmpFn(mitm)
   220  		} else if pitm == nil && mitm == nil {
   221  			break
   222  		}
   223  
   224  		toUse := (*item)(nil)
   225  		// we check the error at the beginning of the loop.
   226  		if usePitm {
   227  			toUse = pitm
   228  			pitm, err = parItemGet()
   229  		} else {
   230  			toUse = mitm
   231  			mitm, err = memItemGet()
   232  		}
   233  
   234  		if dedup != nil {
   235  			if !dedup.Add(toUse.getEncKey()) {
   236  				continue
   237  			}
   238  		}
   239  		if distinct != nil {
   240  			// NOTE: We know that toUse will not be used after this point for
   241  			// comparison purposes, so re-use its cmpRow property for our distinct
   242  			// filter here.
   243  			toUse.cmpRow = ""
   244  			if !distinct.Add(toUse.getCmpRow(nil, nil, distinctOrder)) {
   245  				continue
   246  			}
   247  		}
   248  		if err := cb(toUse.key, toUse.data); err != nil {
   249  			return err
   250  		}
   251  	}
   252  
   253  	return nil
   254  }
   255  
   256  // toComparableString computes the byte-sortable 'order' string for the given
   257  // key/PropertyMap.
   258  //
   259  //   - start/end are byte sequences which are the inequality bounds of the
   260  //     query, if any. These are a serialized datastore.Property. If the
   261  //     inequality column is inverted, then start and end are also inverted and
   262  //     swapped with each other.
   263  //   - order is the list of sort orders in the actual executing queries.
   264  //   - k / pm are the data to derive a sortable string for.
   265  //
   266  // The result of this function is the series of serialized properties, one per
   267  // order column, which represent this key/pm's first entry in the composite
   268  // index that would point to it (e.g. the one with `order` sort orders).
   269  func toComparableString(start, end []byte, order []ds.IndexColumn, k *ds.Key, pm ds.PropertyMap) (row, key []byte) {
   270  	doCmp := true
   271  	soFar := []byte{}
   272  	ps := ds.Serialize.IndexedPropertiesForIndicies(k, pm, order)
   273  	for _, ord := range order {
   274  		row := ps[ord.Property]
   275  		sort.Sort(row)
   276  		foundOne := false
   277  		for _, serialized := range row {
   278  			if ord.Descending {
   279  				serialized = cmpbin.InvertBytes(serialized)
   280  			}
   281  			if doCmp {
   282  				maybe := cmpbin.ConcatBytes(soFar, serialized)
   283  				cmp := bytes.Compare(maybe, start)
   284  				if cmp >= 0 {
   285  					foundOne = true
   286  					soFar = maybe
   287  					doCmp = len(soFar) < len(start)
   288  					break
   289  				}
   290  			} else {
   291  				foundOne = true
   292  				soFar = cmpbin.ConcatBytes(soFar, serialized)
   293  				break
   294  			}
   295  		}
   296  		if !foundOne {
   297  			return nil, nil
   298  		}
   299  	}
   300  	if end != nil && bytes.Compare(soFar, end) >= 0 {
   301  		return nil, nil
   302  	}
   303  	return soFar, ps["__key__"][0]
   304  }