go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/impl/memory/datastore_query.go

// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package memory

import (
	"bytes"
	"encoding/base64"
	"errors"
	"fmt"

	"go.chromium.org/luci/common/data/cmpbin"
	"go.chromium.org/luci/common/data/stringset"

	ds "go.chromium.org/luci/gae/service/datastore"
)

// MaxQueryComponents was lifted from a hard-coded constant in dev_appserver.
// No idea if it's a real limit or just a convenience in the current dev
// appserver implementation.
const MaxQueryComponents = 100

// MaxIndexColumns is the maximum number of index columns we're willing to
// support.
const MaxIndexColumns = 64

// A queryCursor is:
//
//	{#orders} ++ IndexColumn* ++ RawRowData
//	IndexColumn will always contain __key__ as the last column, and so #orders
//	  must always be >= 1
type queryCursor []byte
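
// Illustrative layout note (added for exposition, not part of the upstream
// file): a cursor for a query sorted by ("Prop" ASC, "__key__" ASC) decodes
// roughly as
//
//	cmpbin uint 2                     -- #orders
//	IndexColumn{Property: "Prop"}     -- first sort column
//	IndexColumn{Property: "__key__"}  -- always the final column
//	<raw suffix bytes>                -- the serialized row data
//
// The exact wire encoding belongs to cmpbin and ds.Serialize/ds.Deserialize.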

func newCursor(s string) (ds.Cursor, error) {
	d, err := base64.RawURLEncoding.DecodeString(s)
	if err != nil {
		return nil, fmt.Errorf("failed to Base64-decode cursor: %s", err)
	}
	c := queryCursor(d)
	if _, _, err := c.decode(); err != nil {
		return nil, err
	}
	return c, nil
}

func (q queryCursor) String() string { return base64.RawURLEncoding.EncodeToString([]byte(q)) }

// decode returns the encoded IndexColumns and the raw row (cursor) data, or an
// error.
func (q queryCursor) decode() ([]ds.IndexColumn, []byte, error) {
	buf := bytes.NewBuffer([]byte(q))
	count, _, err := cmpbin.ReadUint(buf)
	if err != nil {
		return nil, nil, fmt.Errorf("invalid cursor: bad prefix number")
	}

	if count == 0 || count > MaxIndexColumns {
		return nil, nil, fmt.Errorf("invalid cursor: bad column count %d", count)
	}

	cols := make([]ds.IndexColumn, count)
	for i := range cols {
		if cols[i], err = ds.Deserialize.IndexColumn(buf); err != nil {
			return nil, nil, fmt.Errorf("invalid cursor: unable to decode IndexColumn %d: %s", i, err)
		}
	}

	if cols[len(cols)-1].Property != "__key__" {
		return nil, nil, fmt.Errorf("invalid cursor: last column was not __key__: %v", cols[len(cols)-1])
	}

	return cols, buf.Bytes(), nil
}
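
// Minimal usage sketch (illustrative, not part of the upstream file): cursors
// arrive as base64 strings, are validated by newCursor, and re-encode
// losslessly:
//
//	c, err := newCursor(s) // s: a hypothetical cursor string from an earlier query
//	if err == nil {
//		_ = c.(queryCursor).String() // round-trips back to s
//	}
//
// decode rejects cursors whose column count is zero or exceeds
// MaxIndexColumns, and cursors whose final column is not __key__.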

func sortOrdersEqual(as, bs []ds.IndexColumn) bool {
	if len(as) != len(bs) {
		return false
	}
	for i, a := range as {
		if a != bs[i] {
			return false
		}
	}
	return true
}

func numComponents(fq *ds.FinalizedQuery) int {
	numComponents := len(fq.Orders())
	if p, _, _ := fq.IneqFilterLow(); p != "" {
		numComponents++
	}
	if p, _, _ := fq.IneqFilterHigh(); p != "" {
		numComponents++
	}
	for _, v := range fq.EqFilters() {
		numComponents += v.Len()
	}
	return numComponents
}
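
// Worked arithmetic (added for exposition): for a finalized query whose
// Orders() lists N sort columns, which has both a low and a high inequality
// bound, and whose equality filters hold M serialized values in total,
// numComponents returns N + 2 + M. reduce rejects the query once this sum
// exceeds MaxQueryComponents (100).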

// GetBinaryBounds gets the binary encoding of the upper and lower bounds of
// the inequality filter on fq, if any is defined. If a bound does not exist,
// it is nil.
//
// NOTE: if fq specifies a descending sort order for the inequality, the bounds
// will be inverted, incremented, and swapped.
func GetBinaryBounds(fq *ds.FinalizedQuery) (lower, upper []byte) {
	// Pick up the start/end range from the inequalities, if any.
	//
	// start and end in the reducedQuery are normalized so that
	// `start <= X < end`. Because of that, we need to tweak the inequality
	// filters contained in the query if they use the > or <= operators.
	if ineqProp := fq.IneqFilterProp(); ineqProp != "" {
		_, startOp, startV := fq.IneqFilterLow()
		if startOp != "" {
			lower = ds.Serialize.ToBytes(startV)
			if startOp == ">" {
				lower = increment(lower)
			}
		}

		_, endOp, endV := fq.IneqFilterHigh()
		if endOp != "" {
			upper = ds.Serialize.ToBytes(endV)
			if endOp == "<=" {
				upper = increment(upper)
			}
		}

		// The inequality is specified in natural (ascending) order in the query's
		// Filter syntax, but the order information may indicate to use a descending
		// index column for it. If that's the case, then we must invert, swap and
		// increment the inequality endpoints.
		//
		// Invert so that the desired numbers are represented correctly in the index.
		// Swap so that our iterators still go from >= start to < end.
		// Increment so that >= and < get correctly bounded (since the iterator is
		// still using natural byte ordering).
		if fq.Orders()[0].Descending {
			hi, lo := []byte(nil), []byte(nil)
			if len(lower) > 0 {
				lo = increment(cmpbin.InvertBytes(lower))
			}
			if len(upper) > 0 {
				hi = increment(cmpbin.InvertBytes(upper))
			}
			upper, lower = lo, hi
		}
	}
	return
}
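
// Rough usage sketch (illustrative; the builder calls below are an assumption
// about the ds query API, not something this file exercises):
//
//	fq, _ := ds.NewQuery("Kind").Gt("A", 5).Lte("A", 9).Order("A").Finalize()
//	lower, upper := GetBinaryBounds(fq)
//	// lower is the serialized 5, incremented, so ">" behaves as ">=".
//	// upper is the serialized 9, incremented, so "<=" behaves as "<".
//
// With Order("-A") the same bounds are additionally inverted via
// cmpbin.InvertBytes, incremented, and swapped, so iteration still runs from
// >= lower to < upper in natural byte order.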

func reduce(fq *ds.FinalizedQuery, kc ds.KeyContext, isTxn bool) (*reducedQuery, error) {
	if err := fq.Valid(kc); err != nil {
		return nil, err
	}
	if isTxn && !fq.Original().GetFirestoreMode() && fq.Ancestor() == nil {
		return nil, fmt.Errorf("queries within a transaction to datastore must include an Ancestor filter")
	}
	if num := numComponents(fq); num > MaxQueryComponents {
		return nil, fmt.Errorf(
			"gae/memory: query is too large. may not have more than "+
				"%d filters + sort orders + ancestor total: had %d",
			MaxQueryComponents, num)
	}

	ret := &reducedQuery{
		kc:           kc,
		kind:         fq.Kind(),
		suffixFormat: fq.Orders(),
	}

	eqFilts := fq.EqFilters()
	ret.eqFilters = make(map[string]stringset.Set, len(eqFilts))
	for prop, vals := range eqFilts {
		sVals := stringset.New(len(vals))
		for _, v := range vals {
			sVals.Add(string(ds.Serialize.ToBytes(v)))
		}
		ret.eqFilters[prop] = sVals
	}

	// Only trivial IN filters with a single value are supported right now. They
	// are identical to EQ filters: In("prop", "a") <=> Eq("prop", "a").
	for prop, slices := range fq.InFilters() {
		for _, vals := range slices {
			if len(vals) != 1 {
				return nil, fmt.Errorf("non-trivial IN filters are not implemented yet")
			}
			if ret.eqFilters[prop] == nil {
				ret.eqFilters[prop] = stringset.New(1)
			}
			ret.eqFilters[prop].Add(string(ds.Serialize.ToBytes(vals[0])))
		}
	}

	startD, endD := GetBinaryBounds(fq)

	// Now we check the start and end cursors.
	//
	// Cursors are composed of a list of IndexColumns at the beginning, followed
	// by the raw bytes to use for the suffix. The cursor is only valid if all of
	// its IndexColumns match our proposed suffixFormat, as calculated above.
	//
	// Cursors are mutually exclusive with the start/end we picked up from the
	// inequality. In a well-formed query, they indicate a subset of results
	// bounded by the inequality. Technically if the start cursor is not >= the
	// low bound, or the end cursor is not <= the high bound, it's an error, but
	// for simplicity we just cap to the narrowest intersection of the inequality
	// and cursors.
	ret.start = startD
	ret.end = endD
	if start, end := fq.Bounds(); start != nil || end != nil {
		if start != nil {
			if c, ok := start.(queryCursor); ok {
				startCols, startD, err := c.decode()
				if err != nil {
					return nil, err
				}

				if !sortOrdersEqual(startCols, ret.suffixFormat) {
					return nil, errors.New("gae/memory: start cursor is invalid for this query")
				}
				if ret.start == nil || bytes.Compare(ret.start, startD) < 0 {
					ret.start = startD
				}
			} else {
				return nil, errors.New("gae/memory: bad cursor type")
			}
		}

		if end != nil {
			if c, ok := end.(queryCursor); ok {
				endCols, endD, err := c.decode()
				if err != nil {
					return nil, err
				}

				if !sortOrdersEqual(endCols, ret.suffixFormat) {
					return nil, errors.New("gae/memory: end cursor is invalid for this query")
				}
				if ret.end == nil || bytes.Compare(endD, ret.end) < 0 {
					ret.end = endD
				}
			} else {
				return nil, errors.New("gae/memory: bad cursor type")
			}
		}
	}

	// Finally, verify that we could even /potentially/ do work. If the range is
	// empty (start >= end), then we don't have anything to do.
	if ret.end != nil && bytes.Compare(ret.start, ret.end) >= 0 {
		return nil, ds.ErrNullQuery
	}

	ret.numCols = len(ret.suffixFormat)
	for prop, vals := range ret.eqFilters {
		if len(ret.suffixFormat) == 1 && prop == "__ancestor__" {
			continue
		}
		ret.numCols += vals.Len()
	}

	return ret, nil
}
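
// Illustrative note (added for exposition): in the default (non-Firestore)
// mode, reduce only accepts transactional queries that carry an ancestor
// filter, e.g.
//
//	q := ds.NewQuery("Child").Ancestor(parentKey) // parentKey: a hypothetical *ds.Key
//
// A kind-only query with no Ancestor fails the isTxn check above with the
// "must include an Ancestor filter" error.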

func increment(bstr []byte) []byte {
	ret, overflow := cmpbin.IncrementBytes(bstr)
	if overflow {
		// This byte string was ALL 0xFF's. The only safe incrementation to do here
		// would be to add a new byte to the beginning of bstr with the value 0x01,
		// and a byte to the beginning OF ALL OTHER []byte's which bstr may be
		// compared with. This is obviously impossible to do here, so panic. If we
		// hit this, then we would need to add a spare 0 byte before every index
		// column.
		//
		// Another way to think about this is that we just accumulated a 'carry' bit,
		// and the new value has overflowed this representation.
		//
		// Fortunately, the first byte of a serialized index column entry is a
		// PropertyType byte, and the only valid values that we'll be incrementing
		// are never equal to 0xFF, since they have the high bit set (so either they're
		// 0x8*, or 0x7*, depending on if it's inverted).
		impossible(fmt.Errorf("incrementing %v would require more sigfigs", bstr))
	}
	return ret
}
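
// Behaviour sketch (illustrative values, not taken from cmpbin's docs):
// incrementing treats the byte string as a fixed-width big-endian counter
// with carry, so conceptually
//
//	increment([]byte{0x00, 0xff}) == []byte{0x01, 0x00}
//	increment([]byte{0xff, 0xff}) // overflows, which panics via impossible()
//
// The overflow case cannot happen for serialized index values, as explained
// in the comment above.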