go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/impl/memory/datastore_query.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package memory 16 17 import ( 18 "bytes" 19 "encoding/base64" 20 "errors" 21 "fmt" 22 23 "go.chromium.org/luci/common/data/cmpbin" 24 "go.chromium.org/luci/common/data/stringset" 25 26 ds "go.chromium.org/luci/gae/service/datastore" 27 ) 28 29 // MaxQueryComponents was lifted from a hard-coded constant in dev_appserver. 30 // No idea if it's a real limit or just a convenience in the current dev 31 // appserver implementation. 32 const MaxQueryComponents = 100 33 34 // MaxIndexColumns is the maximum number of index columns we're willing to 35 // support. 36 const MaxIndexColumns = 64 37 38 // A queryCursor is: 39 // 40 // {#orders} ++ IndexColumn* ++ RawRowData 41 // IndexColumn will always contain __key__ as the last column, and so #orders 42 // must always be >= 1 43 type queryCursor []byte 44 45 func newCursor(s string) (ds.Cursor, error) { 46 d, err := base64.RawURLEncoding.DecodeString(s) 47 if err != nil { 48 return nil, fmt.Errorf("failed to Base64-decode cursor: %s", err) 49 } 50 c := queryCursor(d) 51 if _, _, err := c.decode(); err != nil { 52 return nil, err 53 } 54 return c, nil 55 } 56 57 func (q queryCursor) String() string { return base64.RawURLEncoding.EncodeToString([]byte(q)) } 58 59 // decode returns the encoded IndexColumns, the raw row (cursor) data, or an 60 // error. 61 func (q queryCursor) decode() ([]ds.IndexColumn, []byte, error) { 62 buf := bytes.NewBuffer([]byte(q)) 63 count, _, err := cmpbin.ReadUint(buf) 64 if err != nil { 65 return nil, nil, fmt.Errorf("invalid cursor: bad prefix number") 66 } 67 68 if count == 0 || count > MaxIndexColumns { 69 return nil, nil, fmt.Errorf("invalid cursor: bad column count %d", count) 70 } 71 72 if count == 0 { 73 return nil, nil, fmt.Errorf("invalid cursor: zero prefix number") 74 } 75 76 cols := make([]ds.IndexColumn, count) 77 for i := range cols { 78 if cols[i], err = ds.Deserialize.IndexColumn(buf); err != nil { 79 return nil, nil, fmt.Errorf("invalid cursor: unable to decode IndexColumn %d: %s", i, err) 80 } 81 } 82 83 if cols[len(cols)-1].Property != "__key__" { 84 return nil, nil, fmt.Errorf("invalid cursor: last column was not __key__: %v", cols[len(cols)-1]) 85 } 86 87 return cols, buf.Bytes(), nil 88 } 89 90 func sortOrdersEqual(as, bs []ds.IndexColumn) bool { 91 if len(as) != len(bs) { 92 return false 93 } 94 for i, a := range as { 95 if a != bs[i] { 96 return false 97 } 98 } 99 return true 100 } 101 102 func numComponents(fq *ds.FinalizedQuery) int { 103 numComponents := len(fq.Orders()) 104 if p, _, _ := fq.IneqFilterLow(); p != "" { 105 numComponents++ 106 } 107 if p, _, _ := fq.IneqFilterHigh(); p != "" { 108 numComponents++ 109 } 110 for _, v := range fq.EqFilters() { 111 numComponents += v.Len() 112 } 113 return numComponents 114 } 115 116 // GetBinaryBounds gets the binary encoding of the upper and lower bounds of 117 // the inequality filter on fq, if any is defined. If a bound does not exist, 118 // it is nil. 119 // 120 // NOTE: if fq specifies a descending sort order for the inequality, the bounds 121 // will be inverted, incremented, and flipped. 122 func GetBinaryBounds(fq *ds.FinalizedQuery) (lower, upper []byte) { 123 // Pick up the start/end range from the inequalities, if any. 124 // 125 // start and end in the reducedQuery are normalized so that `start >= 126 // X < end`. Because of that, we need to tweak the inequality filters 127 // contained in the query if they use the > or <= operators. 128 if ineqProp := fq.IneqFilterProp(); ineqProp != "" { 129 _, startOp, startV := fq.IneqFilterLow() 130 if startOp != "" { 131 lower = ds.Serialize.ToBytes(startV) 132 if startOp == ">" { 133 lower = increment(lower) 134 } 135 } 136 137 _, endOp, endV := fq.IneqFilterHigh() 138 if endOp != "" { 139 upper = ds.Serialize.ToBytes(endV) 140 if endOp == "<=" { 141 upper = increment(upper) 142 } 143 } 144 145 // The inequality is specified in natural (ascending) order in the query's 146 // Filter syntax, but the order information may indicate to use a descending 147 // index column for it. If that's the case, then we must invert, swap and 148 // increment the inequality endpoints. 149 // 150 // Invert so that the desired numbers are represented correctly in the index. 151 // Swap so that our iterators still go from >= start to < end. 152 // Increment so that >= and < get correctly bounded (since the iterator is 153 // still using natrual bytes ordering) 154 if fq.Orders()[0].Descending { 155 hi, lo := []byte(nil), []byte(nil) 156 if len(lower) > 0 { 157 lo = increment(cmpbin.InvertBytes(lower)) 158 } 159 if len(upper) > 0 { 160 hi = increment(cmpbin.InvertBytes(upper)) 161 } 162 upper, lower = lo, hi 163 } 164 } 165 return 166 } 167 168 func reduce(fq *ds.FinalizedQuery, kc ds.KeyContext, isTxn bool) (*reducedQuery, error) { 169 if err := fq.Valid(kc); err != nil { 170 return nil, err 171 } 172 if isTxn && !fq.Original().GetFirestoreMode() && fq.Ancestor() == nil { 173 return nil, fmt.Errorf("queries within a transaction to datastore must include an Ancestor filter") 174 } 175 if num := numComponents(fq); num > MaxQueryComponents { 176 return nil, fmt.Errorf( 177 "gae/memory: query is too large. may not have more than "+ 178 "%d filters + sort orders + ancestor total: had %d", 179 MaxQueryComponents, num) 180 } 181 182 ret := &reducedQuery{ 183 kc: kc, 184 kind: fq.Kind(), 185 suffixFormat: fq.Orders(), 186 } 187 188 eqFilts := fq.EqFilters() 189 ret.eqFilters = make(map[string]stringset.Set, len(eqFilts)) 190 for prop, vals := range eqFilts { 191 sVals := stringset.New(len(vals)) 192 for _, v := range vals { 193 sVals.Add(string(ds.Serialize.ToBytes(v))) 194 } 195 ret.eqFilters[prop] = sVals 196 } 197 198 // Only trivial IN filters with a single value are supported right now. They 199 // are identical to EQ filters: In("prop", "a") <=> Eq("prop", "a"). 200 for prop, slices := range fq.InFilters() { 201 for _, vals := range slices { 202 if len(vals) != 1 { 203 return nil, fmt.Errorf("non-trivial IN filters are not implemented yet") 204 } 205 if ret.eqFilters[prop] == nil { 206 ret.eqFilters[prop] = stringset.New(1) 207 } 208 ret.eqFilters[prop].Add(string(ds.Serialize.ToBytes(vals[0]))) 209 } 210 } 211 212 startD, endD := GetBinaryBounds(fq) 213 214 // Now we check the start and end cursors. 215 // 216 // Cursors are composed of a list of IndexColumns at the beginning, followed 217 // by the raw bytes to use for the suffix. The cursor is only valid if all of 218 // its IndexColumns match our proposed suffixFormat, as calculated above. 219 // 220 // Cursors are mutually exclusive with the start/end we picked up from the 221 // inequality. In a well formed query, they indicate a subset of results 222 // bounded by the inequality. Technically if the start cursor is not >= the 223 // low bound, or the end cursor is < the high bound, it's an error, but for 224 // simplicity we just cap to the narrowest intersection of the inequality and 225 // cursors. 226 ret.start = startD 227 ret.end = endD 228 if start, end := fq.Bounds(); start != nil || end != nil { 229 if start != nil { 230 if c, ok := start.(queryCursor); ok { 231 startCols, startD, err := c.decode() 232 if err != nil { 233 return nil, err 234 } 235 236 if !sortOrdersEqual(startCols, ret.suffixFormat) { 237 return nil, errors.New("gae/memory: start cursor is invalid for this query") 238 } 239 if ret.start == nil || bytes.Compare(ret.start, startD) < 0 { 240 ret.start = startD 241 } 242 } else { 243 return nil, errors.New("gae/memory: bad cursor type") 244 } 245 } 246 247 if end != nil { 248 if c, ok := end.(queryCursor); ok { 249 endCols, endD, err := c.decode() 250 if err != nil { 251 return nil, err 252 } 253 254 if !sortOrdersEqual(endCols, ret.suffixFormat) { 255 return nil, errors.New("gae/memory: end cursor is invalid for this query") 256 } 257 if ret.end == nil || bytes.Compare(endD, ret.end) < 0 { 258 ret.end = endD 259 } 260 } else { 261 return nil, errors.New("gae/memory: bad cursor type") 262 } 263 } 264 } 265 266 // Finally, verify that we could even /potentially/ do work. If we have 267 // overlapping range ends, then we don't have anything to do. 268 if ret.end != nil && bytes.Compare(ret.start, ret.end) >= 0 { 269 return nil, ds.ErrNullQuery 270 } 271 272 ret.numCols = len(ret.suffixFormat) 273 for prop, vals := range ret.eqFilters { 274 if len(ret.suffixFormat) == 1 && prop == "__ancestor__" { 275 continue 276 } 277 ret.numCols += vals.Len() 278 } 279 280 return ret, nil 281 } 282 283 func increment(bstr []byte) []byte { 284 ret, overflow := cmpbin.IncrementBytes(bstr) 285 if overflow { 286 // This byte string was ALL 0xFF's. The only safe incrementation to do here 287 // would be to add a new byte to the beginning of bstr with the value 0x01, 288 // and a byte to the beginning OF ALL OTHER []byte's which bstr may be 289 // compared with. This is obviously impossible to do here, so panic. If we 290 // hit this, then we would need to add a spare 0 byte before every index 291 // column. 292 // 293 // Another way to think about this is that we just accumulated a 'carry' bit, 294 // and the new value has overflowed this representation. 295 // 296 // Fortunately, the first byte of a serialized index column entry is a 297 // PropertyType byte, and the only valid values that we'll be incrementing 298 // are never equal to 0xFF, since they have the high bit set (so either they're 299 // 0x8*, or 0x7*, depending on if it's inverted). 300 impossible(fmt.Errorf("incrementing %v would require more sigfigs", bstr)) 301 } 302 return ret 303 }