go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/filter/txnBuf/query_merger.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package txnBuf 16 17 import ( 18 "bytes" 19 "sort" 20 21 "go.chromium.org/luci/common/data/cmpbin" 22 "go.chromium.org/luci/common/data/stringset" 23 24 "go.chromium.org/luci/gae/impl/memory" 25 ds "go.chromium.org/luci/gae/service/datastore" 26 ) 27 28 // queryToIter takes a FinalizedQuery and returns an iterator function which 29 // will produce either *items or errors. 30 // 31 // - d is the raw datastore to run this query on 32 // - filter is a function which will return true if the given key should be 33 // excluded from the result set. 34 func queryToIter(stopChan chan struct{}, fq *ds.FinalizedQuery, d ds.RawInterface) func() (*item, error) { 35 c := make(chan *item) 36 37 go func() { 38 defer close(c) 39 40 err := d.Run(fq, func(k *ds.Key, pm ds.PropertyMap, _ ds.CursorCB) error { 41 i := &item{key: k, data: pm} 42 select { 43 case c <- i: 44 return nil 45 case <-stopChan: 46 return ds.Stop 47 } 48 }) 49 if err != nil { 50 c <- &item{err: err} 51 } 52 }() 53 54 return func() (*item, error) { 55 itm := <-c 56 if itm == nil { 57 return nil, nil 58 } 59 if itm.err != nil { 60 return nil, itm.err 61 } 62 return itm, nil 63 } 64 } 65 66 // adjustQuery applies various mutations to the query to make it suitable for 67 // merging. In general, this removes limits and offsets the 'distinct' modifier, 68 // and it ensures that if there are sort orders which won't appear in the 69 // result data that the query is transformed into a projection query which 70 // contains all of the data. A non-projection query will never be transformed 71 // in this way. 72 func adjustQuery(fq *ds.FinalizedQuery) (*ds.FinalizedQuery, error) { 73 q := fq.Original() 74 75 // The limit and offset must be done in-memory because otherwise we may 76 // request too few entities from the underlying store if many matching 77 // entities have been deleted in the buffered transaction. 78 q = q.Limit(-1) 79 q = q.Offset(-1) 80 81 // distinction must be done in-memory, because otherwise there's no way 82 // to merge in the effect of the in-flight changes (because there's no way 83 // to push back to the datastore "yeah, I know you told me that the (1, 2) 84 // result came from `/Bob,1`, but would you mind pretending that it didn't 85 // and tell me next the one instead? 86 q = q.Distinct(false) 87 88 // since we need to merge results, we must have all order-related fields 89 // in each result. The only time we wouldn't have all the data available would 90 // be for a keys-only or projection query. To fix this, we convert all 91 // Projection and KeysOnly queries to project on /all/ Orders. 92 // 93 // FinalizedQuery already guarantees that all projected fields show up in 94 // the Orders, but the projected fields could be a subset of the orders. 95 // 96 // Additionally on a keys-only query, any orders other than __key__ require 97 // conversion of this query to a projection query including those orders in 98 // order to merge the results correctly. 99 // 100 // In both cases, the resulting objects returned to the higher layers of the 101 // stack will only include the information requested by the user; keys-only 102 // queries will discard all PropertyMap data, and projection queries will 103 // discard any field data that the user didn't ask for. 104 orders := fq.Orders() 105 if len(fq.Project()) > 0 || (fq.KeysOnly() && len(orders) > 1) { 106 q = q.KeysOnly(false) 107 108 for _, o := range orders { 109 if o.Property == "__key__" { 110 continue 111 } 112 q = q.Project(o.Property) 113 } 114 } 115 116 return q.Finalize() 117 } 118 119 // runMergedQueries executes a user query `fq` against the parent datastore as 120 // well as the in-memory datastore, calling `cb` with the merged result set. 121 // 122 // It's expected that the caller of this function will apply limit and offset 123 // if the query contains those restrictions. This may convert the query to 124 // an expanded projection query with more data than the user asked for. It's the 125 // caller's responsibility to prune away the extra data. 126 // 127 // See also `dsTxnBuf.Run()`. 128 func runMergedQueries(fq *ds.FinalizedQuery, sizes *sizeTracker, 129 memDS, parentDS ds.RawInterface, cb func(k *ds.Key, data ds.PropertyMap) error) error { 130 131 toRun, err := adjustQuery(fq) 132 if err != nil { 133 return err 134 } 135 136 cmpLower, cmpUpper := memory.GetBinaryBounds(fq) 137 cmpOrder := fq.Orders() 138 cmpFn := func(i *item) string { 139 return i.getCmpRow(cmpLower, cmpUpper, cmpOrder) 140 } 141 142 dedup := stringset.Set(nil) 143 distinct := stringset.Set(nil) 144 distinctOrder := []ds.IndexColumn(nil) 145 if len(fq.Project()) > 0 { // the original query was a projection query 146 if fq.Distinct() { 147 // it was a distinct projection query, so we need to dedup by distinct 148 // options. 149 distinct = stringset.New(0) 150 proj := fq.Project() 151 distinctOrder = make([]ds.IndexColumn, len(proj)) 152 for i, p := range proj { 153 distinctOrder[i].Property = p 154 } 155 } 156 } else { 157 // the original was a normal or keys-only query, so we need to dedup by keys. 158 dedup = stringset.New(0) 159 } 160 161 stopChan := make(chan struct{}) 162 163 parIter := queryToIter(stopChan, toRun, parentDS) 164 memIter := queryToIter(stopChan, toRun, memDS) 165 166 parItemGet := func() (*item, error) { 167 for { 168 itm, err := parIter() 169 if itm == nil || err != nil { 170 return nil, err 171 } 172 encKey := itm.getEncKey() 173 if sizes.has(encKey) || (dedup != nil && dedup.Has(encKey)) { 174 continue 175 } 176 return itm, nil 177 } 178 } 179 memItemGet := func() (*item, error) { 180 for { 181 itm, err := memIter() 182 if itm == nil || err != nil { 183 return nil, err 184 } 185 if dedup != nil && dedup.Has(itm.getEncKey()) { 186 continue 187 } 188 return itm, nil 189 } 190 } 191 192 defer func() { 193 close(stopChan) 194 parItemGet() 195 memItemGet() 196 }() 197 198 pitm, err := parItemGet() 199 if err != nil { 200 return err 201 } 202 203 mitm, err := memItemGet() 204 if err != nil { 205 return err 206 } 207 208 for { 209 // the err can be set during the loop below. If we come around the bend and 210 // it's set, then we need to return it. We don't check it immediately 211 // because it's set after we already have a good result to return to the 212 // user. 213 if err != nil { 214 return err 215 } 216 217 usePitm := pitm != nil 218 if pitm != nil && mitm != nil { 219 usePitm = cmpFn(pitm) < cmpFn(mitm) 220 } else if pitm == nil && mitm == nil { 221 break 222 } 223 224 toUse := (*item)(nil) 225 // we check the error at the beginning of the loop. 226 if usePitm { 227 toUse = pitm 228 pitm, err = parItemGet() 229 } else { 230 toUse = mitm 231 mitm, err = memItemGet() 232 } 233 234 if dedup != nil { 235 if !dedup.Add(toUse.getEncKey()) { 236 continue 237 } 238 } 239 if distinct != nil { 240 // NOTE: We know that toUse will not be used after this point for 241 // comparison purposes, so re-use its cmpRow property for our distinct 242 // filter here. 243 toUse.cmpRow = "" 244 if !distinct.Add(toUse.getCmpRow(nil, nil, distinctOrder)) { 245 continue 246 } 247 } 248 if err := cb(toUse.key, toUse.data); err != nil { 249 return err 250 } 251 } 252 253 return nil 254 } 255 256 // toComparableString computes the byte-sortable 'order' string for the given 257 // key/PropertyMap. 258 // 259 // - start/end are byte sequences which are the inequality bounds of the 260 // query, if any. These are a serialized datastore.Property. If the 261 // inequality column is inverted, then start and end are also inverted and 262 // swapped with each other. 263 // - order is the list of sort orders in the actual executing queries. 264 // - k / pm are the data to derive a sortable string for. 265 // 266 // The result of this function is the series of serialized properties, one per 267 // order column, which represent this key/pm's first entry in the composite 268 // index that would point to it (e.g. the one with `order` sort orders). 269 func toComparableString(start, end []byte, order []ds.IndexColumn, k *ds.Key, pm ds.PropertyMap) (row, key []byte) { 270 doCmp := true 271 soFar := []byte{} 272 ps := ds.Serialize.IndexedPropertiesForIndicies(k, pm, order) 273 for _, ord := range order { 274 row := ps[ord.Property] 275 sort.Sort(row) 276 foundOne := false 277 for _, serialized := range row { 278 if ord.Descending { 279 serialized = cmpbin.InvertBytes(serialized) 280 } 281 if doCmp { 282 maybe := cmpbin.ConcatBytes(soFar, serialized) 283 cmp := bytes.Compare(maybe, start) 284 if cmp >= 0 { 285 foundOne = true 286 soFar = maybe 287 doCmp = len(soFar) < len(start) 288 break 289 } 290 } else { 291 foundOne = true 292 soFar = cmpbin.ConcatBytes(soFar, serialized) 293 break 294 } 295 } 296 if !foundOne { 297 return nil, nil 298 } 299 } 300 if end != nil && bytes.Compare(soFar, end) >= 0 { 301 return nil, nil 302 } 303 return soFar, ps["__key__"][0] 304 }