go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/service/datastore/queryiterator.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package datastore 16 17 import ( 18 "bytes" 19 "context" 20 "sort" 21 22 "golang.org/x/sync/errgroup" 23 24 "go.chromium.org/luci/common/data/cmpbin" 25 ) 26 27 // queryIterator is an iterator for datastore query results. 28 // 29 // DANGER: This implementation assumes instances of CursorCB carry complete 30 // snapshots of the query iteration state, e.g. "remembering" a cursor callback 31 // at some iteration cycle `X`, and calling it a later cycle `Y` will produce 32 // a cursor for the cycle `X`. This property is not guaranteed by RawInterface 33 // API and it (accidentally) only holds for `impl/memory` implementation using 34 // only in unit tests. 35 // 36 // In all production implementations cursor callbacks just hold a pointer to 37 // the internal datastore iterator: calling such callback always returns 38 // the **current position** of the internal iterator (in the above example, it 39 // will produce the position representing `Y`, not `X`). As a consequence, 40 // RunMutli skips entities when resuming from a cursor. 41 // 42 // Additionally, RawInterface doesn't guarantee it is safe to call the cursor 43 // callback from another goroutine (which queryIterator does). This results in 44 // data races when using cursors with production implementations. Again, 45 // accidentally, `impl/memory` works "fine", so this problem is obscured in 46 // unit tests. 47 // 48 // One potential fix is to call the cursor callback for every fetched entity and 49 // pass around real Cursor objects (that are complete "snapshots") instead of 50 // CursorCB functions (that are just pointers to an internal non-thread safe 51 // iterator with the most recent cursor). The problem is that CursorCB is a 52 // potentially slow operations that can make RPCs (and it does in `impl/prod` 53 // implementation). That's the reason it is a callback (to be called lazily only 54 // when needed). It is not supposed to be used on every loop cycle. Using it 55 // this way may severely degrade performance of the query. 56 type queryIterator struct { 57 query *Query 58 order []IndexColumn 59 currentQueryResult *rawQueryResult 60 itemCh chan *rawQueryResult 61 done bool 62 currentItemOrderCache string // lazy loading (loaded when `CurrentItemOrder()` is called). 63 cursorCB CursorCB // for the *current* item 64 } 65 66 // startQueryIterator starts to run the given query and return the iterator for 67 // query results. 68 func startQueryIterator(ctx context.Context, eg *errgroup.Group, fq *FinalizedQuery) *queryIterator { 69 qi := &queryIterator{ 70 query: fq.Original(), 71 order: fq.Orders(), 72 itemCh: make(chan *rawQueryResult), 73 // This will be used as CurrentCursor after the first Next() call. To get 74 // the first query result, we just need to restart the query from its 75 // initial starting cursor. 76 cursorCB: func() (Cursor, error) { 77 start, _ := fq.Bounds() 78 return start, nil 79 }, 80 } 81 82 eg.Go(func() (err error) { 83 defer func() { qi.itemCh <- &rawQueryResult{err: err} }() 84 return Raw(ctx).Run(fq, func(k *Key, pm PropertyMap, cursorCB CursorCB) error { 85 if k == nil { // we use `key == nil` as an indicator of the last message 86 panic("impossible per Run contract") 87 } 88 // Do not even attempt to write to `qi.itemCh` if the context is already 89 // done. Note that if multiple cases of select {...} are ready at the same 90 // time, Go chooses one randomly to proceed. We don't want that if the 91 // context is already done. 92 if ctx.Err() != nil { 93 return ctx.Err() 94 } 95 select { 96 case <-ctx.Done(): 97 return ctx.Err() 98 case qi.itemCh <- &rawQueryResult{ 99 key: k, 100 data: pm, 101 cursorCB: cursorCB, 102 }: 103 return nil 104 } 105 }) 106 }) 107 108 return qi 109 } 110 111 // Query is the original query this iterator was started with. 112 func (qi *queryIterator) Query() *Query { 113 return qi.query 114 } 115 116 // CurrentItem returns the current query result. 117 // 118 // Returns nil key if the iterator has reached its end. 119 func (qi *queryIterator) CurrentItem() (*Key, PropertyMap) { 120 if qi.currentQueryResult == nil { 121 return nil, PropertyMap{} 122 } 123 return qi.currentQueryResult.key, qi.currentQueryResult.data 124 } 125 126 // CurrentItemKey returns a serialized current item key. 127 // 128 // Returns "" if the iterator has reached its end. 129 func (qi *queryIterator) CurrentItemKey() string { 130 if qi.currentQueryResult == nil || qi.currentQueryResult.key == nil { 131 return "" 132 } 133 return string(Serialize.ToBytes(qi.currentQueryResult.key)) 134 } 135 136 // CurrentCursor returns a cursor pointing to the current item (if any). 137 // 138 // The defining property of this cursor is that if a queryIterator is recreated 139 // with it, its first Next() call will return the current item again (if any). 140 // This is useful for repopulating the heap when restarting the query from 141 // a cursor. 142 // 143 // Note that if the iterator is exhausted already, i.e. Next() returns 144 // done == true, CurrentCursor() still returns some non-nil cursor. This cursor 145 // points to a position right after the last fetched item. When resuming from 146 // such cursor, we'll either immediately discover the iterator is still 147 // exhausted, or (if the datastore state changed between calls), we'll discover 148 // new items that can be fetched now. 149 // 150 // Returns nil only if the query produced no results whatsoever and this query 151 // didn't have a cursor set. In that case we'll need to restart the query from 152 // scratch when restarting the iteration and this is precisely what `nil` cursor 153 // does. 154 func (qi *queryIterator) CurrentCursor() (Cursor, error) { 155 return qi.cursorCB() 156 } 157 158 // CurrentItemOrder returns a serialized representation of properties used for 159 // ordering the results. 160 // 161 // Such strings are directly comparable to one another. 162 func (qi *queryIterator) CurrentItemOrder() string { 163 if qi.currentItemOrderCache != "" { 164 return qi.currentItemOrderCache 165 } 166 167 if qi.currentQueryResult == nil { 168 return "" 169 } 170 171 invBuf := cmpbin.Invertible(&bytes.Buffer{}) 172 for _, column := range qi.order { 173 invBuf.SetInvert(column.Descending) 174 if column.Property == "__key__" { 175 panicIf(Serialize.Key(invBuf, qi.currentQueryResult.key)) 176 continue 177 } 178 columnData := qi.currentQueryResult.data[column.Property].Slice() 179 sort.Sort(columnData) 180 if column.Descending { 181 panicIf(Serialize.Property(invBuf, columnData[columnData.Len()-1])) 182 } else { 183 panicIf(Serialize.Property(invBuf, columnData[0])) 184 } 185 } 186 qi.currentItemOrderCache = invBuf.String() 187 return qi.currentItemOrderCache 188 } 189 190 // Next iterates to the next item and makes it current. 191 // 192 // Note: call Next() before calling any CurrentItemXXX functions to get the 193 // right results. 194 // 195 // If the iterator has finished running returns done == true and an error 196 // (if the iterator finished due to an error). The error may be a context error 197 // if the root context was canceled or has expired. 198 func (qi *queryIterator) Next() (done bool, err error) { 199 if qi.itemCh == nil { 200 panic("item channel for queryIterator is not properly initiated") 201 } 202 if !qi.done { 203 // Let's assume currentQueryResult index among the full list of query 204 // results is `T`. It means `currentQueryResult.cursorCB` is pointing to 205 // `T+1`. Also `<-qi.itemCh` will return the next result, i.e. `T+1` as 206 // well. This new `T+1` result will become the CurrentItem(). We need to 207 // make CurrentCursor() return `T+1` cursor as well. And this is precisely 208 // what `currentQueryResult.cursorCB` does. 209 // 210 // The nil check is for the very first Next() call. We already populated 211 // cursorCB correctly for this situation in `startQueryIterator`. 212 // 213 // See also DANGER warning in queryIterator doc. This is not how cursors 214 // actually behave. 215 if qi.currentQueryResult != nil { 216 qi.cursorCB = qi.currentQueryResult.cursorCB 217 } 218 qi.currentQueryResult = <-qi.itemCh 219 qi.currentItemOrderCache = "" 220 qi.done = qi.currentQueryResult.key == nil 221 } 222 return qi.done, qi.currentQueryResult.err 223 } 224 225 // rawQueryResult captures the result from raw datastore query snapshot. 226 type rawQueryResult struct { 227 key *Key 228 data PropertyMap 229 err error 230 cursorCB CursorCB // points to the entry right after `key` 231 }