github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/cursor_bucket_replace.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bytes" 16 17 "github.com/pkg/errors" 18 "github.com/weaviate/weaviate/entities/lsmkv" 19 ) 20 21 type CursorReplace struct { 22 innerCursors []innerCursorReplace 23 state []cursorStateReplace 24 unlock func() 25 serveCache cursorStateReplace 26 27 reusableIDList []int 28 } 29 30 type innerCursorReplace interface { 31 first() ([]byte, []byte, error) 32 next() ([]byte, []byte, error) 33 seek([]byte) ([]byte, []byte, error) 34 } 35 36 type cursorStateReplace struct { 37 key []byte 38 value []byte 39 err error 40 } 41 42 // Cursor holds a RLock for the flushing state. It needs to be closed using the 43 // .Close() methods or otherwise the lock will never be released 44 func (b *Bucket) Cursor() *CursorReplace { 45 b.flushLock.RLock() 46 47 if b.strategy != StrategyReplace { 48 panic("Cursor() called on strategy other than 'replace'") 49 } 50 51 innerCursors, unlockSegmentGroup := b.disk.newCursors() 52 53 // we have a flush-RLock, so we have the guarantee that the flushing state 54 // will not change for the lifetime of the cursor, thus there can only be two 55 // states: either a flushing memtable currently exists - or it doesn't 56 if b.flushing != nil { 57 innerCursors = append(innerCursors, b.flushing.newCursor()) 58 } 59 60 innerCursors = append(innerCursors, b.active.newCursor()) 61 62 return &CursorReplace{ 63 // cursor are in order from oldest to newest, with the memtable cursor 64 // being at the very top 65 innerCursors: innerCursors, 66 unlock: func() { 67 unlockSegmentGroup() 68 b.flushLock.RUnlock() 69 }, 70 } 71 } 72 73 func (c *CursorReplace) Close() { 74 c.unlock() 75 } 76 77 func (c *CursorReplace) seekAll(target []byte) { 78 state := make([]cursorStateReplace, len(c.innerCursors)) 79 for i, cur := range c.innerCursors { 80 key, value, err := cur.seek(target) 81 if errors.Is(err, lsmkv.NotFound) { 82 state[i].err = err 83 continue 84 } 85 86 if errors.Is(err, lsmkv.Deleted) { 87 state[i].err = err 88 state[i].key = key 89 continue 90 } 91 92 if err != nil { 93 panic(errors.Wrap(err, "unexpected error in seek (cursor type 'replace')")) 94 } 95 96 state[i].key = key 97 state[i].value = value 98 } 99 100 c.state = state 101 } 102 103 func (c *CursorReplace) serveCurrentStateAndAdvance() ([]byte, []byte) { 104 id, err := c.cursorWithLowestKey() 105 if err != nil { 106 if errors.Is(err, lsmkv.NotFound) { 107 return nil, nil 108 } 109 } 110 111 // check if this is a duplicate key before checking for the remaining errors, 112 // as cases such as 'entities.Deleted' can be better handled inside 113 // mergeDuplicatesInCurrentStateAndAdvance where we can be sure to act on 114 // segments in the correct order 115 if ids, ok := c.haveDuplicatesInState(id); ok { 116 return c.mergeDuplicatesInCurrentStateAndAdvance(ids) 117 } else { 118 return c.mergeDuplicatesInCurrentStateAndAdvance([]int{id}) 119 } 120 } 121 122 func (c *CursorReplace) haveDuplicatesInState(idWithLowestKey int) ([]int, bool) { 123 key := c.state[idWithLowestKey].key 124 125 c.reusableIDList = c.reusableIDList[:0] 126 127 for i, cur := range c.state { 128 if i == idWithLowestKey { 129 c.reusableIDList = append(c.reusableIDList, i) 130 continue 131 } 132 133 if bytes.Equal(key, cur.key) { 134 c.reusableIDList = append(c.reusableIDList, i) 135 } 136 } 137 138 return c.reusableIDList, len(c.reusableIDList) > 1 139 } 140 141 // if there are no duplicates present it will still work as returning the 142 // latest result is the same as returning the only result 143 func (c *CursorReplace) mergeDuplicatesInCurrentStateAndAdvance(ids []int) ([]byte, []byte) { 144 c.copyStateIntoServeCache(ids[len(ids)-1]) 145 146 // with a replace strategy only the highest will be returned, but still all 147 // need to be advanced - or we would just encounter them again in the next 148 // round 149 for _, id := range ids { 150 c.advanceInner(id) 151 } 152 153 if errors.Is(c.serveCache.err, lsmkv.Deleted) { 154 // element was deleted, proceed with next round 155 return c.Next() 156 } 157 158 return c.serveCache.key, c.serveCache.value 159 } 160 161 func (c *CursorReplace) copyStateIntoServeCache(pos int) { 162 resMut := c.state[pos] 163 if len(resMut.key) > cap(c.serveCache.key) { 164 c.serveCache.key = make([]byte, len(resMut.key)) 165 } else { 166 c.serveCache.key = c.serveCache.key[:len(resMut.key)] 167 } 168 169 if len(resMut.value) > cap(c.serveCache.value) { 170 c.serveCache.value = make([]byte, len(resMut.value)) 171 } else { 172 c.serveCache.value = c.serveCache.value[:len(resMut.value)] 173 } 174 175 copy(c.serveCache.key, resMut.key) 176 copy(c.serveCache.value, resMut.value) 177 c.serveCache.err = resMut.err 178 } 179 180 func (c *CursorReplace) Seek(key []byte) ([]byte, []byte) { 181 c.seekAll(key) 182 return c.serveCurrentStateAndAdvance() 183 } 184 185 func (c *CursorReplace) cursorWithLowestKey() (int, error) { 186 err := lsmkv.NotFound 187 pos := -1 188 var lowest []byte 189 190 for i, res := range c.state { 191 if errors.Is(res.err, lsmkv.NotFound) { 192 continue 193 } 194 195 if lowest == nil || bytes.Compare(res.key, lowest) <= 0 { 196 pos = i 197 err = res.err 198 lowest = res.key 199 } 200 } 201 202 if err != nil { 203 return pos, err 204 } 205 206 return pos, nil 207 } 208 209 func (c *CursorReplace) advanceInner(id int) { 210 k, v, err := c.innerCursors[id].next() 211 if errors.Is(err, lsmkv.NotFound) { 212 c.state[id].err = err 213 c.state[id].key = nil 214 c.state[id].value = nil 215 return 216 } 217 218 if errors.Is(err, lsmkv.Deleted) { 219 c.state[id].err = err 220 c.state[id].key = k 221 c.state[id].value = nil 222 return 223 } 224 225 if err != nil { 226 panic(errors.Wrap(err, "unexpected error in advance")) 227 } 228 229 c.state[id].key = k 230 c.state[id].value = v 231 c.state[id].err = nil 232 } 233 234 func (c *CursorReplace) Next() ([]byte, []byte) { 235 return c.serveCurrentStateAndAdvance() 236 } 237 238 func (c *CursorReplace) firstAll() { 239 state := make([]cursorStateReplace, len(c.innerCursors)) 240 for i, cur := range c.innerCursors { 241 key, value, err := cur.first() 242 if errors.Is(err, lsmkv.NotFound) { 243 state[i].err = err 244 continue 245 } 246 if errors.Is(err, lsmkv.Deleted) { 247 state[i].err = err 248 state[i].key = key 249 continue 250 } 251 252 if err != nil { 253 panic(errors.Wrap(err, "unexpected error in first (cursor type 'replace')")) 254 } 255 256 state[i].key = key 257 state[i].value = value 258 } 259 260 c.state = state 261 } 262 263 func (c *CursorReplace) First() ([]byte, []byte) { 264 c.firstAll() 265 return c.serveCurrentStateAndAdvance() 266 }