github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/cursor_bucket_map.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bytes" 16 "errors" 17 "fmt" 18 "sort" 19 20 "github.com/weaviate/weaviate/entities/lsmkv" 21 ) 22 23 type CursorMap struct { 24 innerCursors []innerCursorMap 25 state []cursorStateMap 26 unlock func() 27 listCfg MapListOptionConfig 28 keyOnly bool 29 } 30 31 type cursorStateMap struct { 32 key []byte 33 value []MapPair 34 err error 35 } 36 37 type innerCursorMap interface { 38 first() ([]byte, []MapPair, error) 39 next() ([]byte, []MapPair, error) 40 seek([]byte) ([]byte, []MapPair, error) 41 } 42 43 func (b *Bucket) MapCursor(cfgs ...MapListOption) *CursorMap { 44 b.flushLock.RLock() 45 46 c := MapListOptionConfig{} 47 for _, cfg := range cfgs { 48 cfg(&c) 49 } 50 51 innerCursors, unlockSegmentGroup := b.disk.newMapCursors() 52 53 // we have a flush-RLock, so we have the guarantee that the flushing state 54 // will not change for the lifetime of the cursor, thus there can only be two 55 // states: either a flushing memtable currently exists - or it doesn't 56 if b.flushing != nil { 57 innerCursors = append(innerCursors, b.flushing.newMapCursor()) 58 } 59 60 innerCursors = append(innerCursors, b.active.newMapCursor()) 61 62 return &CursorMap{ 63 unlock: func() { 64 unlockSegmentGroup() 65 b.flushLock.RUnlock() 66 }, 67 // cursor are in order from oldest to newest, with the memtable cursor 68 // being at the very top 69 innerCursors: innerCursors, 70 listCfg: c, 71 } 72 } 73 74 func (b *Bucket) MapCursorKeyOnly(cfgs ...MapListOption) *CursorMap { 75 c := b.MapCursor(cfgs...) 76 c.keyOnly = true 77 return c 78 } 79 80 func (c *CursorMap) Seek(key []byte) ([]byte, []MapPair) { 81 c.seekAll(key) 82 return c.serveCurrentStateAndAdvance() 83 } 84 85 func (c *CursorMap) Next() ([]byte, []MapPair) { 86 // before := time.Now() 87 // defer func() { 88 // fmt.Printf("-- total next took %s\n", time.Since(before)) 89 // }() 90 return c.serveCurrentStateAndAdvance() 91 } 92 93 func (c *CursorMap) First() ([]byte, []MapPair) { 94 c.firstAll() 95 return c.serveCurrentStateAndAdvance() 96 } 97 98 func (c *CursorMap) Close() { 99 c.unlock() 100 } 101 102 func (c *CursorMap) seekAll(target []byte) { 103 state := make([]cursorStateMap, len(c.innerCursors)) 104 for i, cur := range c.innerCursors { 105 key, value, err := cur.seek(target) 106 if errors.Is(err, lsmkv.NotFound) { 107 state[i].err = err 108 continue 109 } 110 111 if err != nil { 112 panic(fmt.Errorf("unexpected error in seek: %w", err)) 113 } 114 115 state[i].key = key 116 if !c.keyOnly { 117 state[i].value = value 118 } 119 } 120 121 c.state = state 122 } 123 124 func (c *CursorMap) firstAll() { 125 state := make([]cursorStateMap, len(c.innerCursors)) 126 for i, cur := range c.innerCursors { 127 key, value, err := cur.first() 128 if errors.Is(err, lsmkv.NotFound) { 129 state[i].err = err 130 continue 131 } 132 133 if err != nil { 134 panic(fmt.Errorf("unexpected error in seek: %w", err)) 135 } 136 137 state[i].key = key 138 if !c.keyOnly { 139 state[i].value = value 140 } 141 } 142 143 c.state = state 144 } 145 146 func (c *CursorMap) serveCurrentStateAndAdvance() ([]byte, []MapPair) { 147 id, err := c.cursorWithLowestKey() 148 if err != nil { 149 if errors.Is(err, lsmkv.NotFound) { 150 return nil, nil 151 } 152 } 153 154 // check if this is a duplicate key before checking for the remaining errors, 155 // as cases such as 'entities.Deleted' can be better handled inside 156 // mergeDuplicatesInCurrentStateAndAdvance where we can be sure to act on 157 // segments in the correct order 158 if ids, ok := c.haveDuplicatesInState(id); ok { 159 return c.mergeDuplicatesInCurrentStateAndAdvance(ids) 160 } else { 161 return c.mergeDuplicatesInCurrentStateAndAdvance([]int{id}) 162 } 163 } 164 165 func (c *CursorMap) cursorWithLowestKey() (int, error) { 166 err := lsmkv.NotFound 167 pos := -1 168 var lowest []byte 169 170 for i, res := range c.state { 171 if errors.Is(res.err, lsmkv.NotFound) { 172 continue 173 } 174 175 if lowest == nil || bytes.Compare(res.key, lowest) <= 0 { 176 pos = i 177 err = res.err 178 lowest = res.key 179 } 180 } 181 182 if err != nil { 183 return pos, err 184 } 185 186 return pos, nil 187 } 188 189 func (c *CursorMap) haveDuplicatesInState(idWithLowestKey int) ([]int, bool) { 190 key := c.state[idWithLowestKey].key 191 192 var idsFound []int 193 194 for i, cur := range c.state { 195 if i == idWithLowestKey { 196 idsFound = append(idsFound, i) 197 continue 198 } 199 200 if bytes.Equal(key, cur.key) { 201 idsFound = append(idsFound, i) 202 } 203 } 204 205 return idsFound, len(idsFound) > 1 206 } 207 208 // if there are no duplicates present it will still work as returning the 209 // latest result is the same as returning the only result 210 func (c *CursorMap) mergeDuplicatesInCurrentStateAndAdvance(ids []int) ([]byte, []MapPair) { 211 // take the key from any of the results, we have the guarantee that they're 212 // all the same 213 key := c.state[ids[0]].key 214 215 // appending := time.Duration(0) 216 // advancing := time.Duration(0) 217 218 var perSegmentResults [][]MapPair 219 220 for _, id := range ids { 221 candidates := c.state[id].value 222 perSegmentResults = append(perSegmentResults, candidates) 223 224 // before = time.Now() 225 c.advanceInner(id) 226 // advancing += time.Since(before) 227 } 228 // fmt.Printf("--- extract values [appending] took %s\n", appending) 229 // fmt.Printf("--- extract values [advancing] took %s\n", advancing) 230 231 if c.listCfg.legacyRequireManualSorting { 232 for i := range perSegmentResults { 233 sort.Slice(perSegmentResults[i], func(a, b int) bool { 234 return bytes.Compare(perSegmentResults[i][a].Key, 235 perSegmentResults[i][b].Key) == -1 236 }) 237 } 238 } 239 240 merged, err := newSortedMapMerger().do(perSegmentResults) 241 if err != nil { 242 panic(fmt.Errorf("unexpected error decoding map values: %w", err)) 243 } 244 if len(merged) == 0 { 245 // all values deleted, skip key 246 return c.Next() 247 } 248 249 // TODO remove keyOnly option, not used anyway 250 if !c.keyOnly { 251 return key, merged 252 } else { 253 return key, nil 254 } 255 } 256 257 func (c *CursorMap) advanceInner(id int) { 258 k, v, err := c.innerCursors[id].next() 259 if errors.Is(err, lsmkv.NotFound) { 260 c.state[id].err = err 261 c.state[id].key = nil 262 c.state[id].value = nil 263 return 264 } 265 266 if errors.Is(err, lsmkv.Deleted) { 267 c.state[id].err = err 268 c.state[id].key = k 269 c.state[id].value = nil 270 return 271 } 272 273 if err != nil { 274 panic(fmt.Errorf("unexpected error in advance: %w", err)) 275 } 276 277 c.state[id].key = k 278 if !c.keyOnly { 279 c.state[id].value = v 280 } 281 c.state[id].err = nil 282 }