github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/table/sstable/iterator.go (about) 1 /* 2 * Copyright 2017 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package sstable 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "io" 23 "math" 24 "sort" 25 26 "github.com/pingcap/badger/surf" 27 "github.com/pingcap/badger/y" 28 ) 29 30 type singleKeyIterator struct { 31 oldOffset uint32 32 loaded bool 33 latestVal []byte 34 oldVals entrySlice 35 idx int 36 oldBlock []byte 37 } 38 39 func (ski *singleKeyIterator) set(oldOffset uint32, latestVal []byte) { 40 ski.oldOffset = oldOffset 41 ski.latestVal = latestVal 42 ski.loaded = false 43 ski.idx = 0 44 } 45 46 func (ski *singleKeyIterator) getVal() (val []byte) { 47 if ski.idx == 0 { 48 return ski.latestVal 49 } 50 oldEntry := ski.oldVals.getEntry(ski.idx - 1) 51 return oldEntry 52 } 53 54 func (ski *singleKeyIterator) loadOld() { 55 numEntries := bytesToU32(ski.oldBlock[ski.oldOffset:]) 56 endOffsStartIdx := ski.oldOffset + 4 57 endOffsEndIdx := endOffsStartIdx + 4*numEntries 58 ski.oldVals.endOffs = bytesToU32Slice(ski.oldBlock[endOffsStartIdx:endOffsEndIdx]) 59 valueEndOff := endOffsEndIdx + ski.oldVals.endOffs[numEntries-1] 60 ski.oldVals.data = ski.oldBlock[endOffsEndIdx:valueEndOff] 61 ski.loaded = true 62 } 63 64 func (ski *singleKeyIterator) length() int { 65 return ski.oldVals.length() + 1 66 } 67 68 type blockIterator struct { 69 entries entrySlice 70 idx int 71 err error 72 73 globalTsBytes [8]byte 74 globalTs uint64 75 key y.Key 76 val []byte 77 78 baseLen uint16 79 ski singleKeyIterator 80 81 block *block 82 } 83 84 func (itr *blockIterator) setBlock(b *block) { 85 itr.block.done() 86 itr.block = b 87 itr.err = nil 88 itr.idx = 0 89 itr.key.Reset() 90 itr.val = itr.val[:0] 91 itr.loadEntries(b.data) 92 itr.key.UserKey = append(itr.key.UserKey[:0], b.baseKey[:itr.baseLen]...) 93 } 94 95 func (itr *blockIterator) valid() bool { 96 return itr != nil && itr.err == nil 97 } 98 99 func (itr *blockIterator) Error() error { 100 return itr.err 101 } 102 103 // loadEntries loads the entryEndOffsets for binary searching for a key. 104 func (itr *blockIterator) loadEntries(data []byte) { 105 // Get the number of entries from the end of `data` (and remove it). 106 dataLen := len(data) 107 itr.baseLen = binary.LittleEndian.Uint16(data[dataLen-2:]) 108 entriesNum := int(bytesToU32(data[dataLen-6:])) 109 entriesEnd := dataLen - 6 110 entriesStart := entriesEnd - entriesNum*4 111 itr.entries.endOffs = bytesToU32Slice(data[entriesStart:entriesEnd]) 112 itr.entries.data = data[:entriesStart] 113 } 114 115 // Seek brings us to the first block element that is >= input key. 116 // The binary search will begin at `start`, you can use it to skip some items. 117 func (itr *blockIterator) seek(key []byte) { 118 foundEntryIdx := sort.Search(itr.entries.length(), func(idx int) bool { 119 itr.setIdx(idx) 120 return bytes.Compare(itr.key.UserKey, key) >= 0 121 }) 122 itr.setIdx(foundEntryIdx) 123 } 124 125 // seekToFirst brings us to the first element. Valid should return true. 126 func (itr *blockIterator) seekToFirst() { 127 itr.setIdx(0) 128 } 129 130 // seekToLast brings us to the last element. Valid should return true. 131 func (itr *blockIterator) seekToLast() { 132 itr.setIdx(itr.entries.length() - 1) 133 } 134 135 // setIdx sets the iterator to the entry index and set the current key and value. 136 func (itr *blockIterator) setIdx(i int) { 137 itr.idx = i 138 if i >= itr.entries.length() || i < 0 { 139 itr.err = io.EOF 140 return 141 } 142 itr.err = nil 143 entryData := itr.entries.getEntry(i) 144 diffKeyLen := binary.LittleEndian.Uint16(entryData) 145 entryData = entryData[2:] 146 itr.key.UserKey = append(itr.key.UserKey[:itr.baseLen], entryData[:diffKeyLen]...) 147 entryData = entryData[diffKeyLen:] 148 hasOld := entryData[0] != 0 149 entryData = entryData[1:] 150 var oldOffset uint32 151 if hasOld { 152 oldOffset = bytesToU32(entryData) 153 entryData = entryData[4:] 154 } 155 if itr.globalTs != 0 { 156 itr.key.Version = itr.globalTs 157 } else { 158 itr.key.Version = bytesToU64(entryData) 159 } 160 itr.val = entryData 161 itr.ski.set(oldOffset, itr.val) 162 } 163 164 func (itr *blockIterator) hasOldVersion() bool { 165 return itr.ski.oldOffset != 0 166 } 167 168 func (itr *blockIterator) next() { 169 itr.setIdx(itr.idx + 1) 170 } 171 172 func (itr *blockIterator) prev() { 173 itr.setIdx(itr.idx - 1) 174 } 175 176 func (itr *blockIterator) close() { 177 itr.block.done() 178 } 179 180 // Iterator is an iterator for a Table. 181 type Iterator struct { 182 t *Table 183 tIdx *tableIndex 184 surf *surf.Iterator 185 bpos int 186 bi blockIterator 187 err error 188 189 // Internally, Iterator is bidirectional. However, we only expose the 190 // unidirectional functionality for now. 191 reversed bool 192 } 193 194 // NewIterator returns a new iterator of the Table 195 func (t *Table) newIterator(reversed bool) *Iterator { 196 idx, err := t.getIndex() 197 if err != nil { 198 return &Iterator{err: err} 199 } 200 return t.newIteratorWithIdx(reversed, idx) 201 } 202 203 func (t *Table) newIteratorWithIdx(reversed bool, index *tableIndex) *Iterator { 204 it := &Iterator{t: t, reversed: reversed, tIdx: index} 205 it.bi.globalTs = t.globalTs 206 if t.oldBlockLen > 0 { 207 y.Assert(len(t.oldBlock) > 0) 208 } 209 it.bi.ski.oldBlock = t.oldBlock 210 binary.BigEndian.PutUint64(it.bi.globalTsBytes[:], math.MaxUint64-t.globalTs) 211 if index.surf != nil { 212 it.surf = index.surf.NewIterator() 213 } 214 return it 215 } 216 217 func (itr *Iterator) reset() { 218 itr.bpos = 0 219 itr.err = nil 220 } 221 222 // Valid follows the y.Iterator interface 223 func (itr *Iterator) Valid() bool { 224 return itr.err == nil 225 } 226 227 func (itr *Iterator) Error() error { 228 if itr.err == io.EOF { 229 return nil 230 } 231 return itr.err 232 } 233 234 func (itr *Iterator) seekToFirst() { 235 numBlocks := len(itr.tIdx.blockEndOffsets) 236 if numBlocks == 0 { 237 itr.err = io.EOF 238 return 239 } 240 itr.bpos = 0 241 block, err := itr.t.block(itr.bpos, itr.tIdx) 242 if err != nil { 243 itr.err = err 244 return 245 } 246 itr.bi.setBlock(block) 247 itr.bi.seekToFirst() 248 itr.err = itr.bi.Error() 249 } 250 251 func (itr *Iterator) seekToLast() { 252 numBlocks := len(itr.tIdx.blockEndOffsets) 253 if numBlocks == 0 { 254 itr.err = io.EOF 255 return 256 } 257 itr.bpos = numBlocks - 1 258 block, err := itr.t.block(itr.bpos, itr.tIdx) 259 if err != nil { 260 itr.err = err 261 return 262 } 263 itr.bi.setBlock(block) 264 itr.bi.seekToLast() 265 itr.err = itr.bi.Error() 266 } 267 268 func (itr *Iterator) seekInBlock(blockIdx int, key []byte) { 269 itr.bpos = blockIdx 270 block, err := itr.t.block(blockIdx, itr.tIdx) 271 if err != nil { 272 itr.err = err 273 return 274 } 275 itr.bi.setBlock(block) 276 itr.bi.seek(key) 277 itr.err = itr.bi.Error() 278 } 279 280 func (itr *Iterator) seekFromOffset(blockIdx int, offset int, key []byte) { 281 itr.bpos = blockIdx 282 block, err := itr.t.block(blockIdx, itr.tIdx) 283 if err != nil { 284 itr.err = err 285 return 286 } 287 itr.bi.setBlock(block) 288 itr.bi.setIdx(offset) 289 if bytes.Compare(itr.bi.key.UserKey, key) >= 0 { 290 return 291 } 292 itr.bi.seek(key) 293 itr.err = itr.bi.err 294 } 295 296 func (itr *Iterator) seekBlock(key []byte) int { 297 return sort.Search(len(itr.tIdx.blockEndOffsets), func(idx int) bool { 298 blockBaseKey := itr.tIdx.baseKeys.getEntry(idx) 299 return bytes.Compare(blockBaseKey, key) > 0 300 }) 301 } 302 303 // seekFrom brings us to a key that is >= input key. 304 func (itr *Iterator) seekFrom(key []byte) { 305 itr.err = nil 306 itr.reset() 307 308 idx := itr.seekBlock(key) 309 if itr.err != nil { 310 return 311 } 312 if idx == 0 { 313 // The smallest key in our table is already strictly > key. We can return that. 314 // This is like a SeekToFirst. 315 itr.seekInBlock(0, key) 316 return 317 } 318 319 // block[idx].smallest is > key. 320 // Since idx>0, we know block[idx-1].smallest is <= key. 321 // There are two cases. 322 // 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first 323 // element of block[idx]. 324 // 2) Some element in block[idx-1] is >= key. We should go to that element. 325 itr.seekInBlock(idx-1, key) 326 if itr.err == io.EOF { 327 // Case 1. Need to visit block[idx]. 328 if idx == len(itr.tIdx.blockEndOffsets) { 329 // If idx == len(itr.t.blockEndOffsets), then input key is greater than ANY element of table. 330 // There's nothing we can do. Valid() should return false as we seek to end of table. 331 return 332 } 333 itr.err = nil 334 // Since block[idx].smallest is > key. This is essentially a block[idx].SeekToFirst. 335 itr.seekFromOffset(idx, 0, key) 336 } 337 // Case 2: No need to do anything. We already did the seek in block[idx-1]. 338 } 339 340 // seek will reset iterator and seek to >= key. 341 func (itr *Iterator) seek(key []byte) { 342 itr.err = nil 343 itr.reset() 344 if itr.surf == nil { 345 itr.seekFrom(key) 346 return 347 } 348 349 sit := itr.surf 350 sit.Seek(key) 351 if !sit.Valid() { 352 itr.err = io.EOF 353 return 354 } 355 356 var pos entryPosition 357 pos.decode(sit.Value()) 358 itr.seekFromOffset(int(pos.blockIdx), int(pos.offset), key) 359 } 360 361 // seekForPrev will reset iterator and seek to <= key. 362 func (itr *Iterator) seekForPrev(key []byte) { 363 // TODO: Optimize this. We shouldn't have to take a Prev step. 364 itr.seekFrom(key) 365 if !bytes.Equal(itr.Key().UserKey, key) { 366 itr.prev() 367 } 368 } 369 370 func (itr *Iterator) next() { 371 itr.err = nil 372 373 if itr.bpos >= len(itr.tIdx.blockEndOffsets) { 374 itr.err = io.EOF 375 return 376 } 377 378 if itr.bi.entries.length() == 0 { 379 block, err := itr.t.block(itr.bpos, itr.tIdx) 380 if err != nil { 381 itr.err = err 382 return 383 } 384 itr.bi.setBlock(block) 385 itr.bi.seekToFirst() 386 itr.err = itr.bi.Error() 387 return 388 } 389 390 itr.bi.next() 391 if !itr.bi.valid() { 392 itr.bpos++ 393 itr.bi.entries.reset() 394 itr.next() 395 return 396 } 397 } 398 399 func (itr *Iterator) prev() { 400 itr.err = nil 401 if itr.bpos < 0 { 402 itr.err = io.EOF 403 return 404 } 405 406 if itr.bi.entries.length() == 0 { 407 block, err := itr.t.block(itr.bpos, itr.tIdx) 408 if err != nil { 409 itr.err = err 410 return 411 } 412 itr.bi.setBlock(block) 413 itr.bi.seekToLast() 414 itr.err = itr.bi.Error() 415 return 416 } 417 418 itr.bi.prev() 419 if !itr.bi.valid() { 420 itr.bpos-- 421 itr.bi.entries.reset() 422 itr.prev() 423 return 424 } 425 } 426 427 // Key follows the y.Iterator interface 428 func (itr *Iterator) Key() y.Key { 429 return itr.bi.key 430 } 431 432 // Value follows the y.Iterator interface 433 func (itr *Iterator) Value() (ret y.ValueStruct) { 434 ret.Decode(itr.bi.val) 435 return 436 } 437 438 // FillValue fill the value struct. 439 func (itr *Iterator) FillValue(vs *y.ValueStruct) { 440 vs.Decode(itr.bi.val) 441 } 442 443 // Next follows the y.Iterator interface 444 func (itr *Iterator) Next() { 445 if !itr.reversed { 446 itr.next() 447 } else { 448 itr.prev() 449 } 450 } 451 452 func (itr *Iterator) NextVersion() bool { 453 if itr.bi.ski.oldOffset == 0 { 454 return false 455 } 456 if !itr.bi.ski.loaded { 457 itr.bi.ski.loadOld() 458 } 459 if itr.bi.ski.idx+1 < itr.bi.ski.length() { 460 itr.bi.ski.idx++ 461 itr.bi.val = itr.bi.ski.getVal() 462 itr.bi.key.Version = bytesToU64(itr.bi.val) 463 return true 464 } 465 return false 466 } 467 468 // Rewind follows the y.Iterator interface 469 func (itr *Iterator) Rewind() { 470 if !itr.reversed { 471 itr.seekToFirst() 472 } else { 473 itr.seekToLast() 474 } 475 } 476 477 // Seek follows the y.Iterator interface 478 func (itr *Iterator) Seek(key []byte) { 479 if !itr.reversed { 480 itr.seek(key) 481 } else { 482 itr.seekForPrev(key) 483 } 484 } 485 486 // Close closes the iterator (and it must be called). 487 func (itr *Iterator) Close() error { 488 itr.bi.close() 489 return nil 490 }