github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/table/sstable/iterator.go (about) 1 /* 2 * Copyright 2017 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package sstable 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "io" 23 "math" 24 "sort" 25 26 "github.com/coocood/badger/surf" 27 "github.com/coocood/badger/y" 28 ) 29 30 type singleKeyIterator struct { 31 oldOffset uint32 32 loaded bool 33 latestVal []byte 34 oldVals entrySlice 35 idx int 36 oldBlock []byte 37 } 38 39 func (ski *singleKeyIterator) set(oldOffset uint32, latestVal []byte) { 40 ski.oldOffset = oldOffset 41 ski.latestVal = latestVal 42 ski.loaded = false 43 ski.idx = 0 44 } 45 46 func (ski *singleKeyIterator) getVal() (val []byte) { 47 if ski.idx == 0 { 48 return ski.latestVal 49 } 50 oldEntry := ski.oldVals.getEntry(ski.idx - 1) 51 return oldEntry 52 } 53 54 func (ski *singleKeyIterator) loadOld() { 55 numEntries := bytesToU32(ski.oldBlock[ski.oldOffset:]) 56 endOffsStartIdx := ski.oldOffset + 4 57 endOffsEndIdx := endOffsStartIdx + 4*numEntries 58 ski.oldVals.endOffs = bytesToU32Slice(ski.oldBlock[endOffsStartIdx:endOffsEndIdx]) 59 valueEndOff := endOffsEndIdx + ski.oldVals.endOffs[numEntries-1] 60 ski.oldVals.data = ski.oldBlock[endOffsEndIdx:valueEndOff] 61 ski.loaded = true 62 } 63 64 func (ski *singleKeyIterator) length() int { 65 return ski.oldVals.length() + 1 66 } 67 68 type blockIterator struct { 69 entries entrySlice 70 idx int 71 err error 72 73 globalTsBytes [8]byte 74 globalTs uint64 75 key y.Key 76 val []byte 77 78 baseLen uint16 79 ski singleKeyIterator 80 } 81 82 func (itr *blockIterator) setBlock(b block) { 83 itr.err = nil 84 itr.idx = 0 85 itr.key.Reset() 86 itr.val = itr.val[:0] 87 itr.loadEntries(b.data) 88 itr.key.UserKey = append(itr.key.UserKey[:0], b.baseKey[:itr.baseLen]...) 89 } 90 91 func (itr *blockIterator) valid() bool { 92 return itr != nil && itr.err == nil 93 } 94 95 func (itr *blockIterator) Error() error { 96 return itr.err 97 } 98 99 // loadEntries loads the entryEndOffsets for binary searching for a key. 100 func (itr *blockIterator) loadEntries(data []byte) { 101 // Get the number of entries from the end of `data` (and remove it). 102 dataLen := len(data) 103 itr.baseLen = binary.LittleEndian.Uint16(data[dataLen-2:]) 104 entriesNum := int(bytesToU32(data[dataLen-6:])) 105 entriesEnd := dataLen - 6 106 entriesStart := entriesEnd - entriesNum*4 107 itr.entries.endOffs = bytesToU32Slice(data[entriesStart:entriesEnd]) 108 itr.entries.data = data[:entriesStart] 109 } 110 111 // Seek brings us to the first block element that is >= input key. 112 // The binary search will begin at `start`, you can use it to skip some items. 113 func (itr *blockIterator) seek(key []byte) { 114 foundEntryIdx := sort.Search(itr.entries.length(), func(idx int) bool { 115 itr.setIdx(idx) 116 return bytes.Compare(itr.key.UserKey, key) >= 0 117 }) 118 itr.setIdx(foundEntryIdx) 119 } 120 121 // seekToFirst brings us to the first element. Valid should return true. 122 func (itr *blockIterator) seekToFirst() { 123 itr.setIdx(0) 124 } 125 126 // seekToLast brings us to the last element. Valid should return true. 127 func (itr *blockIterator) seekToLast() { 128 itr.setIdx(itr.entries.length() - 1) 129 } 130 131 // setIdx sets the iterator to the entry index and set the current key and value. 132 func (itr *blockIterator) setIdx(i int) { 133 itr.idx = i 134 if i >= itr.entries.length() || i < 0 { 135 itr.err = io.EOF 136 return 137 } 138 itr.err = nil 139 entryData := itr.entries.getEntry(i) 140 diffKeyLen := binary.LittleEndian.Uint16(entryData) 141 entryData = entryData[2:] 142 itr.key.UserKey = append(itr.key.UserKey[:itr.baseLen], entryData[:diffKeyLen]...) 143 entryData = entryData[diffKeyLen:] 144 hasOld := entryData[0] != 0 145 entryData = entryData[1:] 146 var oldOffset uint32 147 if hasOld { 148 oldOffset = bytesToU32(entryData) 149 entryData = entryData[4:] 150 } 151 if itr.globalTs != 0 { 152 itr.key.Version = itr.globalTs 153 } else { 154 itr.key.Version = bytesToU64(entryData) 155 } 156 itr.val = entryData 157 itr.ski.idx = 0 158 if hasOld { 159 itr.ski.set(oldOffset, itr.val) 160 } 161 } 162 163 func (itr *blockIterator) hasOldVersion() bool { 164 return itr.ski.oldOffset != 0 165 } 166 167 func (itr *blockIterator) next() { 168 itr.setIdx(itr.idx + 1) 169 } 170 171 func (itr *blockIterator) prev() { 172 itr.setIdx(itr.idx - 1) 173 } 174 175 // Iterator is an iterator for a Table. 176 type Iterator struct { 177 t *Table 178 tIdx *tableIndex 179 surf *surf.Iterator 180 bpos int 181 bi blockIterator 182 err error 183 184 // Internally, Iterator is bidirectional. However, we only expose the 185 // unidirectional functionality for now. 186 reversed bool 187 } 188 189 // NewIterator returns a new iterator of the Table 190 func (t *Table) newIterator(reversed bool) *Iterator { 191 idx, err := t.getIndex() 192 if err != nil { 193 return &Iterator{err: err} 194 } 195 return t.newIteratorWithIdx(reversed, idx) 196 } 197 198 func (t *Table) newIteratorWithIdx(reversed bool, index *tableIndex) *Iterator { 199 it := &Iterator{t: t, reversed: reversed, tIdx: index} 200 it.bi.globalTs = t.globalTs 201 if t.oldBlockLen > 0 { 202 y.Assert(len(t.oldBlock) > 0) 203 } 204 it.bi.ski.oldBlock = t.oldBlock 205 binary.BigEndian.PutUint64(it.bi.globalTsBytes[:], math.MaxUint64-t.globalTs) 206 if index.surf != nil { 207 it.surf = index.surf.NewIterator() 208 } 209 return it 210 } 211 212 func (itr *Iterator) reset() { 213 itr.bpos = 0 214 itr.err = nil 215 } 216 217 // Valid follows the y.Iterator interface 218 func (itr *Iterator) Valid() bool { 219 return itr.err == nil 220 } 221 222 func (itr *Iterator) Error() error { 223 if itr.err == io.EOF { 224 return nil 225 } 226 return itr.err 227 } 228 229 func (itr *Iterator) seekToFirst() { 230 numBlocks := len(itr.tIdx.blockEndOffsets) 231 if numBlocks == 0 { 232 itr.err = io.EOF 233 return 234 } 235 itr.bpos = 0 236 block, err := itr.t.block(itr.bpos, itr.tIdx) 237 if err != nil { 238 itr.err = err 239 return 240 } 241 itr.bi.setBlock(block) 242 itr.bi.seekToFirst() 243 itr.err = itr.bi.Error() 244 } 245 246 func (itr *Iterator) seekToLast() { 247 numBlocks := len(itr.tIdx.blockEndOffsets) 248 if numBlocks == 0 { 249 itr.err = io.EOF 250 return 251 } 252 itr.bpos = numBlocks - 1 253 block, err := itr.t.block(itr.bpos, itr.tIdx) 254 if err != nil { 255 itr.err = err 256 return 257 } 258 itr.bi.setBlock(block) 259 itr.bi.seekToLast() 260 itr.err = itr.bi.Error() 261 } 262 263 func (itr *Iterator) seekInBlock(blockIdx int, key []byte) { 264 itr.bpos = blockIdx 265 block, err := itr.t.block(blockIdx, itr.tIdx) 266 if err != nil { 267 itr.err = err 268 return 269 } 270 itr.bi.setBlock(block) 271 itr.bi.seek(key) 272 itr.err = itr.bi.Error() 273 } 274 275 func (itr *Iterator) seekFromOffset(blockIdx int, offset int, key []byte) { 276 itr.bpos = blockIdx 277 block, err := itr.t.block(blockIdx, itr.tIdx) 278 if err != nil { 279 itr.err = err 280 return 281 } 282 itr.bi.setBlock(block) 283 itr.bi.setIdx(offset) 284 if bytes.Compare(itr.bi.key.UserKey, key) >= 0 { 285 return 286 } 287 itr.bi.seek(key) 288 itr.err = itr.bi.err 289 } 290 291 func (itr *Iterator) seekBlock(key []byte) int { 292 return sort.Search(len(itr.tIdx.blockEndOffsets), func(idx int) bool { 293 blockBaseKey := itr.tIdx.baseKeys.getEntry(idx) 294 return bytes.Compare(blockBaseKey, key) > 0 295 }) 296 } 297 298 // seekFrom brings us to a key that is >= input key. 299 func (itr *Iterator) seekFrom(key []byte) { 300 itr.err = nil 301 itr.reset() 302 303 idx := itr.seekBlock(key) 304 if itr.err != nil { 305 return 306 } 307 if idx == 0 { 308 // The smallest key in our table is already strictly > key. We can return that. 309 // This is like a SeekToFirst. 310 itr.seekInBlock(0, key) 311 return 312 } 313 314 // block[idx].smallest is > key. 315 // Since idx>0, we know block[idx-1].smallest is <= key. 316 // There are two cases. 317 // 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first 318 // element of block[idx]. 319 // 2) Some element in block[idx-1] is >= key. We should go to that element. 320 itr.seekInBlock(idx-1, key) 321 if itr.err == io.EOF { 322 // Case 1. Need to visit block[idx]. 323 if idx == len(itr.tIdx.blockEndOffsets) { 324 // If idx == len(itr.t.blockEndOffsets), then input key is greater than ANY element of table. 325 // There's nothing we can do. Valid() should return false as we seek to end of table. 326 return 327 } 328 // Since block[idx].smallest is > key. This is essentially a block[idx].SeekToFirst. 329 itr.seekFromOffset(idx, 0, key) 330 } 331 // Case 2: No need to do anything. We already did the seek in block[idx-1]. 332 } 333 334 // seek will reset iterator and seek to >= key. 335 func (itr *Iterator) seek(key []byte) { 336 itr.err = nil 337 itr.reset() 338 if itr.surf == nil { 339 itr.seekFrom(key) 340 return 341 } 342 343 sit := itr.surf 344 sit.Seek(key) 345 if !sit.Valid() { 346 itr.err = io.EOF 347 return 348 } 349 350 var pos entryPosition 351 pos.decode(sit.Value()) 352 itr.seekFromOffset(int(pos.blockIdx), int(pos.offset), key) 353 } 354 355 // seekForPrev will reset iterator and seek to <= key. 356 func (itr *Iterator) seekForPrev(key []byte) { 357 // TODO: Optimize this. We shouldn't have to take a Prev step. 358 itr.seekFrom(key) 359 if !bytes.Equal(itr.Key().UserKey, key) { 360 itr.prev() 361 } 362 } 363 364 func (itr *Iterator) next() { 365 itr.err = nil 366 367 if itr.bpos >= len(itr.tIdx.blockEndOffsets) { 368 itr.err = io.EOF 369 return 370 } 371 372 if itr.bi.entries.length() == 0 { 373 block, err := itr.t.block(itr.bpos, itr.tIdx) 374 if err != nil { 375 itr.err = err 376 return 377 } 378 itr.bi.setBlock(block) 379 itr.bi.seekToFirst() 380 itr.err = itr.bi.Error() 381 return 382 } 383 384 itr.bi.next() 385 if !itr.bi.valid() { 386 itr.bpos++ 387 itr.bi.entries.reset() 388 itr.next() 389 return 390 } 391 } 392 393 func (itr *Iterator) prev() { 394 itr.err = nil 395 if itr.bpos < 0 { 396 itr.err = io.EOF 397 return 398 } 399 400 if itr.bi.entries.length() == 0 { 401 block, err := itr.t.block(itr.bpos, itr.tIdx) 402 if err != nil { 403 itr.err = err 404 return 405 } 406 itr.bi.setBlock(block) 407 itr.bi.seekToLast() 408 itr.err = itr.bi.Error() 409 return 410 } 411 412 itr.bi.prev() 413 if !itr.bi.valid() { 414 itr.bpos-- 415 itr.bi.entries.reset() 416 itr.prev() 417 return 418 } 419 } 420 421 // Key follows the y.Iterator interface 422 func (itr *Iterator) Key() y.Key { 423 return itr.bi.key 424 } 425 426 // Value follows the y.Iterator interface 427 func (itr *Iterator) Value() (ret y.ValueStruct) { 428 ret.Decode(itr.bi.val) 429 return 430 } 431 432 // FillValue fill the value struct. 433 func (itr *Iterator) FillValue(vs *y.ValueStruct) { 434 vs.Decode(itr.bi.val) 435 } 436 437 // Next follows the y.Iterator interface 438 func (itr *Iterator) Next() { 439 if !itr.reversed { 440 itr.next() 441 } else { 442 itr.prev() 443 } 444 } 445 446 func (itr *Iterator) NextVersion() bool { 447 if itr.bi.ski.oldOffset == 0 { 448 return false 449 } 450 if !itr.bi.ski.loaded { 451 itr.bi.ski.loadOld() 452 } 453 if itr.bi.ski.idx+1 < itr.bi.ski.length() { 454 itr.bi.ski.idx++ 455 itr.bi.val = itr.bi.ski.getVal() 456 itr.bi.key.Version = bytesToU64(itr.bi.val) 457 return true 458 } 459 return false 460 } 461 462 // Rewind follows the y.Iterator interface 463 func (itr *Iterator) Rewind() { 464 if !itr.reversed { 465 itr.seekToFirst() 466 } else { 467 itr.seekToLast() 468 } 469 } 470 471 // Seek follows the y.Iterator interface 472 func (itr *Iterator) Seek(key []byte) { 473 if !itr.reversed { 474 itr.seek(key) 475 } else { 476 itr.seekForPrev(key) 477 } 478 }