github.com/df-mc/goleveldb@v1.1.9/leveldb/table/reader.go (about) 1 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> 2 // All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE file. 6 7 package table 8 9 import ( 10 "bytes" 11 "encoding/binary" 12 "fmt" 13 "github.com/klauspost/compress/flate" 14 "io" 15 "sort" 16 "strings" 17 "sync" 18 19 "github.com/golang/snappy" 20 21 "github.com/df-mc/goleveldb/leveldb/cache" 22 "github.com/df-mc/goleveldb/leveldb/comparer" 23 "github.com/df-mc/goleveldb/leveldb/errors" 24 "github.com/df-mc/goleveldb/leveldb/filter" 25 "github.com/df-mc/goleveldb/leveldb/iterator" 26 "github.com/df-mc/goleveldb/leveldb/opt" 27 "github.com/df-mc/goleveldb/leveldb/storage" 28 "github.com/df-mc/goleveldb/leveldb/util" 29 ) 30 31 // Reader errors. 32 var ( 33 ErrNotFound = errors.ErrNotFound 34 ErrReaderReleased = errors.New("leveldb/table: reader released") 35 ErrIterReleased = errors.New("leveldb/table: iterator released") 36 ) 37 38 // ErrCorrupted describes error due to corruption. This error will be wrapped 39 // with errors.ErrCorrupted. 40 type ErrCorrupted struct { 41 Pos int64 42 Size int64 43 Kind string 44 Reason string 45 } 46 47 func (e *ErrCorrupted) Error() string { 48 return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason) 49 } 50 51 func max(x, y int) int { 52 if x > y { 53 return x 54 } 55 return y 56 } 57 58 type block struct { 59 bpool *util.BufferPool 60 bh blockHandle 61 data []byte 62 restartsLen int 63 restartsOffset int 64 } 65 66 func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) { 67 index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { 68 offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) 69 offset++ // shared always zero, since this is a restart point 70 v1, n1 := binary.Uvarint(b.data[offset:]) // key length 71 _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length 72 m := offset + n1 + n2 73 return cmp.Compare(b.data[m:m+int(v1)], key) > 0 74 }) + rstart - 1 75 if index < rstart { 76 // The smallest key is greater-than key sought. 77 index = rstart 78 } 79 offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) 80 return 81 } 82 83 func (b *block) restartIndex(rstart, rlimit, offset int) int { 84 return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { 85 return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset 86 }) + rstart - 1 87 } 88 89 func (b *block) restartOffset(index int) int { 90 return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) 91 } 92 93 func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) { 94 if offset >= b.restartsOffset { 95 if offset != b.restartsOffset { 96 err = &ErrCorrupted{Reason: "entries offset not aligned"} 97 } 98 return 99 } 100 v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length 101 v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length 102 v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length 103 m := n0 + n1 + n2 104 n = m + int(v1) + int(v2) 105 if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset { 106 err = &ErrCorrupted{Reason: "entries corrupted"} 107 return 108 } 109 key = b.data[offset+m : offset+m+int(v1)] 110 value = b.data[offset+m+int(v1) : offset+n] 111 nShared = int(v0) 112 return 113 } 114 115 func (b *block) Release() { 116 b.bpool.Put(b.data) 117 b.bpool = nil 118 b.data = nil 119 } 120 121 type dir int 122 123 const ( 124 dirReleased dir = iota - 1 125 dirSOI 126 dirEOI 127 dirBackward 128 dirForward 129 ) 130 131 type blockIter struct { 132 tr *Reader 133 block *block 134 blockReleaser util.Releaser 135 releaser util.Releaser 136 key, value []byte 137 offset int 138 // Previous offset, only filled by Next. 139 prevOffset int 140 prevNode []int 141 prevKeys []byte 142 restartIndex int 143 // Iterator direction. 144 dir dir 145 // Restart index slice range. 146 riStart int 147 riLimit int 148 // Offset slice range. 149 offsetStart int 150 offsetRealStart int 151 offsetLimit int 152 // Error. 153 err error 154 } 155 156 func (i *blockIter) sErr(err error) { 157 i.err = err 158 i.key = nil 159 i.value = nil 160 i.prevNode = nil 161 i.prevKeys = nil 162 } 163 164 func (i *blockIter) reset() { 165 if i.dir == dirBackward { 166 i.prevNode = i.prevNode[:0] 167 i.prevKeys = i.prevKeys[:0] 168 } 169 i.restartIndex = i.riStart 170 i.offset = i.offsetStart 171 i.dir = dirSOI 172 i.key = i.key[:0] 173 i.value = nil 174 } 175 176 func (i *blockIter) isFirst() bool { 177 switch i.dir { 178 case dirForward: 179 return i.prevOffset == i.offsetRealStart 180 case dirBackward: 181 return len(i.prevNode) == 1 && i.restartIndex == i.riStart 182 } 183 return false 184 } 185 186 func (i *blockIter) isLast() bool { 187 switch i.dir { 188 case dirForward, dirBackward: 189 return i.offset == i.offsetLimit 190 } 191 return false 192 } 193 194 func (i *blockIter) First() bool { 195 if i.err != nil { 196 return false 197 } else if i.dir == dirReleased { 198 i.err = ErrIterReleased 199 return false 200 } 201 202 if i.dir == dirBackward { 203 i.prevNode = i.prevNode[:0] 204 i.prevKeys = i.prevKeys[:0] 205 } 206 i.dir = dirSOI 207 return i.Next() 208 } 209 210 func (i *blockIter) Last() bool { 211 if i.err != nil { 212 return false 213 } else if i.dir == dirReleased { 214 i.err = ErrIterReleased 215 return false 216 } 217 218 if i.dir == dirBackward { 219 i.prevNode = i.prevNode[:0] 220 i.prevKeys = i.prevKeys[:0] 221 } 222 i.dir = dirEOI 223 return i.Prev() 224 } 225 226 func (i *blockIter) Seek(key []byte) bool { 227 if i.err != nil { 228 return false 229 } else if i.dir == dirReleased { 230 i.err = ErrIterReleased 231 return false 232 } 233 234 ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key) 235 if err != nil { 236 i.sErr(err) 237 return false 238 } 239 i.restartIndex = ri 240 i.offset = max(i.offsetStart, offset) 241 if i.dir == dirSOI || i.dir == dirEOI { 242 i.dir = dirForward 243 } 244 for i.Next() { 245 if i.tr.cmp.Compare(i.key, key) >= 0 { 246 return true 247 } 248 } 249 return false 250 } 251 252 func (i *blockIter) Next() bool { 253 if i.dir == dirEOI || i.err != nil { 254 return false 255 } else if i.dir == dirReleased { 256 i.err = ErrIterReleased 257 return false 258 } 259 260 if i.dir == dirSOI { 261 i.restartIndex = i.riStart 262 i.offset = i.offsetStart 263 } else if i.dir == dirBackward { 264 i.prevNode = i.prevNode[:0] 265 i.prevKeys = i.prevKeys[:0] 266 } 267 for i.offset < i.offsetRealStart { 268 key, value, nShared, n, err := i.block.entry(i.offset) 269 if err != nil { 270 i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) 271 return false 272 } 273 if n == 0 { 274 i.dir = dirEOI 275 return false 276 } 277 i.key = append(i.key[:nShared], key...) 278 i.value = value 279 i.offset += n 280 } 281 if i.offset >= i.offsetLimit { 282 i.dir = dirEOI 283 if i.offset != i.offsetLimit { 284 i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) 285 } 286 return false 287 } 288 key, value, nShared, n, err := i.block.entry(i.offset) 289 if err != nil { 290 i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) 291 return false 292 } 293 if n == 0 { 294 i.dir = dirEOI 295 return false 296 } 297 i.key = append(i.key[:nShared], key...) 298 i.value = value 299 i.prevOffset = i.offset 300 i.offset += n 301 i.dir = dirForward 302 return true 303 } 304 305 func (i *blockIter) Prev() bool { 306 if i.dir == dirSOI || i.err != nil { 307 return false 308 } else if i.dir == dirReleased { 309 i.err = ErrIterReleased 310 return false 311 } 312 313 var ri int 314 if i.dir == dirForward { 315 // Change direction. 316 i.offset = i.prevOffset 317 if i.offset == i.offsetRealStart { 318 i.dir = dirSOI 319 return false 320 } 321 ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset) 322 i.dir = dirBackward 323 } else if i.dir == dirEOI { 324 // At the end of iterator. 325 i.restartIndex = i.riLimit 326 i.offset = i.offsetLimit 327 if i.offset == i.offsetRealStart { 328 i.dir = dirSOI 329 return false 330 } 331 ri = i.riLimit - 1 332 i.dir = dirBackward 333 } else if len(i.prevNode) == 1 { 334 // This is the end of a restart range. 335 i.offset = i.prevNode[0] 336 i.prevNode = i.prevNode[:0] 337 if i.restartIndex == i.riStart { 338 i.dir = dirSOI 339 return false 340 } 341 i.restartIndex-- 342 ri = i.restartIndex 343 } else { 344 // In the middle of restart range, get from cache. 345 n := len(i.prevNode) - 3 346 node := i.prevNode[n:] 347 i.prevNode = i.prevNode[:n] 348 // Get the key. 349 ko := node[0] 350 i.key = append(i.key[:0], i.prevKeys[ko:]...) 351 i.prevKeys = i.prevKeys[:ko] 352 // Get the value. 353 vo := node[1] 354 vl := vo + node[2] 355 i.value = i.block.data[vo:vl] 356 i.offset = vl 357 return true 358 } 359 // Build entries cache. 360 i.key = i.key[:0] 361 i.value = nil 362 offset := i.block.restartOffset(ri) 363 if offset == i.offset { 364 ri-- 365 if ri < 0 { 366 i.dir = dirSOI 367 return false 368 } 369 offset = i.block.restartOffset(ri) 370 } 371 i.prevNode = append(i.prevNode, offset) 372 for { 373 key, value, nShared, n, err := i.block.entry(offset) 374 if err != nil { 375 i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) 376 return false 377 } 378 if offset >= i.offsetRealStart { 379 if i.value != nil { 380 // Appends 3 variables: 381 // 1. Previous keys offset 382 // 2. Value offset in the data block 383 // 3. Value length 384 i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value)) 385 i.prevKeys = append(i.prevKeys, i.key...) 386 } 387 i.value = value 388 } 389 i.key = append(i.key[:nShared], key...) 390 offset += n 391 // Stop if target offset reached. 392 if offset >= i.offset { 393 if offset != i.offset { 394 i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) 395 return false 396 } 397 398 break 399 } 400 } 401 i.restartIndex = ri 402 i.offset = offset 403 return true 404 } 405 406 func (i *blockIter) Key() []byte { 407 if i.err != nil || i.dir <= dirEOI { 408 return nil 409 } 410 return i.key 411 } 412 413 func (i *blockIter) Value() []byte { 414 if i.err != nil || i.dir <= dirEOI { 415 return nil 416 } 417 return i.value 418 } 419 420 func (i *blockIter) Release() { 421 if i.dir != dirReleased { 422 i.tr = nil 423 i.block = nil 424 i.prevNode = nil 425 i.prevKeys = nil 426 i.key = nil 427 i.value = nil 428 i.dir = dirReleased 429 if i.blockReleaser != nil { 430 i.blockReleaser.Release() 431 i.blockReleaser = nil 432 } 433 if i.releaser != nil { 434 i.releaser.Release() 435 i.releaser = nil 436 } 437 } 438 } 439 440 func (i *blockIter) SetReleaser(releaser util.Releaser) { 441 if i.dir == dirReleased { 442 panic(util.ErrReleased) 443 } 444 if i.releaser != nil && releaser != nil { 445 panic(util.ErrHasReleaser) 446 } 447 i.releaser = releaser 448 } 449 450 func (i *blockIter) Valid() bool { 451 return i.err == nil && (i.dir == dirBackward || i.dir == dirForward) 452 } 453 454 func (i *blockIter) Error() error { 455 return i.err 456 } 457 458 type filterBlock struct { 459 bpool *util.BufferPool 460 data []byte 461 oOffset int 462 baseLg uint 463 filtersNum int 464 } 465 466 func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool { 467 i := int(offset >> b.baseLg) 468 if i < b.filtersNum { 469 o := b.data[b.oOffset+i*4:] 470 n := int(binary.LittleEndian.Uint32(o)) 471 m := int(binary.LittleEndian.Uint32(o[4:])) 472 if n < m && m <= b.oOffset { 473 return filter.Contains(b.data[n:m], key) 474 } else if n == m { 475 return false 476 } 477 } 478 return true 479 } 480 481 func (b *filterBlock) Release() { 482 b.bpool.Put(b.data) 483 b.bpool = nil 484 b.data = nil 485 } 486 487 type indexIter struct { 488 *blockIter 489 tr *Reader 490 slice *util.Range 491 // Options 492 fillCache bool 493 } 494 495 func (i *indexIter) Get() iterator.Iterator { 496 value := i.Value() 497 if value == nil { 498 return nil 499 } 500 dataBH, n := decodeBlockHandle(value) 501 if n == 0 { 502 return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle")) 503 } 504 505 var slice *util.Range 506 if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) { 507 slice = i.slice 508 } 509 return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache) 510 } 511 512 // Reader is a table reader. 513 type Reader struct { 514 mu sync.RWMutex 515 fd storage.FileDesc 516 reader io.ReaderAt 517 cache *cache.NamespaceGetter 518 err error 519 bpool *util.BufferPool 520 // Options 521 o *opt.Options 522 cmp comparer.Comparer 523 filter filter.Filter 524 verifyChecksum bool 525 526 dataEnd int64 527 metaBH, indexBH, filterBH blockHandle 528 indexBlock *block 529 filterBlock *filterBlock 530 } 531 532 func (r *Reader) blockKind(bh blockHandle) string { 533 switch bh.offset { 534 case r.metaBH.offset: 535 return "meta-block" 536 case r.indexBH.offset: 537 return "index-block" 538 case r.filterBH.offset: 539 if r.filterBH.length > 0 { 540 return "filter-block" 541 } 542 } 543 return "data-block" 544 } 545 546 func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error { 547 return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}} 548 } 549 550 func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error { 551 return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason) 552 } 553 554 func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error { 555 if cerr, ok := err.(*ErrCorrupted); ok { 556 cerr.Pos = int64(bh.offset) 557 cerr.Size = int64(bh.length) 558 cerr.Kind = r.blockKind(bh) 559 return &errors.ErrCorrupted{Fd: r.fd, Err: cerr} 560 } 561 return err 562 } 563 564 var bufferPool = sync.Pool{ 565 New: func() interface{} { 566 return bytes.NewBuffer(make([]byte, 0, 4096)) 567 }, 568 } 569 var reader io.ReadCloser 570 var rmu sync.Mutex 571 572 func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) { 573 data := r.bpool.Get(int(bh.length + blockTrailerLen)) 574 if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF { 575 return nil, err 576 } 577 578 if verifyChecksum { 579 n := bh.length + 1 580 checksum0 := binary.LittleEndian.Uint32(data[n:]) 581 checksum1 := util.NewCRC(data[:n]).Value() 582 if checksum0 != checksum1 { 583 r.bpool.Put(data) 584 return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1)) 585 } 586 } 587 588 switch data[bh.length] { 589 case blockTypeNoCompression: 590 data = data[:bh.length] 591 case blockTypeSnappyCompression: 592 decLen, err := snappy.DecodedLen(data[:bh.length]) 593 if err != nil { 594 r.bpool.Put(data) 595 return nil, r.newErrCorruptedBH(bh, err.Error()) 596 } 597 decData := r.bpool.Get(decLen) 598 decData, err = snappy.Decode(decData, data[:bh.length]) 599 r.bpool.Put(data) 600 if err != nil { 601 r.bpool.Put(decData) 602 return nil, r.newErrCorruptedBH(bh, err.Error()) 603 } 604 data = decData 605 case blockTypeFlateCompression: 606 buf := bufferPool.Get().(*bytes.Buffer) 607 608 rmu.Lock() 609 if reader == nil { 610 reader = flate.NewReader(bytes.NewBuffer(data[:bh.length])) 611 } else { 612 _ = reader.(flate.Resetter).Reset(bytes.NewBuffer(data[:bh.length]), nil) 613 } 614 _, _ = buf.ReadFrom(reader) 615 616 if err := reader.Close(); err != nil { 617 buf.Reset() 618 bufferPool.Put(buf) 619 rmu.Unlock() 620 return nil, r.newErrCorruptedBH(bh, err.Error()) 621 } 622 data = append([]byte(nil), buf.Bytes()...) 623 buf.Reset() 624 rmu.Unlock() 625 626 bufferPool.Put(buf) 627 default: 628 r.bpool.Put(data) 629 return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length])) 630 } 631 return data, nil 632 } 633 634 func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) { 635 data, err := r.readRawBlock(bh, verifyChecksum) 636 if err != nil { 637 return nil, err 638 } 639 restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) 640 b := &block{ 641 bpool: r.bpool, 642 bh: bh, 643 data: data, 644 restartsLen: restartsLen, 645 restartsOffset: len(data) - (restartsLen+1)*4, 646 } 647 return b, nil 648 } 649 650 func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) { 651 if r.cache != nil { 652 var ( 653 err error 654 ch *cache.Handle 655 ) 656 if fillCache { 657 ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) { 658 var b *block 659 b, err = r.readBlock(bh, verifyChecksum) 660 if err != nil { 661 return 0, nil 662 } 663 return cap(b.data), b 664 }) 665 } else { 666 ch = r.cache.Get(bh.offset, nil) 667 } 668 if ch != nil { 669 b, ok := ch.Value().(*block) 670 if !ok { 671 ch.Release() 672 return nil, nil, errors.New("leveldb/table: inconsistent block type") 673 } 674 return b, ch, err 675 } else if err != nil { 676 return nil, nil, err 677 } 678 } 679 680 b, err := r.readBlock(bh, verifyChecksum) 681 return b, b, err 682 } 683 684 func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) { 685 data, err := r.readRawBlock(bh, true) 686 if err != nil { 687 return nil, err 688 } 689 n := len(data) 690 if n < 5 { 691 return nil, r.newErrCorruptedBH(bh, "too short") 692 } 693 m := n - 5 694 oOffset := int(binary.LittleEndian.Uint32(data[m:])) 695 if oOffset > m { 696 return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset") 697 } 698 b := &filterBlock{ 699 bpool: r.bpool, 700 data: data, 701 oOffset: oOffset, 702 baseLg: uint(data[n-1]), 703 filtersNum: (m - oOffset) / 4, 704 } 705 return b, nil 706 } 707 708 func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) { 709 if r.cache != nil { 710 var ( 711 err error 712 ch *cache.Handle 713 ) 714 if fillCache { 715 ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) { 716 var b *filterBlock 717 b, err = r.readFilterBlock(bh) 718 if err != nil { 719 return 0, nil 720 } 721 return cap(b.data), b 722 }) 723 } else { 724 ch = r.cache.Get(bh.offset, nil) 725 } 726 if ch != nil { 727 b, ok := ch.Value().(*filterBlock) 728 if !ok { 729 ch.Release() 730 return nil, nil, errors.New("leveldb/table: inconsistent block type") 731 } 732 return b, ch, err 733 } else if err != nil { 734 return nil, nil, err 735 } 736 } 737 738 b, err := r.readFilterBlock(bh) 739 return b, b, err 740 } 741 742 func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) { 743 if r.indexBlock == nil { 744 return r.readBlockCached(r.indexBH, true, fillCache) 745 } 746 return r.indexBlock, util.NoopReleaser{}, nil 747 } 748 749 func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) { 750 if r.filterBlock == nil { 751 return r.readFilterBlockCached(r.filterBH, fillCache) 752 } 753 return r.filterBlock, util.NoopReleaser{}, nil 754 } 755 756 func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter { 757 bi := &blockIter{ 758 tr: r, 759 block: b, 760 blockReleaser: bReleaser, 761 // Valid key should never be nil. 762 key: make([]byte, 0), 763 dir: dirSOI, 764 riStart: 0, 765 riLimit: b.restartsLen, 766 offsetStart: 0, 767 offsetRealStart: 0, 768 offsetLimit: b.restartsOffset, 769 } 770 if slice != nil { 771 if slice.Start != nil { 772 if bi.Seek(slice.Start) { 773 bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset) 774 bi.offsetStart = b.restartOffset(bi.riStart) 775 bi.offsetRealStart = bi.prevOffset 776 } else { 777 bi.riStart = b.restartsLen 778 bi.offsetStart = b.restartsOffset 779 bi.offsetRealStart = b.restartsOffset 780 } 781 } 782 if slice.Limit != nil { 783 if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) { 784 bi.offsetLimit = bi.prevOffset 785 bi.riLimit = bi.restartIndex + 1 786 } 787 } 788 bi.reset() 789 if bi.offsetStart > bi.offsetLimit { 790 bi.sErr(errors.New("leveldb/table: invalid slice range")) 791 } 792 } 793 return bi 794 } 795 796 func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator { 797 b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache) 798 if err != nil { 799 return iterator.NewEmptyIterator(err) 800 } 801 return r.newBlockIter(b, rel, slice, false) 802 } 803 804 func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator { 805 r.mu.RLock() 806 defer r.mu.RUnlock() 807 808 if r.err != nil { 809 return iterator.NewEmptyIterator(r.err) 810 } 811 812 return r.getDataIter(dataBH, slice, verifyChecksum, fillCache) 813 } 814 815 // NewIterator creates an iterator from the table. 816 // 817 // Slice allows slicing the iterator to only contains keys in the given 818 // range. A nil Range.Start is treated as a key before all keys in the 819 // table. And a nil Range.Limit is treated as a key after all keys in 820 // the table. 821 // 822 // WARNING: Any slice returned by interator (e.g. slice returned by calling 823 // Iterator.Key() or Iterator.Key() methods), its content should not be modified 824 // unless noted otherwise. 825 // 826 // The returned iterator is not safe for concurrent use and should be released 827 // after use. 828 // 829 // Also read Iterator documentation of the leveldb/iterator package. 830 func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { 831 r.mu.RLock() 832 defer r.mu.RUnlock() 833 834 if r.err != nil { 835 return iterator.NewEmptyIterator(r.err) 836 } 837 838 fillCache := !ro.GetDontFillCache() 839 indexBlock, rel, err := r.getIndexBlock(fillCache) 840 if err != nil { 841 return iterator.NewEmptyIterator(err) 842 } 843 index := &indexIter{ 844 blockIter: r.newBlockIter(indexBlock, rel, slice, true), 845 tr: r, 846 slice: slice, 847 fillCache: !ro.GetDontFillCache(), 848 } 849 return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader)) 850 } 851 852 func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) { 853 r.mu.RLock() 854 defer r.mu.RUnlock() 855 856 if r.err != nil { 857 err = r.err 858 return 859 } 860 861 indexBlock, rel, err := r.getIndexBlock(true) 862 if err != nil { 863 return 864 } 865 defer rel.Release() 866 867 index := r.newBlockIter(indexBlock, nil, nil, true) 868 defer index.Release() 869 870 if !index.Seek(key) { 871 if err = index.Error(); err == nil { 872 err = ErrNotFound 873 } 874 return 875 } 876 877 dataBH, n := decodeBlockHandle(index.Value()) 878 if n == 0 { 879 r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") 880 return nil, nil, r.err 881 } 882 883 // The filter should only used for exact match. 884 if filtered && r.filter != nil { 885 filterBlock, frel, ferr := r.getFilterBlock(true) 886 if ferr == nil { 887 if !filterBlock.contains(r.filter, dataBH.offset, key) { 888 frel.Release() 889 return nil, nil, ErrNotFound 890 } 891 frel.Release() 892 } else if !errors.IsCorrupted(ferr) { 893 return nil, nil, ferr 894 } 895 } 896 897 data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) 898 if !data.Seek(key) { 899 data.Release() 900 if err = data.Error(); err != nil { 901 return 902 } 903 904 // The nearest greater-than key is the first key of the next block. 905 if !index.Next() { 906 if err = index.Error(); err == nil { 907 err = ErrNotFound 908 } 909 return 910 } 911 912 dataBH, n = decodeBlockHandle(index.Value()) 913 if n == 0 { 914 r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") 915 return nil, nil, r.err 916 } 917 918 data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) 919 if !data.Next() { 920 data.Release() 921 if err = data.Error(); err == nil { 922 err = ErrNotFound 923 } 924 return 925 } 926 } 927 928 // Key doesn't use block buffer, no need to copy the buffer. 929 rkey = data.Key() 930 if !noValue { 931 if r.bpool == nil { 932 value = data.Value() 933 } else { 934 // Value does use block buffer, and since the buffer will be 935 // recycled, it need to be copied. 936 value = append([]byte{}, data.Value()...) 937 } 938 } 939 data.Release() 940 return 941 } 942 943 // Find finds key/value pair whose key is greater than or equal to the 944 // given key. It returns ErrNotFound if the table doesn't contain 945 // such pair. 946 // If filtered is true then the nearest 'block' will be checked against 947 // 'filter data' (if present) and will immediately return ErrNotFound if 948 // 'filter data' indicates that such pair doesn't exist. 949 // 950 // The caller may modify the contents of the returned slice as it is its 951 // own copy. 952 // It is safe to modify the contents of the argument after Find returns. 953 func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) { 954 return r.find(key, filtered, ro, false) 955 } 956 957 // FindKey finds key that is greater than or equal to the given key. 958 // It returns ErrNotFound if the table doesn't contain such key. 959 // If filtered is true then the nearest 'block' will be checked against 960 // 'filter data' (if present) and will immediately return ErrNotFound if 961 // 'filter data' indicates that such key doesn't exist. 962 // 963 // The caller may modify the contents of the returned slice as it is its 964 // own copy. 965 // It is safe to modify the contents of the argument after Find returns. 966 func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) { 967 rkey, _, err = r.find(key, filtered, ro, true) 968 return 969 } 970 971 // Get gets the value for the given key. It returns errors.ErrNotFound 972 // if the table does not contain the key. 973 // 974 // The caller may modify the contents of the returned slice as it is its 975 // own copy. 976 // It is safe to modify the contents of the argument after Find returns. 977 func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { 978 r.mu.RLock() 979 defer r.mu.RUnlock() 980 981 if r.err != nil { 982 err = r.err 983 return 984 } 985 986 rkey, value, err := r.find(key, false, ro, false) 987 if err == nil && r.cmp.Compare(rkey, key) != 0 { 988 value = nil 989 err = ErrNotFound 990 } 991 return 992 } 993 994 // OffsetOf returns approximate offset for the given key. 995 // 996 // It is safe to modify the contents of the argument after Get returns. 997 func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { 998 r.mu.RLock() 999 defer r.mu.RUnlock() 1000 1001 if r.err != nil { 1002 err = r.err 1003 return 1004 } 1005 1006 indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true) 1007 if err != nil { 1008 return 1009 } 1010 defer rel.Release() 1011 1012 index := r.newBlockIter(indexBlock, nil, nil, true) 1013 defer index.Release() 1014 if index.Seek(key) { 1015 dataBH, n := decodeBlockHandle(index.Value()) 1016 if n == 0 { 1017 r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") 1018 return 1019 } 1020 offset = int64(dataBH.offset) 1021 return 1022 } 1023 err = index.Error() 1024 if err == nil { 1025 offset = r.dataEnd 1026 } 1027 return 1028 } 1029 1030 // Release implements util.Releaser. 1031 // It also close the file if it is an io.Closer. 1032 func (r *Reader) Release() { 1033 r.mu.Lock() 1034 defer r.mu.Unlock() 1035 1036 if closer, ok := r.reader.(io.Closer); ok { 1037 closer.Close() 1038 } 1039 if r.indexBlock != nil { 1040 r.indexBlock.Release() 1041 r.indexBlock = nil 1042 } 1043 if r.filterBlock != nil { 1044 r.filterBlock.Release() 1045 r.filterBlock = nil 1046 } 1047 r.reader = nil 1048 r.cache = nil 1049 r.bpool = nil 1050 r.err = ErrReaderReleased 1051 } 1052 1053 // NewReader creates a new initialized table reader for the file. 1054 // The fi, cache and bpool is optional and can be nil. 1055 // 1056 // The returned table reader instance is safe for concurrent use. 1057 func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { 1058 if f == nil { 1059 return nil, errors.New("leveldb/table: nil file") 1060 } 1061 1062 r := &Reader{ 1063 fd: fd, 1064 reader: f, 1065 cache: cache, 1066 bpool: bpool, 1067 o: o, 1068 cmp: o.GetComparer(), 1069 verifyChecksum: o.GetStrict(opt.StrictBlockChecksum), 1070 } 1071 1072 if size < footerLen { 1073 r.err = r.newErrCorrupted(0, size, "table", "too small") 1074 return r, nil 1075 } 1076 1077 footerPos := size - footerLen 1078 var footer [footerLen]byte 1079 if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF { 1080 return nil, err 1081 } 1082 if string(footer[footerLen-len(magic):footerLen]) != magic { 1083 r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number") 1084 return r, nil 1085 } 1086 1087 var n int 1088 // Decode the metaindex block handle. 1089 r.metaBH, n = decodeBlockHandle(footer[:]) 1090 if n == 0 { 1091 r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle") 1092 return r, nil 1093 } 1094 1095 // Decode the index block handle. 1096 r.indexBH, n = decodeBlockHandle(footer[n:]) 1097 if n == 0 { 1098 r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle") 1099 return r, nil 1100 } 1101 1102 // Read metaindex block. 1103 metaBlock, err := r.readBlock(r.metaBH, true) 1104 if err != nil { 1105 if errors.IsCorrupted(err) { 1106 r.err = err 1107 return r, nil 1108 } 1109 return nil, err 1110 } 1111 1112 // Set data end. 1113 r.dataEnd = int64(r.metaBH.offset) 1114 1115 // Read metaindex. 1116 metaIter := r.newBlockIter(metaBlock, nil, nil, true) 1117 for metaIter.Next() { 1118 key := string(metaIter.Key()) 1119 if !strings.HasPrefix(key, "filter.") { 1120 continue 1121 } 1122 fn := key[7:] 1123 if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn { 1124 r.filter = f0 1125 } else { 1126 for _, f0 := range o.GetAltFilters() { 1127 if f0.Name() == fn { 1128 r.filter = f0 1129 break 1130 } 1131 } 1132 } 1133 if r.filter != nil { 1134 filterBH, n := decodeBlockHandle(metaIter.Value()) 1135 if n == 0 { 1136 continue 1137 } 1138 r.filterBH = filterBH 1139 // Update data end. 1140 r.dataEnd = int64(filterBH.offset) 1141 break 1142 } 1143 } 1144 metaIter.Release() 1145 metaBlock.Release() 1146 1147 // Cache index and filter block locally, since we don't have global cache. 1148 if cache == nil { 1149 r.indexBlock, err = r.readBlock(r.indexBH, true) 1150 if err != nil { 1151 if errors.IsCorrupted(err) { 1152 r.err = err 1153 return r, nil 1154 } 1155 return nil, err 1156 } 1157 if r.filter != nil { 1158 r.filterBlock, err = r.readFilterBlock(r.filterBH) 1159 if err != nil { 1160 if !errors.IsCorrupted(err) { 1161 return nil, err 1162 } 1163 1164 // Don't use filter then. 1165 r.filter = nil 1166 } 1167 } 1168 } 1169 1170 return r, nil 1171 }