// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package leveldb

import (
	"container/list"
	"fmt"
	"io"
	"os"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/df-mc/goleveldb/leveldb/errors"
	"github.com/df-mc/goleveldb/leveldb/iterator"
	"github.com/df-mc/goleveldb/leveldb/journal"
	"github.com/df-mc/goleveldb/leveldb/memdb"
	"github.com/df-mc/goleveldb/leveldb/opt"
	"github.com/df-mc/goleveldb/leveldb/storage"
	"github.com/df-mc/goleveldb/leveldb/table"
	"github.com/df-mc/goleveldb/leveldb/util"
)

// DB is a LevelDB database.
type DB struct {
	// Need 64-bit alignment: seq is accessed atomically and must be the
	// first field so it stays 8-byte aligned on 32-bit platforms.
	seq uint64

	// Stats. Need 64-bit alignment.
	cWriteDelay            int64 // The cumulative duration of write delays
	cWriteDelayN           int32 // The cumulative number of write delays
	inWritePaused          int32 // The indicator whether write operation is paused by compaction
	aliveSnaps, aliveIters int32

	// Compaction statistic
	memComp       uint32 // The cumulative number of memory compaction
	level0Comp    uint32 // The cumulative number of level0 compaction
	nonLevel0Comp uint32 // The cumulative number of non-level0 compaction
	seekComp      uint32 // The cumulative number of seek compaction

	// Session.
	s *session

	// MemDB.
	memMu           sync.RWMutex
	memPool         chan *memdb.DB
	mem, frozenMem  *memDB
	journal         *journal.Writer
	journalWriter   storage.Writer
	journalFd       storage.FileDesc
	frozenJournalFd storage.FileDesc
	frozenSeq       uint64

	// Snapshot.
	snapsMu   sync.Mutex
	snapsList *list.List

	// Write.
	batchPool    sync.Pool
	writeMergeC  chan writeMerge
	writeMergedC chan bool
	writeLockC   chan struct{}
	writeAckC    chan error
	writeDelay   time.Duration
	writeDelayN  int
	tr           *Transaction

	// Compaction.
	compCommitLk     sync.Mutex
	tcompCmdC        chan cCmd
	tcompPauseC      chan chan<- struct{}
	mcompCmdC        chan cCmd
	compErrC         chan error
	compPerErrC      chan error
	compErrSetC      chan error
	compWriteLocking bool
	compStats        cStats
	memdbMaxLevel    int // For testing.

	// Close.
	closeW sync.WaitGroup
	closeC chan struct{}
	closed uint32
	closer io.Closer
}

// openDB finishes opening a DB on top of an already-recovered session:
// it replays journals (read-only or read-write variant), removes obsolete
// files, and starts the background error/compaction goroutines.
func openDB(s *session) (*DB, error) {
	s.log("db@open opening")
	start := time.Now()
	db := &DB{
		s: s,
		// Initial sequence
		seq: s.stSeqNum,
		// MemDB
		memPool: make(chan *memdb.DB, 1),
		// Snapshot
		snapsList: list.New(),
		// Write
		batchPool:    sync.Pool{New: newBatch},
		writeMergeC:  make(chan writeMerge),
		writeMergedC: make(chan bool),
		writeLockC:   make(chan struct{}, 1),
		writeAckC:    make(chan error),
		// Compaction
		tcompCmdC:   make(chan cCmd),
		tcompPauseC: make(chan chan<- struct{}),
		mcompCmdC:   make(chan cCmd),
		compErrC:    make(chan error),
		compPerErrC: make(chan error),
		compErrSetC: make(chan error),
		// Close
		closeC: make(chan struct{}),
	}

	// Read-only mode.
	readOnly := s.o.GetReadOnly()

	if readOnly {
		// Recover journals (read-only mode).
		if err := db.recoverJournalRO(); err != nil {
			return nil, err
		}
	} else {
		// Recover journals.
		if err := db.recoverJournal(); err != nil {
			return nil, err
		}

		// Remove any obsolete files.
		if err := db.checkAndCleanFiles(); err != nil {
			// Close journal.
			if db.journal != nil {
				db.journal.Close()
				db.journalWriter.Close()
			}
			return nil, err
		}

	}

	// Doesn't need to be included in the wait group.
	go db.compactionError()
	go db.mpoolDrain()

	if readOnly {
		db.SetReadOnly()
	} else {
		db.closeW.Add(2)
		go db.tCompaction()
		go db.mCompaction()
		// go db.jWriter()
	}

	s.logf("db@open done T·%v", time.Since(start))

	runtime.SetFinalizer(db, (*DB).Close)
	return db, nil
}

// Open opens or creates a DB for the given storage.
// The DB will be created if not exist, unless ErrorIfMissing is true.
// Also, if ErrorIfExist is true and the DB exist Open will return
// os.ErrExist error.
//
// Open will return an error with type of ErrCorrupted if corruption
// detected in the DB. Use errors.IsCorrupted to test whether an error is
// due to corruption. Corrupted DB can be recovered with Recover function.
//
// The returned DB instance is safe for concurrent use.
// The DB must be closed after use, by calling Close method.
func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
	s, err := newSession(stor, o)
	if err != nil {
		return
	}
	defer func() {
		// Tear the session down again on any error path below.
		if err != nil {
			s.close()
			s.release()
		}
	}()

	err = s.recover()
	if err != nil {
		if !os.IsNotExist(err) || s.o.GetErrorIfMissing() || s.o.GetReadOnly() {
			return
		}
		err = s.create()
		if err != nil {
			return
		}
	} else if s.o.GetErrorIfExist() {
		err = os.ErrExist
		return
	}

	return openDB(s)
}

// OpenFile opens or creates a DB for the given path.
// The DB will be created if not exist, unless ErrorIfMissing is true.
// Also, if ErrorIfExist is true and the DB exist OpenFile will return
// os.ErrExist error.
//
// OpenFile uses standard file-system backed storage implementation as
// described in the leveldb/storage package.
//
// OpenFile will return an error with type of ErrCorrupted if corruption
// detected in the DB. Use errors.IsCorrupted to test whether an error is
// due to corruption. Corrupted DB can be recovered with Recover function.
//
// The returned DB instance is safe for concurrent use.
// The DB must be closed after use, by calling Close method.
func OpenFile(path string, o *opt.Options) (db *DB, err error) {
	stor, err := storage.OpenFile(path, o.GetReadOnly())
	if err != nil {
		return
	}
	db, err = Open(stor, o)
	if err != nil {
		stor.Close()
	} else {
		// DB owns the storage now; Close will close it.
		db.closer = stor
	}
	return
}

// Recover recovers and opens a DB with missing or corrupted manifest files
// for the given storage. It will ignore any manifest files, valid or not.
// The DB must already exist or it will return an error.
// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
//
// The returned DB instance is safe for concurrent use.
// The DB must be closed after use, by calling Close method.
func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
	s, err := newSession(stor, o)
	if err != nil {
		return
	}
	defer func() {
		// Tear the session down again on any error path below.
		if err != nil {
			s.close()
			s.release()
		}
	}()

	err = recoverTable(s, o)
	if err != nil {
		return
	}
	return openDB(s)
}

// RecoverFile recovers and opens a DB with missing or corrupted manifest files
// for the given path. It will ignore any manifest files, valid or not.
// The DB must already exist or it will return an error.
// Also, RecoverFile will ignore ErrorIfMissing and ErrorIfExist options.
//
// RecoverFile uses standard file-system backed storage implementation as described
// in the leveldb/storage package.
//
// The returned DB instance is safe for concurrent use.
// The DB must be closed after use, by calling Close method.
func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
	stor, err := storage.OpenFile(path, false)
	if err != nil {
		return
	}
	db, err = Recover(stor, o)
	if err != nil {
		stor.Close()
	} else {
		// DB owns the storage now; Close will close it.
		db.closer = stor
	}
	return
}

// recoverTable rebuilds a fresh manifest from the table files found in the
// storage: every readable table is scanned (corrupted ones are rebuilt or
// dropped, depending on StrictRecovery), added to level 0, and committed in
// a newly created manifest.
func recoverTable(s *session, o *opt.Options) error {
	o = dupOptions(o)
	// Mask StrictReader, lets StrictRecovery doing its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort it by file number.
	fds, err := s.stor.List(storage.TypeTable)
	if err != nil {
		return err
	}
	sortFds(fds)

	var (
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)
		noSync = o.GetNoSync()

		rec   = &sessionRecord{}
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)
	// buildTable writes the valid entries of iter into a new temporary
	// table file and returns its descriptor and size. On error the
	// temporary file is removed and a zero FileDesc is returned.
	buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
		tmpFd = s.newTemp()
		writer, err := s.stor.Create(tmpFd)
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				s.stor.Remove(tmpFd)
				tmpFd = storage.FileDesc{}
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validInternalKey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		// Corruption errors are tolerated here: we keep whatever entries
		// were salvaged before the corruption point.
		err = iter.Error()
		if err != nil && !errors.IsCorrupted(err) {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		if !noSync {
			err = writer.Sync()
			if err != nil {
				return
			}
		}
		size = int64(tw.BytesLen())
		return
	}
	// recoverTable scans a single table file, counts good/corrupted keys,
	// rebuilds the file if partially corrupted (non-strict mode), and
	// records the recovered table at level 0.
	recoverTable := func(fd storage.FileDesc) error {
		s.logf("table@recovery recovering @%d", fd.Num)
		reader, err := s.stor.Open(fd)
		if err != nil {
			return err
		}
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size by seeking to the end (whence 2 == io.SeekEnd).
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte
		)
		tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
			itererr.SetErrorCallback(func(err error) {
				if errors.IsCorrupted(err) {
					s.logf("table@recovery block corruption @%d %q", fd.Num, err)
					tcorruptedBlock++
				}
			})
		}

		// Scan the table, tracking the highest sequence number and the
		// smallest/largest internal keys seen.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseInternalKey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil && !errors.IsCorrupted(err) {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		// In strict mode any corruption drops the whole table.
		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", fd.Num)
				iter := tr.NewIterator(nil, nil)
				tmpFd, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				closed = true
				reader.Close()
				if err := s.stor.Rename(tmpFd, fd); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, fd.Num, size, imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(fds) > 0 {
		s.logf("table@recovery F·%d", len(fds))

		// Mark file number as used.
		s.markFileNum(fds[len(fds)-1].Num)

		for _, fd := range fds {
			if err := recoverTable(fd); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec, false)
}

// recoverJournal replays all journal files at or after the current journal
// number into memdbs, flushing them to tables as they fill, and finally
// commits a session record pointing at a freshly created journal.
func (db *DB) recoverJournal() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		ofd storage.FileDesc // Obsolete file.
		rec = &sessionRecord{}
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery F·%d", len(fds))

		// Mark file number as used.
		db.s.markFileNum(fds[len(fds)-1].Num)

		var (
			// Options.
			strict      = db.s.o.GetStrict(opt.StrictJournal)
			checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
			writeBuffer = db.s.o.GetWriteBuffer()

			jr       *journal.Reader
			mdb      = memdb.New(db.s.icmp, writeBuffer)
			buf      = &util.Buffer{}
			batchSeq uint64
			batchLen int
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Flush memdb and remove obsolete journal file.
			if !ofd.Zero() {
				if mdb.Len() > 0 {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}
				}

				rec.setJournalNum(fd.Num)
				rec.setSeqNum(db.seq)
				if err := db.s.commit(rec, false); err != nil {
					fr.Close()
					return err
				}
				rec.resetAddedTables()

				db.s.stor.Remove(ofd)
				ofd = storage.FileDesc{}
			}

			// Replay journal to memdb.
			mdb.Reset()
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
				if err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batchSeq + uint64(batchLen)

				// Flush it if large enough.
				if mdb.Size() >= writeBuffer {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}

					mdb.Reset()
				}
			}

			fr.Close()
			ofd = fd
		}

		// Flush the last memdb.
		if mdb.Len() > 0 {
			if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
				return err
			}
		}
	}

	// Create a new journal.
	if _, err := db.newMem(0); err != nil {
		return err
	}

	// Commit.
	rec.setJournalNum(db.journalFd.Num)
	rec.setSeqNum(db.seq)
	if err := db.s.commit(rec, false); err != nil {
		// Close journal on error.
		if db.journal != nil {
			db.journal.Close()
			db.journalWriter.Close()
		}
		return err
	}

	// Remove the last obsolete journal file.
	if !ofd.Zero() {
		db.s.stor.Remove(ofd)
	}

	return nil
}

// recoverJournalRO replays all relevant journal files into a single memdb
// without writing anything back to storage (read-only mode): no flushes,
// no commits, no journal file removal.
func (db *DB) recoverJournalRO() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		// Options.
		strict      = db.s.o.GetStrict(opt.StrictJournal)
		checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
		writeBuffer = db.s.o.GetWriteBuffer()

		mdb = memdb.New(db.s.icmp, writeBuffer)
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery RO·Mode F·%d", len(fds))

		var (
			jr       *journal.Reader
			buf      = &util.Buffer{}
			batchSeq uint64
			batchLen int
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Replay journal to memdb.
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
				if err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batchSeq + uint64(batchLen)
			}

			fr.Close()
		}
	}

	// Set memDB.
	db.mem = &memDB{db: db, DB: mdb, ref: 1}

	return nil
}

// memGet looks ikey up in a memdb. ok reports whether the lookup is
// conclusive: true with mv/err set when the key (or its tombstone) was
// found or a real error occurred, false when the caller should keep
// searching lower levels.
func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
	mk, mv, err := mdb.Find(ikey)
	if err == nil {
		ukey, _, kt, kerr := parseInternalKey(mk)
		if kerr != nil {
			// Shouldn't ever happen: memdb keys are always valid internal keys.
			panic(kerr)
		}
		if icmp.uCompare(ukey, ikey.ukey()) == 0 {
			// A deletion tombstone is a conclusive "not found".
			if kt == keyTypeDel {
				return true, nil, ErrNotFound
			}
			return true, mv, nil

		}
	} else if err != ErrNotFound {
		return true, nil, err
	}
	return
}

// get reads key at the given sequence number, checking the optional
// auxiliary memdb/tables first, then the effective and frozen memdbs,
// then the tables. It returns a private copy of the value.
func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		defer m.decref()

		if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	v := db.s.version()
	value, cSched, err := v.get(auxt, ikey, ro, false)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	return
}

// nilIfNotFound maps ErrNotFound to nil, leaving other errors unchanged.
func nilIfNotFound(err error) error {
	if err == ErrNotFound {
		return nil
	}
	return err
}

// has reports whether key exists at the given sequence number, using the
// same lookup order as get but without copying the value.
func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		defer m.decref()

		if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	v := db.s.version()
	_, cSched, err := v.get(auxt, ikey, ro, true)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	if err == nil {
		ret = true
	} else if err == ErrNotFound {
		err = nil
	}
	return
}

// Get gets the value for the given key. It returns ErrNotFound if the
// DB does not contains the key.
//
// The returned slice is its own copy, it is safe to modify the contents
// of the returned slice.
// It is safe to modify the contents of the argument after Get returns.
func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	se := db.acquireSnapshot()
	defer db.releaseSnapshot(se)
	return db.get(nil, nil, key, se.seq, ro)
}

// Has returns true if the DB does contains the given key.
//
// It is safe to modify the contents of the argument after Has returns.
func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	se := db.acquireSnapshot()
	defer db.releaseSnapshot(se)
	return db.has(nil, nil, key, se.seq, ro)
}

// NewIterator returns an iterator for the latest snapshot of the
// underlying DB.
// The returned iterator is not safe for concurrent use, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
// underlying DB. The resultant key/value pairs are guaranteed to be
// consistent.
//
// Slice allows slicing the iterator to only contains keys in the given
// range. A nil Range.Start is treated as a key before all keys in the
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// WARNING: Any slice returned by iterator (e.g. slice returned by calling
// Iterator.Key() or Iterator.Value() methods), its content should not be modified
// unless noted otherwise.
//
// The iterator must be released after use, by calling Release method.
//
// Also read Iterator documentation of the leveldb/iterator package.
func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
	if err := db.ok(); err != nil {
		return iterator.NewEmptyIterator(err)
	}

	se := db.acquireSnapshot()
	defer db.releaseSnapshot(se)
	// Iterator holds 'version' lock, 'version' is immutable so snapshot
	// can be released after iterator created.
	return db.newIterator(nil, nil, se.seq, slice, ro)
}

// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
// is a frozen snapshot of a DB state at a particular point in time. The
// content of snapshot are guaranteed to be consistent.
//
// The snapshot must be released after use, by calling Release method.
func (db *DB) GetSnapshot() (*Snapshot, error) {
	if err := db.ok(); err != nil {
		return nil, err
	}

	return db.newSnapshot(), nil
}

// GetProperty returns value of the given property name.
//
// Property names:
//	leveldb.num-files-at-level{n}
//		Returns the number of files at level 'n'.
//	leveldb.stats
//		Returns statistics of the underlying DB.
//	leveldb.iostats
//		Returns statistics of effective disk read and write.
//	leveldb.writedelay
//		Returns cumulative write delay caused by compaction.
//	leveldb.sstables
//		Returns sstables list for each level.
//	leveldb.blockpool
//		Returns block pool stats.
//	leveldb.cachedblock
//		Returns size of cached block.
//	leveldb.openedtables
//		Returns number of opened tables.
//	leveldb.alivesnaps
//		Returns number of alive snapshots.
//	leveldb.aliveiters
//		Returns number of alive iterators.
func (db *DB) GetProperty(name string) (value string, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	const prefix = "leveldb."
	if !strings.HasPrefix(name, prefix) {
		return "", ErrNotFound
	}
	p := name[len(prefix):]

	v := db.s.version()
	defer v.release()

	numFilesPrefix := "num-files-at-level"
	switch {
	case strings.HasPrefix(p, numFilesPrefix):
		var level uint
		var rest string
		n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
		// Exactly one item (the level) must have been scanned; trailing
		// characters make the property name invalid.
		if n != 1 {
			err = ErrNotFound
		} else {
			value = fmt.Sprint(v.tLen(int(level)))
		}
	case p == "stats":
		value = "Compactions\n" +
			" Level |   Tables   |    Size(MB)   |    Time(sec)  |    Read(MB)   |   Write(MB)\n" +
			"-------+------------+---------------+---------------+---------------+---------------\n"
		var totalTables int
		var totalSize, totalRead, totalWrite int64
		var totalDuration time.Duration
		for level, tables := range v.levels {
			duration, read, write := db.compStats.getStat(level)
			if len(tables) == 0 && duration == 0 {
				continue
			}
			totalTables += len(tables)
			totalSize += tables.size()
			totalRead += read
			totalWrite += write
			totalDuration += duration
			value += fmt.Sprintf(" %3d   | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
				level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
				float64(read)/1048576.0, float64(write)/1048576.0)
		}
		value += "-------+------------+---------------+---------------+---------------+---------------\n"
		value += fmt.Sprintf(" Total | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
			totalTables, float64(totalSize)/1048576.0, totalDuration.Seconds(),
			float64(totalRead)/1048576.0, float64(totalWrite)/1048576.0)
	case p == "compcount":
		value = fmt.Sprintf("MemComp:%d Level0Comp:%d NonLevel0Comp:%d SeekComp:%d", atomic.LoadUint32(&db.memComp), atomic.LoadUint32(&db.level0Comp), atomic.LoadUint32(&db.nonLevel0Comp), atomic.LoadUint32(&db.seekComp))
	case p == "iostats":
		value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f",
			float64(db.s.stor.reads())/1048576.0,
			float64(db.s.stor.writes())/1048576.0)
	case p == "writedelay":
		writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay))
		paused := atomic.LoadInt32(&db.inWritePaused) == 1
		value = fmt.Sprintf("DelayN:%d Delay:%s Paused:%t", writeDelayN, writeDelay, paused)
	case p == "sstables":
		for level, tables := range v.levels {
			value += fmt.Sprintf("--- level %d ---\n", level)
			for _, t := range tables {
				value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
			}
		}
	case p == "blockpool":
		value = fmt.Sprintf("%v", db.s.tops.bpool)
	case p == "cachedblock":
		if db.s.tops.bcache != nil {
			value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
		} else {
			value = "<nil>"
		}
	case p == "openedtables":
		value = fmt.Sprintf("%d", db.s.tops.cache.Size())
	case p == "alivesnaps":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
	case p == "aliveiters":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
	default:
		err = ErrNotFound
	}

	return
}

// DBStats is database statistics.
type DBStats struct {
	WriteDelayCount    int32
	WriteDelayDuration time.Duration
	WritePaused        bool

	AliveSnapshots int32
	AliveIterators int32

	IOWrite uint64
	IORead  uint64

	BlockCacheSize    int
	OpenedTablesCount int

	LevelSizes        Sizes
	LevelTablesCounts []int
	LevelRead         Sizes
	LevelWrite        Sizes
	LevelDurations    []time.Duration

	MemComp       uint32
	Level0Comp    uint32
	NonLevel0Comp uint32
	SeekComp      uint32
}

// Stats populates s with database statistics.
func (db *DB) Stats(s *DBStats) error {
	err := db.ok()
	if err != nil {
		return err
	}

	s.IORead = db.s.stor.reads()
	s.IOWrite = db.s.stor.writes()
	s.WriteDelayCount = atomic.LoadInt32(&db.cWriteDelayN)
	s.WriteDelayDuration = time.Duration(atomic.LoadInt64(&db.cWriteDelay))
	s.WritePaused = atomic.LoadInt32(&db.inWritePaused) == 1

	s.OpenedTablesCount = db.s.tops.cache.Size()
	if db.s.tops.bcache != nil {
		s.BlockCacheSize = db.s.tops.bcache.Size()
	} else {
		s.BlockCacheSize = 0
	}

	s.AliveIterators = atomic.LoadInt32(&db.aliveIters)
	s.AliveSnapshots = atomic.LoadInt32(&db.aliveSnaps)

	// Reset the per-level slices while keeping their capacity, so a
	// caller can reuse the same DBStats across calls.
	s.LevelDurations = s.LevelDurations[:0]
	s.LevelRead = s.LevelRead[:0]
	s.LevelWrite = s.LevelWrite[:0]
	s.LevelSizes = s.LevelSizes[:0]
	s.LevelTablesCounts = s.LevelTablesCounts[:0]

	v := db.s.version()
	defer v.release()

	for level, tables := range v.levels {
		duration, read, write := db.compStats.getStat(level)

		s.LevelDurations = append(s.LevelDurations, duration)
		s.LevelRead = append(s.LevelRead, read)
		s.LevelWrite = append(s.LevelWrite, write)
		s.LevelSizes = append(s.LevelSizes, tables.size())
		s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables))
	}
	s.MemComp = atomic.LoadUint32(&db.memComp)
	s.Level0Comp = atomic.LoadUint32(&db.level0Comp)
	s.NonLevel0Comp = atomic.LoadUint32(&db.nonLevel0Comp)
	s.SeekComp = atomic.LoadUint32(&db.seekComp)
	return nil
}

// SizeOf calculates approximate sizes of the given key ranges.
// The length of the returned sizes are equal with the length of the given
// ranges. The returned sizes measure storage space usage, so if the user
// data compresses by a factor of ten, the returned sizes will be one-tenth
// the size of the corresponding user data size.
// The results may not include the sizes of recently written data.
func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
	if err := db.ok(); err != nil {
		return nil, err
	}

	v := db.s.version()
	defer v.release()

	sizes := make(Sizes, 0, len(ranges))
	for _, r := range ranges {
		imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
		imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
		start, err := v.offsetOf(imin)
		if err != nil {
			return nil, err
		}
		limit, err := v.offsetOf(imax)
		if err != nil {
			return nil, err
		}
		var size int64
		if limit >= start {
			size = limit - start
		}
		sizes = append(sizes, size)
	}

	return sizes, nil
}

// Close closes the DB. This will also release any outstanding snapshot,
// abort any in-flight compaction and discard open transaction.
//
// It is not safe to close a DB until all outstanding iterators are released.
// It is valid to call Close multiple times. Other methods should not be
// called after the DB has been closed.
func (db *DB) Close() error {
	// setClosed is atomic; only the first caller proceeds.
	if !db.setClosed() {
		return ErrClosed
	}

	start := time.Now()
	db.log("db@close closing")

	// Clear the finalizer.
	runtime.SetFinalizer(db, nil)

	// Get compaction error.
	var err error
	select {
	case err = <-db.compErrC:
		if err == ErrReadOnly {
			err = nil
		}
	default:
	}

	// Signal all goroutines.
	close(db.closeC)

	// Discard open transaction.
	if db.tr != nil {
		db.tr.Discard()
	}

	// Acquire writer lock.
	db.writeLockC <- struct{}{}

	// Wait for all goroutines to exit.
	db.closeW.Wait()

	// Closes journal.
	if db.journal != nil {
		db.journal.Close()
		db.journalWriter.Close()
		db.journal = nil
		db.journalWriter = nil
	}

	if db.writeDelayN > 0 {
		db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
	}

	// Close session.
	db.s.close()
	db.logf("db@close done T·%v", time.Since(start))
	db.s.release()

	// Close the underlying storage last, keeping the first error seen.
	if db.closer != nil {
		if err1 := db.closer.Close(); err == nil {
			err = err1
		}
		db.closer = nil
	}

	// Clear memdbs.
	db.clearMems()

	return err
}