github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/syndtr/goleveldb/leveldb/db.go (about)

     1  // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
     2  // All rights reserved.
     3  //
     4  // Use of this source code is governed by a BSD-style license that can be
     5  // found in the LICENSE file.
     6  
     7  package leveldb
     8  
     9  import (
    10  	"container/list"
    11  	"fmt"
    12  	"io"
    13  	"os"
    14  	"runtime"
    15  	"strings"
    16  	"sync"
    17  	"sync/atomic"
    18  	"time"
    19  
    20  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/errors"
    21  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/iterator"
    22  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/journal"
    23  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/memdb"
    24  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/opt"
    25  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/storage"
    26  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/table"
    27  	"github.com/insionng/yougam/libraries/syndtr/goleveldb/leveldb/util"
    28  )
    29  
// DB is a LevelDB database.
type DB struct {
	// Need 64-bit alignment.
	// seq is the current upper sequence number. It must stay the first
	// field so it is 64-bit aligned for sync/atomic on 32-bit platforms.
	seq uint64

	// Session.
	s *session

	// MemDB.
	memMu           sync.RWMutex       // Guards mem/frozenMem.
	memPool         chan *memdb.DB     // Pool of reusable memdbs (capacity 1, see openDB).
	mem, frozenMem  *memDB             // Active memdb and the one awaiting flush.
	journal         *journal.Writer    // Journal for the active memdb.
	journalWriter   storage.Writer     // Underlying writer of 'journal'.
	journalFd       storage.FileDesc   // File descriptor of the active journal.
	frozenJournalFd storage.FileDesc   // Journal file belonging to frozenMem.
	frozenSeq       uint64             // Sequence number at the time mem was frozen.

	// Snapshot.
	snapsMu   sync.Mutex // Guards snapsList.
	snapsList *list.List // Acquired snapshot elements.

	// Stats.
	// Maintained atomically; exposed through GetProperty
	// ("alivesnaps"/"aliveiters").
	aliveSnaps, aliveIters int32

	// Write.
	writeC       chan *Batch
	writeMergedC chan bool
	writeLockC   chan struct{} // Capacity-1 channel used as the writer lock (acquired in Close).
	writeAckC    chan error
	writeDelay   time.Duration // Accumulated write throttling delay; logged on Close.
	writeDelayN  int           // Number of delayed writes; logged on Close.
	journalC     chan *Batch
	journalAckC  chan error
	tr           *Transaction // Open transaction, if any; discarded by Close.

	// Compaction.
	compCommitLk     sync.Mutex
	tcompCmdC        chan cCmd
	tcompPauseC      chan chan<- struct{}
	mcompCmdC        chan cCmd
	compErrC         chan error
	compPerErrC      chan error
	compErrSetC      chan error
	compWriteLocking bool
	compStats        cStats
	memdbMaxLevel    int // For testing.

	// Close.
	closeW sync.WaitGroup // Waits for background goroutines started in openDB.
	closeC chan struct{}  // Closed by Close to signal all goroutines.
	closed uint32         // Atomic closed flag.
	closer io.Closer      // Optional; set by OpenFile/RecoverFile to close the storage.
}
    84  
// openDB completes opening a DB on top of an already-recovered session:
// it replays journal files, removes obsolete files (read-write mode only)
// and starts the background compaction and journal-writer goroutines.
func openDB(s *session) (*DB, error) {
	s.log("db@open opening")
	start := time.Now()
	db := &DB{
		s: s,
		// Initial sequence
		seq: s.stSeqNum,
		// MemDB
		memPool: make(chan *memdb.DB, 1),
		// Snapshot
		snapsList: list.New(),
		// Write
		writeC:       make(chan *Batch),
		writeMergedC: make(chan bool),
		writeLockC:   make(chan struct{}, 1),
		writeAckC:    make(chan error),
		journalC:     make(chan *Batch),
		journalAckC:  make(chan error),
		// Compaction
		tcompCmdC:   make(chan cCmd),
		tcompPauseC: make(chan chan<- struct{}),
		mcompCmdC:   make(chan cCmd),
		compErrC:    make(chan error),
		compPerErrC: make(chan error),
		compErrSetC: make(chan error),
		// Close
		closeC: make(chan struct{}),
	}

	// Read-only mode.
	readOnly := s.o.GetReadOnly()

	if readOnly {
		// Recover journals (read-only mode): replay into memory only.
		if err := db.recoverJournalRO(); err != nil {
			return nil, err
		}
	} else {
		// Recover journals.
		if err := db.recoverJournal(); err != nil {
			return nil, err
		}

		// Remove any obsolete files.
		if err := db.checkAndCleanFiles(); err != nil {
			// Close journal on error.
			if db.journal != nil {
				db.journal.Close()
				db.journalWriter.Close()
			}
			return nil, err
		}

	}

	// Doesn't need to be included in the wait group.
	go db.compactionError()
	go db.mpoolDrain()

	if readOnly {
		db.SetReadOnly()
	} else {
		// Three workers tracked by closeW; Close waits for them to exit.
		db.closeW.Add(3)
		go db.tCompaction()
		go db.mCompaction()
		go db.jWriter()
	}

	s.logf("db@open done T·%v", time.Since(start))

	// Close the DB eventually even if the caller forgets to; the
	// finalizer is cleared again inside Close.
	runtime.SetFinalizer(db, (*DB).Close)
	return db, nil
}
   158  
   159  // Open opens or creates a DB for the given storage.
   160  // The DB will be created if not exist, unless ErrorIfMissing is true.
   161  // Also, if ErrorIfExist is true and the DB exist Open will returns
   162  // os.ErrExist error.
   163  //
   164  // Open will return an error with type of ErrCorrupted if corruption
   165  // detected in the DB. Corrupted DB can be recovered with Recover
   166  // function.
   167  //
   168  // The returned DB instance is goroutine-safe.
   169  // The DB must be closed after use, by calling Close method.
   170  func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
   171  	s, err := newSession(stor, o)
   172  	if err != nil {
   173  		return
   174  	}
   175  	defer func() {
   176  		if err != nil {
   177  			s.close()
   178  			s.release()
   179  		}
   180  	}()
   181  
   182  	err = s.recover()
   183  	if err != nil {
   184  		if !os.IsNotExist(err) || s.o.GetErrorIfMissing() {
   185  			return
   186  		}
   187  		err = s.create()
   188  		if err != nil {
   189  			return
   190  		}
   191  	} else if s.o.GetErrorIfExist() {
   192  		err = os.ErrExist
   193  		return
   194  	}
   195  
   196  	return openDB(s)
   197  }
   198  
   199  // OpenFile opens or creates a DB for the given path.
   200  // The DB will be created if not exist, unless ErrorIfMissing is true.
   201  // Also, if ErrorIfExist is true and the DB exist OpenFile will returns
   202  // os.ErrExist error.
   203  //
   204  // OpenFile uses standard file-system backed storage implementation as
   205  // desribed in the leveldb/storage package.
   206  //
   207  // OpenFile will return an error with type of ErrCorrupted if corruption
   208  // detected in the DB. Corrupted DB can be recovered with Recover
   209  // function.
   210  //
   211  // The returned DB instance is goroutine-safe.
   212  // The DB must be closed after use, by calling Close method.
   213  func OpenFile(path string, o *opt.Options) (db *DB, err error) {
   214  	stor, err := storage.OpenFile(path, o.GetReadOnly())
   215  	if err != nil {
   216  		return
   217  	}
   218  	db, err = Open(stor, o)
   219  	if err != nil {
   220  		stor.Close()
   221  	} else {
   222  		db.closer = stor
   223  	}
   224  	return
   225  }
   226  
   227  // Recover recovers and opens a DB with missing or corrupted manifest files
   228  // for the given storage. It will ignore any manifest files, valid or not.
   229  // The DB must already exist or it will returns an error.
   230  // Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
   231  //
   232  // The returned DB instance is goroutine-safe.
   233  // The DB must be closed after use, by calling Close method.
   234  func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
   235  	s, err := newSession(stor, o)
   236  	if err != nil {
   237  		return
   238  	}
   239  	defer func() {
   240  		if err != nil {
   241  			s.close()
   242  			s.release()
   243  		}
   244  	}()
   245  
   246  	err = recoverTable(s, o)
   247  	if err != nil {
   248  		return
   249  	}
   250  	return openDB(s)
   251  }
   252  
   253  // RecoverFile recovers and opens a DB with missing or corrupted manifest files
   254  // for the given path. It will ignore any manifest files, valid or not.
   255  // The DB must already exist or it will returns an error.
   256  // Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
   257  //
   258  // RecoverFile uses standard file-system backed storage implementation as desribed
   259  // in the leveldb/storage package.
   260  //
   261  // The returned DB instance is goroutine-safe.
   262  // The DB must be closed after use, by calling Close method.
   263  func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
   264  	stor, err := storage.OpenFile(path, false)
   265  	if err != nil {
   266  		return
   267  	}
   268  	db, err = Recover(stor, o)
   269  	if err != nil {
   270  		stor.Close()
   271  	} else {
   272  		db.closer = stor
   273  	}
   274  	return
   275  }
   276  
// recoverTable rebuilds the manifest of session s from the table files
// found in storage: every readable table is scanned, corrupted tables are
// rebuilt (or dropped under StrictRecovery), surviving tables are added to
// level 0 of a fresh manifest, and the session's sequence number is set to
// the highest sequence number seen.
func recoverTable(s *session, o *opt.Options) error {
	o = dupOptions(o)
	// Mask StrictReader, lets StrictRecovery doing its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort it by file number.
	fds, err := s.stor.List(storage.TypeTable)
	if err != nil {
		return err
	}
	sortFds(fds)

	var (
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)
		noSync = o.GetNoSync()

		rec   = &sessionRecord{}
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)
	// buildTable writes the valid entries produced by iter into a fresh
	// temporary table file and returns its descriptor and size. On error
	// the temporary file is removed and a zero descriptor is returned.
	buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
		tmpFd = s.newTemp()
		writer, err := s.stor.Create(tmpFd)
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				s.stor.Remove(tmpFd)
				tmpFd = storage.FileDesc{}
			}
		}()

		// Copy entries, skipping keys that do not parse as internal keys.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validInternalKey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		err = iter.Error()
		if err != nil {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		if !noSync {
			err = writer.Sync()
			if err != nil {
				return
			}
		}
		size = int64(tw.BytesLen())
		return
	}
	// recoverTable scans a single table file, counts good/corrupted keys,
	// rebuilds the table if it is partially corrupted, and records it into
	// rec at level 0 when anything was salvageable.
	recoverTable := func(fd storage.FileDesc) error {
		s.logf("table@recovery recovering @%d", fd.Num)
		reader, err := s.stor.Open(fd)
		if err != nil {
			return err
		}
		// closed tracks whether reader was closed early (before rename).
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size by seeking to the end.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte
		)
		tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
			// Count corrupted blocks instead of aborting the scan.
			itererr.SetErrorCallback(func(err error) {
				if errors.IsCorrupted(err) {
					s.logf("table@recovery block corruption @%d %q", fd.Num, err)
					tcorruptedBlock++
				}
			})
		}

		// Scan the table, tracking key range and max sequence number.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseInternalKey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		// Under StrictRecovery any corruption drops the whole table.
		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table from its good entries, then replace
				// the original file in place via rename.
				s.logf("table@recovery rebuilding @%d", fd.Num)
				iter := tr.NewIterator(nil, nil)
				tmpFd, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				closed = true
				reader.Close()
				if err := s.stor.Rename(tmpFd, fd); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, fd.Num, size, imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(fds) > 0 {
		s.logf("table@recovery F·%d", len(fds))

		// Mark file number as used.
		s.markFileNum(fds[len(fds)-1].Num)

		for _, fd := range fds {
			if err := recoverTable(fd); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}
   472  
// recoverJournal replays all journal files at or after the session's
// current journal number into a memdb, flushing the memdb to level-0
// tables whenever it grows past the write buffer (or when moving on to
// the next journal), and finally creates a fresh journal and commits the
// resulting session record. Fully-replayed journal files are removed.
func (db *DB) recoverJournal() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		ofd storage.FileDesc // Obsolete file.
		rec = &sessionRecord{}
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery F·%d", len(fds))

		// Mark file number as used.
		db.s.markFileNum(fds[len(fds)-1].Num)

		var (
			// Options.
			strict      = db.s.o.GetStrict(opt.StrictJournal)
			checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
			writeBuffer = db.s.o.GetWriteBuffer()

			jr    *journal.Reader
			mdb   = memdb.New(db.s.icmp, writeBuffer)
			buf   = &util.Buffer{}
			batch = &Batch{}
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Flush memdb and remove obsolete journal file. The commit
			// happens before removal so a crash here loses nothing.
			if !ofd.Nil() {
				if mdb.Len() > 0 {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}
				}

				rec.setJournalNum(fd.Num)
				rec.setSeqNum(db.seq)
				if err := db.s.commit(rec); err != nil {
					fr.Close()
					return err
				}
				rec.resetAddedTables()

				db.s.stor.Remove(ofd)
				ofd = storage.FileDesc{}
			}

			// Replay journal to memdb.
			mdb.Reset()
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batch.seq + uint64(batch.Len())

				// Flush it if large enough.
				if mdb.Size() >= writeBuffer {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}

					mdb.Reset()
				}
			}

			fr.Close()
			// Mark this journal obsolete; it is removed after the next
			// commit (or after the final commit below).
			ofd = fd
		}

		// Flush the last memdb.
		if mdb.Len() > 0 {
			if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
				return err
			}
		}
	}

	// Create a new journal.
	if _, err := db.newMem(0); err != nil {
		return err
	}

	// Commit.
	rec.setJournalNum(db.journalFd.Num)
	rec.setSeqNum(db.seq)
	if err := db.s.commit(rec); err != nil {
		// Close journal on error.
		if db.journal != nil {
			db.journal.Close()
			db.journalWriter.Close()
		}
		return err
	}

	// Remove the last obsolete journal file.
	if !ofd.Nil() {
		db.s.stor.Remove(ofd)
	}

	return nil
}
   633  
// recoverJournalRO replays all relevant journal files into a single
// in-memory memdb without flushing tables, committing, or removing any
// files — storage is left untouched, as required by read-only mode.
func (db *DB) recoverJournalRO() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		// Options.
		strict      = db.s.o.GetStrict(opt.StrictJournal)
		checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
		writeBuffer = db.s.o.GetWriteBuffer()

		mdb = memdb.New(db.s.icmp, writeBuffer)
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery RO·Mode F·%d", len(fds))

		var (
			jr    *journal.Reader
			buf   = &util.Buffer{}
			batch = &Batch{}
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Replay journal to memdb. Unlike recoverJournal, mdb is
			// never reset or flushed: it accumulates all journals.
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batch.seq + uint64(batch.Len())
			}

			fr.Close()
		}
	}

	// Set memDB.
	db.mem = &memDB{db: db, DB: mdb, ref: 1}

	return nil
}
   730  
   731  func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
   732  	mk, mv, err := mdb.Find(ikey)
   733  	if err == nil {
   734  		ukey, _, kt, kerr := parseInternalKey(mk)
   735  		if kerr != nil {
   736  			// Shouldn't have had happen.
   737  			panic(kerr)
   738  		}
   739  		if icmp.uCompare(ukey, ikey.ukey()) == 0 {
   740  			if kt == keyTypeDel {
   741  				return true, nil, ErrNotFound
   742  			}
   743  			return true, mv, nil
   744  
   745  		}
   746  	} else if err != ErrNotFound {
   747  		return true, nil, err
   748  	}
   749  	return
   750  }
   751  
// get looks up key at the given sequence number, consulting in order:
// the optional auxiliary memdb, the active and frozen memdbs, then the
// on-disk tables of the current version. The returned value is a fresh
// copy, safe for the caller to modify.
func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		// defer inside the loop is deliberate: at most two iterations,
		// and both memdbs must stay referenced until this function returns.
		defer m.decref()

		if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	v := db.s.version()
	value, cSched, err := v.get(auxt, ikey, ro, false)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	return
}
   782  
   783  func nilIfNotFound(err error) error {
   784  	if err == ErrNotFound {
   785  		return nil
   786  	}
   787  	return err
   788  }
   789  
// has reports whether key exists at the given sequence number, consulting
// the same sources in the same order as get, but without copying values.
// ErrNotFound is translated into (false, nil).
func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		// defer inside the loop is deliberate: at most two iterations,
		// and both memdbs must stay referenced until this function returns.
		defer m.decref()

		if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	v := db.s.version()
	// noValue=true: only existence is needed, skip reading the value.
	_, cSched, err := v.get(auxt, ikey, ro, true)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	if err == nil {
		ret = true
	} else if err == ErrNotFound {
		err = nil
	}
	return
}
   825  
   826  // Get gets the value for the given key. It returns ErrNotFound if the
   827  // DB does not contains the key.
   828  //
   829  // The returned slice is its own copy, it is safe to modify the contents
   830  // of the returned slice.
   831  // It is safe to modify the contents of the argument after Get returns.
   832  func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
   833  	err = db.ok()
   834  	if err != nil {
   835  		return
   836  	}
   837  
   838  	se := db.acquireSnapshot()
   839  	defer db.releaseSnapshot(se)
   840  	return db.get(nil, nil, key, se.seq, ro)
   841  }
   842  
   843  // Has returns true if the DB does contains the given key.
   844  //
   845  // It is safe to modify the contents of the argument after Get returns.
   846  func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
   847  	err = db.ok()
   848  	if err != nil {
   849  		return
   850  	}
   851  
   852  	se := db.acquireSnapshot()
   853  	defer db.releaseSnapshot(se)
   854  	return db.has(nil, nil, key, se.seq, ro)
   855  }
   856  
   857  // NewIterator returns an iterator for the latest snapshot of the
   858  // underlying DB.
   859  // The returned iterator is not goroutine-safe, but it is safe to use
   860  // multiple iterators concurrently, with each in a dedicated goroutine.
   861  // It is also safe to use an iterator concurrently with modifying its
   862  // underlying DB. The resultant key/value pairs are guaranteed to be
   863  // consistent.
   864  //
   865  // Slice allows slicing the iterator to only contains keys in the given
   866  // range. A nil Range.Start is treated as a key before all keys in the
   867  // DB. And a nil Range.Limit is treated as a key after all keys in
   868  // the DB.
   869  //
   870  // The iterator must be released after use, by calling Release method.
   871  //
   872  // Also read Iterator documentation of the leveldb/iterator package.
   873  func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
   874  	if err := db.ok(); err != nil {
   875  		return iterator.NewEmptyIterator(err)
   876  	}
   877  
   878  	se := db.acquireSnapshot()
   879  	defer db.releaseSnapshot(se)
   880  	// Iterator holds 'version' lock, 'version' is immutable so snapshot
   881  	// can be released after iterator created.
   882  	return db.newIterator(nil, nil, se.seq, slice, ro)
   883  }
   884  
   885  // GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
   886  // is a frozen snapshot of a DB state at a particular point in time. The
   887  // content of snapshot are guaranteed to be consistent.
   888  //
   889  // The snapshot must be released after use, by calling Release method.
   890  func (db *DB) GetSnapshot() (*Snapshot, error) {
   891  	if err := db.ok(); err != nil {
   892  		return nil, err
   893  	}
   894  
   895  	return db.newSnapshot(), nil
   896  }
   897  
// GetProperty returns value of the given property name. Names not
// starting with the "leveldb." prefix, or unknown properties, yield
// ErrNotFound.
//
// Property names:
//	leveldb.num-files-at-level{n}
//		Returns the number of files at level 'n'.
//	leveldb.stats
//		Returns statistics of the underlying DB.
//	leveldb.sstables
//		Returns sstables list for each level.
//	leveldb.blockpool
//		Returns block pool stats.
//	leveldb.cachedblock
//		Returns size of cached block.
//	leveldb.openedtables
//		Returns number of opened tables.
//	leveldb.alivesnaps
//		Returns number of alive snapshots.
//	leveldb.aliveiters
//		Returns number of alive iterators.
func (db *DB) GetProperty(name string) (value string, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	const prefix = "leveldb."
	if !strings.HasPrefix(name, prefix) {
		return "", ErrNotFound
	}
	// p is the property name with the "leveldb." prefix stripped.
	p := name[len(prefix):]

	// Hold the current version while reading per-level data.
	v := db.s.version()
	defer v.release()

	numFilesPrefix := "num-files-at-level"
	switch {
	case strings.HasPrefix(p, numFilesPrefix):
		var level uint
		var rest string
		// Exactly one integer must follow the prefix; trailing text is rejected.
		n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
		if n != 1 {
			err = ErrNotFound
		} else {
			value = fmt.Sprint(v.tLen(int(level)))
		}
	case p == "stats":
		value = "Compactions\n" +
			" Level |   Tables   |    Size(MB)   |    Time(sec)  |    Read(MB)   |   Write(MB)\n" +
			"-------+------------+---------------+---------------+---------------+---------------\n"
		for level, tables := range v.levels {
			duration, read, write := db.compStats.getStat(level)
			// Skip levels with no tables and no recorded compaction time.
			if len(tables) == 0 && duration == 0 {
				continue
			}
			value += fmt.Sprintf(" %3d   | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
				level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
				float64(read)/1048576.0, float64(write)/1048576.0)
		}
	case p == "sstables":
		for level, tables := range v.levels {
			value += fmt.Sprintf("--- level %d ---\n", level)
			for _, t := range tables {
				value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
			}
		}
	case p == "blockpool":
		value = fmt.Sprintf("%v", db.s.tops.bpool)
	case p == "cachedblock":
		if db.s.tops.bcache != nil {
			value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
		} else {
			value = "<nil>"
		}
	case p == "openedtables":
		value = fmt.Sprintf("%d", db.s.tops.cache.Size())
	case p == "alivesnaps":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
	case p == "aliveiters":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
	default:
		err = ErrNotFound
	}

	return
}
   983  
   984  // SizeOf calculates approximate sizes of the given key ranges.
   985  // The length of the returned sizes are equal with the length of the given
   986  // ranges. The returned sizes measure storage space usage, so if the user
   987  // data compresses by a factor of ten, the returned sizes will be one-tenth
   988  // the size of the corresponding user data size.
   989  // The results may not include the sizes of recently written data.
   990  func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
   991  	if err := db.ok(); err != nil {
   992  		return nil, err
   993  	}
   994  
   995  	v := db.s.version()
   996  	defer v.release()
   997  
   998  	sizes := make(Sizes, 0, len(ranges))
   999  	for _, r := range ranges {
  1000  		imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
  1001  		imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
  1002  		start, err := v.offsetOf(imin)
  1003  		if err != nil {
  1004  			return nil, err
  1005  		}
  1006  		limit, err := v.offsetOf(imax)
  1007  		if err != nil {
  1008  			return nil, err
  1009  		}
  1010  		var size int64
  1011  		if limit >= start {
  1012  			size = limit - start
  1013  		}
  1014  		sizes = append(sizes, size)
  1015  	}
  1016  
  1017  	return sizes, nil
  1018  }
  1019  
// Close closes the DB. This will also releases any outstanding snapshot,
// abort any in-flight compaction and discard open transaction.
//
// It is not safe to close a DB until all outstanding iterators are released.
// It is valid to call Close multiple times. Other methods should not be
// called after the DB has been closed.
func (db *DB) Close() error {
	// setClosed is atomic; only the first caller proceeds.
	if !db.setClosed() {
		return ErrClosed
	}

	start := time.Now()
	db.log("db@close closing")

	// Clear the finalizer set in openDB.
	runtime.SetFinalizer(db, nil)

	// Get compaction error, if any (non-blocking).
	var err error
	select {
	case err = <-db.compErrC:
		// ErrReadOnly is expected in read-only mode; not a real failure.
		if err == ErrReadOnly {
			err = nil
		}
	default:
	}

	// Signal all goroutines.
	close(db.closeC)

	// Discard open transaction.
	if db.tr != nil {
		db.tr.Discard()
	}

	// Acquire writer lock so no write is in flight past this point.
	db.writeLockC <- struct{}{}

	// Wait for all goroutines to exit.
	db.closeW.Wait()

	// Closes journal.
	if db.journal != nil {
		db.journal.Close()
		db.journalWriter.Close()
	}

	if db.writeDelayN > 0 {
		db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
	}

	// Close session.
	db.s.close()
	db.logf("db@close done T·%v", time.Since(start))
	db.s.release()

	// Close the storage (set by OpenFile/RecoverFile), preserving any
	// earlier error.
	if db.closer != nil {
		if err1 := db.closer.Close(); err == nil {
			err = err1
		}
	}

	// NIL'ing pointers.
	db.s = nil
	db.mem = nil
	db.frozenMem = nil
	db.journal = nil
	db.journalWriter = nil
	db.closer = nil

	return err
}