github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitree/bdb/db.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bdb
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"hash/fnv"
    21  	"os"
    22  	"runtime"
    23  	"sort"
    24  	"sync"
    25  	"time"
    26  	"unsafe"
    27  
    28  	"github.com/cockroachdb/errors"
    29  	"github.com/zuoyebang/bitalosdb/internal/bitask"
    30  	"github.com/zuoyebang/bitalosdb/internal/options"
    31  
    32  	"github.com/zuoyebang/bitalosdb/internal/base"
    33  	"github.com/zuoyebang/bitalosdb/internal/consts"
    34  	"github.com/zuoyebang/bitalosdb/internal/utils"
    35  )
    36  
// maxMmapStep is the granularity (1 GiB) that mmapSize rounds large mapping
// sizes up to.
const maxMmapStep = 1 << 30

// Format versions accepted by meta.validate. loadFreelist uses the stored
// version to choose between the bitmap freelist reader and the legacy reader.
const (
	versionFreelistUint uint32 = 2 + iota
	versionFreelistBitmap
)

// version is the format version that init writes into fresh meta pages and
// that Open records on a writable DB.
const version uint32 = 3

// magic is the marker a meta page must carry to pass meta.validate.
const magic uint32 = 0xED0CDAED

// pgidNoFreelist is the meta.freelist value that hasSyncedFreelist treats as
// "no freelist page has been synced".
const pgidNoFreelist pgid = 0xffffffffffffffff

// IgnoreNoSync is true when the binary runs on OpenBSD.
// NOTE(review): presumably forces syncing on that platform even when NoSync
// is set; the use site is not in this part of the file.
const IgnoreNoSync = runtime.GOOS == "openbsd"

// Batching defaults that Open assigns to DB.MaxBatchSize and DB.MaxBatchDelay.
const (
	DefaultMaxBatchSize  int = 1000
	DefaultMaxBatchDelay     = 10 * time.Millisecond
)

// defaultPageSize is the operating system page size; Open falls back to it
// when the options do not specify a page size.
var defaultPageSize = os.Getpagesize()

// flockRetryTimeout is not referenced in this part of the file.
// NOTE(review): presumably the pause between file-lock attempts; confirm.
const flockRetryTimeout = 50 * time.Millisecond
    59  
// DB is a single memory-mapped database file together with the locks,
// transactions and freelist that manage it. Instances are created by Open.
type DB struct {
	// Tuning knobs. Open copies NoSync, NoFreelistSync, FreelistType,
	// NoGrowSync, MmapFlags and Mlock from the supplied options and sets
	// MaxBatchSize, MaxBatchDelay and AllocSize to package defaults.
	StrictMode     bool
	NoSync         bool
	NoFreelistSync bool
	FreelistType   string
	NoGrowSync     bool
	MmapFlags      int
	MaxBatchSize   int
	MaxBatchDelay  time.Duration
	AllocSize      int
	Mlock          bool

	// ops holds replaceable hooks: writeAt is bound to the file's WriteAt in
	// Open (and cleared on close); pushTaskCB and checkPageSplitted come from
	// the options and are used by freePagesForTask.
	ops struct {
		writeAt           func(b []byte, off int64) (n int, err error)
		pushTaskCB        func(task *bitask.BitpageTaskData)
		checkPageSplitted func(pn uint32) bool
	}

	// path/openFile/file describe the backing file. data is the mapped view
	// that page() indexes into; datasz and filesz are compared by allocate
	// and grow to decide when to remap or extend the file. meta0/meta1 point
	// at pages 0 and 1 of the mapping. rwtx is the current writable
	// transaction and txs the open read-only ones. rwlock serialises writers,
	// metalock guards txs/rwtx bookkeeping, mmaplock guards the mapping and
	// statlock guards stats.
	path            string
	openFile        func(string, int, os.FileMode) (*os.File, error)
	file            *os.File
	dataref         []byte
	data            *[maxMapSize]byte
	datasz          int
	filesz          int
	meta0           *meta
	meta1           *meta
	pageSize        int
	opened          bool
	rwtx            *Tx
	txs             []*Tx
	stats           Stats
	pagePool        sync.Pool
	batchMu         sync.Mutex
	batch           *batch
	rwlock          sync.Mutex
	metalock        sync.Mutex
	mmaplock        sync.RWMutex
	statlock        sync.RWMutex
	readOnly        bool
	freelist        *freelist
	freelistLoad    sync.Once
	upgradeFreelist bool
	version         uint32
	logger          base.Logger
	cmp             base.Compare
	index           int
}
   108  
// Path returns the stored file path. Open sets it from the opened file's
// name and close resets it to the empty string.
func (db *DB) Path() string {
	return db.path
}
   112  
// GoString returns a Go-syntax-like description of the database containing
// its quoted path.
func (db *DB) GoString() string {
	return fmt.Sprintf("bdb.DB{path:%q}", db.path)
}
   116  
// String returns a short human-readable description of the database
// containing its quoted path.
func (db *DB) String() string {
	return fmt.Sprintf("DB<%q>", db.path)
}
   120  
   121  func Open(path string, opts *options.BdbOptions) (*DB, error) {
   122  	db := &DB{
   123  		opened: true,
   124  	}
   125  
   126  	if opts == nil {
   127  		opts = options.DefaultBdbOptions
   128  	}
   129  
   130  	db.NoSync = opts.NoSync
   131  	db.NoGrowSync = opts.NoGrowSync
   132  	db.MmapFlags = opts.MmapFlags
   133  	db.NoFreelistSync = opts.NoFreelistSync
   134  	db.FreelistType = opts.FreelistType
   135  	db.Mlock = opts.Mlock
   136  	db.logger = opts.Logger
   137  	db.cmp = opts.Cmp
   138  	db.index = opts.Index
   139  
   140  	db.MaxBatchSize = DefaultMaxBatchSize
   141  	db.MaxBatchDelay = DefaultMaxBatchDelay
   142  	db.AllocSize = consts.BdbAllocSize
   143  
   144  	flag := os.O_RDWR
   145  	if opts.ReadOnly {
   146  		flag = os.O_RDONLY
   147  		db.readOnly = true
   148  	}
   149  
   150  	db.openFile = opts.OpenFile
   151  	if db.openFile == nil {
   152  		db.openFile = os.OpenFile
   153  	}
   154  
   155  	var err error
   156  	if db.file, err = db.openFile(path, flag|os.O_CREATE, consts.FileMode); err != nil {
   157  		_ = db.close()
   158  		return nil, err
   159  	}
   160  	db.path = db.file.Name()
   161  
   162  	if err = flock(db, !db.readOnly, opts.Timeout); err != nil {
   163  		_ = db.close()
   164  		return nil, err
   165  	}
   166  
   167  	db.ops.writeAt = db.file.WriteAt
   168  	db.ops.pushTaskCB = opts.BitpageTaskPushFunc
   169  	db.ops.checkPageSplitted = opts.CheckPageSplitted
   170  
   171  	if db.pageSize = opts.PageSize; db.pageSize == 0 {
   172  		db.pageSize = defaultPageSize
   173  	}
   174  
   175  	if info, err := db.file.Stat(); err != nil {
   176  		_ = db.close()
   177  		return nil, err
   178  	} else if info.Size() == 0 {
   179  		if err := db.init(); err != nil {
   180  			_ = db.close()
   181  			return nil, err
   182  		}
   183  	} else {
   184  		var buf [0x1000]byte
   185  		if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
   186  			if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
   187  				db.pageSize = int(m.pageSize)
   188  			}
   189  		} else {
   190  			_ = db.close()
   191  			return nil, ErrInvalid
   192  		}
   193  	}
   194  
   195  	db.pagePool = sync.Pool{
   196  		New: func() interface{} {
   197  			return make([]byte, db.pageSize)
   198  		},
   199  	}
   200  
   201  	if err := db.mmap(opts.InitialMmapSize); err != nil {
   202  		_ = db.close()
   203  		return nil, err
   204  	}
   205  
   206  	if db.readOnly {
   207  		return db, nil
   208  	}
   209  
   210  	db.version = version
   211  	db.loadFreelist()
   212  	if err := db.upgradeFreelistVersion(); err != nil {
   213  		_ = db.close()
   214  		return nil, err
   215  	}
   216  
   217  	if !db.NoFreelistSync && !db.hasSyncedFreelist() {
   218  		tx, err := db.Begin(true)
   219  		if tx != nil {
   220  			err = tx.Commit()
   221  		}
   222  		if err != nil {
   223  			_ = db.close()
   224  			return nil, err
   225  		}
   226  	}
   227  
   228  	return db, nil
   229  }
   230  
   231  func (db *DB) loadFreelist() {
   232  	db.freelistLoad.Do(func() {
   233  		db.freelist = newFreelist(db.FreelistType)
   234  		if !db.hasSyncedFreelist() {
   235  			db.freelist.readIDs(db.freepages())
   236  		} else {
   237  			dbMeta := db.meta()
   238  			db.logger.Infof("[BDB %d] load freelist [version:%d] [txid:%d]", db.index, dbMeta.version, dbMeta.txid)
   239  			if dbMeta.version == versionFreelistBitmap {
   240  				db.freelist.readFromBitmap(db.page(dbMeta.freelist))
   241  			} else {
   242  				db.freelist.read(db.page(dbMeta.freelist))
   243  				db.upgradeFreelist = true
   244  			}
   245  		}
   246  		db.stats.FreePageN = db.freelist.free_count()
   247  	})
   248  }
   249  
   250  func (db *DB) upgradeFreelistVersion() error {
   251  	if !db.upgradeFreelist {
   252  		return nil
   253  	}
   254  
   255  	tx, err := db.Begin(true)
   256  	if tx != nil {
   257  		err = tx.Commit()
   258  	}
   259  	if err != nil {
   260  		_ = db.close()
   261  		return err
   262  	}
   263  
   264  	db.upgradeFreelist = false
   265  	db.logger.Infof("bdb upgradeFreelistVersion success [bdb:%s] [version:%d] [txid:%d]", db.path, db.meta().version, db.meta().txid)
   266  	return nil
   267  }
   268  
// hasSyncedFreelist reports whether the current meta page references a
// freelist page, i.e. its freelist field is not pgidNoFreelist.
func (db *DB) hasSyncedFreelist() bool {
	return db.meta().freelist != pgidNoFreelist
}
   272  
// mmap (re)maps the database file into memory with a mapping of at least
// minsz bytes (and at least the current file size), as rounded by mmapSize.
// It holds mmaplock exclusively for the duration. It fails when the file
// holds fewer than two pages, when the size cannot be computed, when any of
// the unlock/unmap/map/lock steps fails, or when both meta pages are invalid.
func (db *DB) mmap(minsz int) error {
	db.mmaplock.Lock()
	defer db.mmaplock.Unlock()

	info, err := db.file.Stat()
	if err != nil {
		return errors.Wrap(err, "mmap stat err")
	} else if int(info.Size()) < db.pageSize*2 {
		// The two meta pages must be present.
		return errors.New("file size too small")
	}

	fileSize := int(info.Size())
	var size = fileSize
	if size < minsz {
		size = minsz
	}
	size, err = db.mmapSize(size)
	if err != nil {
		return err
	}

	if db.Mlock {
		if err := db.munlock(fileSize); err != nil {
			return err
		}
	}

	// Let the active write transaction drop its references into the old
	// mapping before it is torn down.
	if db.rwtx != nil {
		db.rwtx.root.dereference()
	}

	if err := db.munmap(); err != nil {
		return err
	}

	if err := mmap(db, size); err != nil {
		return err
	}

	if db.Mlock {
		if err := db.mlock(fileSize); err != nil {
			return err
		}
	}

	// Re-point the meta page references at the new mapping.
	db.meta0 = db.page(0).meta()
	db.meta1 = db.page(1).meta()

	// One valid meta page is sufficient; fail only when both are invalid.
	err0 := db.meta0.validate()
	err1 := db.meta1.validate()
	if err0 != nil && err1 != nil {
		return err0
	}

	return nil
}
   329  
   330  func (db *DB) munmap() error {
   331  	if err := munmap(db); err != nil {
   332  		return errors.Wrap(err, "unmap err")
   333  	}
   334  	return nil
   335  }
   336  
   337  func (db *DB) mmapSize(size int) (int, error) {
   338  	for i := uint(15); i <= 30; i++ {
   339  		if size <= 1<<i {
   340  			return 1 << i, nil
   341  		}
   342  	}
   343  
   344  	if size > maxMapSize {
   345  		return 0, errors.New("mmap too large")
   346  	}
   347  
   348  	sz := int64(size)
   349  	if remainder := sz % int64(maxMmapStep); remainder > 0 {
   350  		sz += int64(maxMmapStep) - remainder
   351  	}
   352  
   353  	pageSize := int64(db.pageSize)
   354  	if (sz % pageSize) != 0 {
   355  		sz = ((sz / pageSize) + 1) * pageSize
   356  	}
   357  
   358  	if sz > maxMapSize {
   359  		sz = maxMapSize
   360  	}
   361  
   362  	return int(sz), nil
   363  }
   364  
   365  func (db *DB) munlock(fileSize int) error {
   366  	if err := munlock(db, fileSize); err != nil {
   367  		return errors.Wrap(err, "munlock err")
   368  	}
   369  	return nil
   370  }
   371  
   372  func (db *DB) mlock(fileSize int) error {
   373  	if err := mlock(db, fileSize); err != nil {
   374  		return errors.Wrap(err, "mlock err")
   375  	}
   376  	return nil
   377  }
   378  
   379  func (db *DB) mrelock(fileSizeFrom, fileSizeTo int) error {
   380  	if err := db.munlock(fileSizeFrom); err != nil {
   381  		return err
   382  	}
   383  	if err := db.mlock(fileSizeTo); err != nil {
   384  		return err
   385  	}
   386  	return nil
   387  }
   388  
   389  func (db *DB) init() error {
   390  	buf := make([]byte, db.pageSize*4)
   391  	for i := 0; i < 2; i++ {
   392  		p := db.pageInBuffer(buf, pgid(i))
   393  		p.id = pgid(i)
   394  		p.flags = metaPageFlag
   395  
   396  		m := p.meta()
   397  		m.magic = magic
   398  		m.version = version
   399  		m.pageSize = uint32(db.pageSize)
   400  		m.freelist = 2
   401  		m.root = bucket{root: 3}
   402  		m.pgid = 4
   403  		m.txid = txid(i)
   404  		m.checksum = m.sum64()
   405  	}
   406  
   407  	p := db.pageInBuffer(buf, pgid(2))
   408  	p.id = pgid(2)
   409  	p.flags = freelistPageFlag
   410  	p.count = 0
   411  
   412  	p = db.pageInBuffer(buf, pgid(3))
   413  	p.id = pgid(3)
   414  	p.flags = leafPageFlag
   415  	p.count = 0
   416  
   417  	if _, err := db.ops.writeAt(buf, 0); err != nil {
   418  		return err
   419  	}
   420  	if err := fdatasync(db); err != nil {
   421  		return err
   422  	}
   423  	db.filesz = len(buf)
   424  
   425  	return nil
   426  }
   427  
// Close shuts the database down. It acquires rwlock, metalock and mmaplock
// (in that order) before delegating to close, so it blocks while a write
// transaction holds rwlock or read transactions hold mmaplock.
func (db *DB) Close() error {
	db.rwlock.Lock()
	defer db.rwlock.Unlock()

	db.metalock.Lock()
	defer db.metalock.Unlock()

	db.mmaplock.Lock()
	defer db.mmaplock.Unlock()

	return db.close()
}
   440  
   441  func (db *DB) close() error {
   442  	if !db.opened {
   443  		return nil
   444  	}
   445  
   446  	db.opened = false
   447  
   448  	db.freelist = nil
   449  
   450  	db.ops.writeAt = nil
   451  
   452  	if err := db.munmap(); err != nil {
   453  		return err
   454  	}
   455  
   456  	if db.file != nil {
   457  		if !db.readOnly {
   458  			if err := funlock(db); err != nil {
   459  				return errors.Wrap(err, "funlock err")
   460  			}
   461  		}
   462  
   463  		if err := db.file.Close(); err != nil {
   464  			return errors.Wrap(err, "file close err")
   465  		}
   466  		db.file = nil
   467  	}
   468  
   469  	db.path = ""
   470  	return nil
   471  }
   472  
   473  func (db *DB) Begin(writable bool) (*Tx, error) {
   474  	if writable {
   475  		return db.beginRWTx()
   476  	}
   477  	return db.beginTx()
   478  }
   479  
// beginTx starts a read-only transaction. It returns ErrDatabaseNotOpen when
// the DB is closed. On success the transaction is registered in db.txs and
// the mmaplock read lock taken here stays held; removeTx releases it.
func (db *DB) beginTx() (*Tx, error) {
	// metalock guards the db.txs bookkeeping below.
	db.metalock.Lock()
	db.mmaplock.RLock()

	if !db.opened {
		db.mmaplock.RUnlock()
		db.metalock.Unlock()
		return nil, ErrDatabaseNotOpen
	}

	t := &Tx{}
	t.init(db)

	db.txs = append(db.txs, t)
	n := len(db.txs)

	db.metalock.Unlock()

	// Stats are protected by their own lock.
	db.statlock.Lock()
	db.stats.TxN++
	db.stats.OpenTxN = n
	db.statlock.Unlock()

	return t, nil
}
   505  
// beginRWTx starts the read-write transaction. It returns
// ErrDatabaseReadOnly for a read-only DB and ErrDatabaseNotOpen for a closed
// one. On success rwlock remains locked when the function returns
// (NOTE(review): presumably released when the transaction finishes; that
// code is not in this part of the file), the new transaction is stored in
// db.rwtx, and freePages is run to release pages no open transaction needs.
func (db *DB) beginRWTx() (*Tx, error) {
	if db.readOnly {
		return nil, ErrDatabaseReadOnly
	}

	db.rwlock.Lock()

	db.metalock.Lock()
	defer db.metalock.Unlock()

	if !db.opened {
		db.rwlock.Unlock()
		return nil, ErrDatabaseNotOpen
	}

	t := &Tx{writable: true}
	t.init(db)
	db.rwtx = t
	db.freePages()
	return t, nil
}
   527  
   528  func (db *DB) freePages() {
   529  	var m, m1, m2, m3 pgids
   530  	sort.Sort(txsById(db.txs))
   531  	minid := txid(0xFFFFFFFFFFFFFFFF)
   532  	if len(db.txs) > 0 {
   533  		minid = db.txs[0].meta.txid
   534  	}
   535  	if minid > 0 {
   536  		m1 = db.freelist.release(minid - 1)
   537  	}
   538  	for _, t := range db.txs {
   539  		m2 = db.freelist.releaseRange(minid, t.meta.txid-1)
   540  		minid = t.meta.txid + 1
   541  	}
   542  	m3 = db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
   543  
   544  	dstCache := make(map[pgid]struct{}, 0)
   545  	mergePgids := func(pids pgids) {
   546  		if len(pids) == 0 {
   547  			return
   548  		}
   549  		for i := range pids {
   550  			if _, ok := dstCache[pids[i]]; !ok {
   551  				dstCache[pids[i]] = struct{}{}
   552  				m = append(m, pids[i])
   553  			}
   554  		}
   555  	}
   556  	mergePgids(m1)
   557  	mergePgids(m2)
   558  	mergePgids(m3)
   559  	db.freePagesForTask(m)
   560  }
   561  
   562  func (db *DB) freePagesForTask(pids pgids) {
   563  	pidLen := len(pids)
   564  	if pidLen == 0 {
   565  		return
   566  	}
   567  
   568  	sort.Sort(pids)
   569  
   570  	var skipPid pgid
   571  	pns := make([]uint32, 0, 16)
   572  	pnsDup := make(map[uint32]struct{})
   573  
   574  	parseValue := func(v []byte) {
   575  		if len(v) != 4 {
   576  			return
   577  		}
   578  
   579  		pn := utils.BytesToUint32(v)
   580  		if db.ops.checkPageSplitted(pn) {
   581  			if _, exist := pnsDup[pn]; !exist {
   582  				pns = append(pns, pn)
   583  				pnsDup[pn] = struct{}{}
   584  			}
   585  		}
   586  	}
   587  
   588  	for index := 0; index < pidLen; index++ {
   589  		pid := pids[index]
   590  		if pid <= skipPid {
   591  			continue
   592  		}
   593  		p := db.page(pid)
   594  		if p.overflow > 0 {
   595  			skipPid = pid + pgid(p.overflow)
   596  		}
   597  		if (p.flags & leafPageFlag) == 0 {
   598  			continue
   599  		}
   600  
   601  		for i := 0; i < int(p.count); i++ {
   602  			elem := p.leafPageElement(uint16(i))
   603  			if elem.flags == bucketLeafFlag {
   604  				if !bytes.Equal(consts.BdbBucketName, elem.key()) {
   605  					continue
   606  				}
   607  				bktPage := openBucketPage(elem.value())
   608  				if bktPage == nil || (bktPage.flags&leafPageFlag) == 0 {
   609  					continue
   610  				}
   611  
   612  				for j := 0; j < int(bktPage.count); j++ {
   613  					e := bktPage.leafPageElement(uint16(j))
   614  					parseValue(e.value())
   615  				}
   616  			} else {
   617  				parseValue(elem.value())
   618  			}
   619  		}
   620  	}
   621  
   622  	if len(pns) == 0 {
   623  		return
   624  	}
   625  
   626  	db.ops.pushTaskCB(&bitask.BitpageTaskData{
   627  		Index: db.index,
   628  		Event: bitask.BitpageEventFreePage,
   629  		Pns:   pns,
   630  	})
   631  }
   632  
// txsById implements sort.Interface, ordering transactions by ascending
// meta.txid.
type txsById []*Tx

func (t txsById) Len() int           { return len(t) }
func (t txsById) Swap(i, j int)      { t[i], t[j] = t[j], t[i] }
func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
   638  
   639  func (db *DB) removeTx(tx *Tx) {
   640  	db.mmaplock.RUnlock()
   641  	db.metalock.Lock()
   642  
   643  	for i, t := range db.txs {
   644  		if t == tx {
   645  			last := len(db.txs) - 1
   646  			db.txs[i] = db.txs[last]
   647  			db.txs[last] = nil
   648  			db.txs = db.txs[:last]
   649  			break
   650  		}
   651  	}
   652  	n := len(db.txs)
   653  
   654  	db.metalock.Unlock()
   655  
   656  	db.statlock.Lock()
   657  	db.stats.OpenTxN = n
   658  	db.stats.TxStats.add(&tx.stats)
   659  	db.statlock.Unlock()
   660  }
   661  
   662  func (db *DB) NewIter(rtx *ReadTx) *BdbIterator {
   663  	return &BdbIterator{
   664  		iter: rtx.Bucket().Cursor(),
   665  		rTx:  rtx,
   666  	}
   667  }
   668  
   669  func (db *DB) Update(fn func(*Tx) error) error {
   670  	t, err := db.Begin(true)
   671  	if err != nil {
   672  		return err
   673  	}
   674  
   675  	defer func() {
   676  		if t.db != nil {
   677  			t.rollback()
   678  		}
   679  	}()
   680  
   681  	t.managed = true
   682  
   683  	err = fn(t)
   684  	t.managed = false
   685  	if err != nil {
   686  		_ = t.Rollback()
   687  		return err
   688  	}
   689  
   690  	return t.Commit()
   691  }
   692  
   693  func (db *DB) View(fn func(*Tx) error) error {
   694  	t, err := db.Begin(false)
   695  	if err != nil {
   696  		return err
   697  	}
   698  
   699  	defer func() {
   700  		if t.db != nil {
   701  			t.rollback()
   702  		}
   703  	}()
   704  
   705  	t.managed = true
   706  
   707  	err = fn(t)
   708  	t.managed = false
   709  	if err != nil {
   710  		_ = t.Rollback()
   711  		return err
   712  	}
   713  
   714  	return t.Rollback()
   715  }
   716  
   717  func (db *DB) Batch(fn func(*Tx) error) error {
   718  	errCh := make(chan error, 1)
   719  
   720  	db.batchMu.Lock()
   721  	if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
   722  		db.batch = &batch{
   723  			db: db,
   724  		}
   725  		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
   726  	}
   727  	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
   728  	if len(db.batch.calls) >= db.MaxBatchSize {
   729  		go db.batch.trigger()
   730  	}
   731  	db.batchMu.Unlock()
   732  
   733  	err := <-errCh
   734  	if err == trySolo {
   735  		err = db.Update(fn)
   736  	}
   737  	return err
   738  }
   739  
// call is one function queued by Batch together with the channel on which
// its result is delivered.
type call struct {
	fn  func(*Tx) error
	err chan<- error
}
   744  
// batch is a group of queued calls that run in one write transaction.
// timer fires trigger after the batch delay; start ensures run executes at
// most once.
type batch struct {
	db    *DB
	timer *time.Timer
	start sync.Once
	calls []call
}
   751  
// trigger runs the batch if it has not been run yet; later calls are no-ops
// because run is guarded by the start Once.
func (b *batch) trigger() {
	b.start.Do(b.run)
}
   755  
   756  func (b *batch) run() {
   757  	b.db.batchMu.Lock()
   758  	b.timer.Stop()
   759  	if b.db.batch == b {
   760  		b.db.batch = nil
   761  	}
   762  	b.db.batchMu.Unlock()
   763  
   764  retry:
   765  	for len(b.calls) > 0 {
   766  		var failIdx = -1
   767  		err := b.db.Update(func(tx *Tx) error {
   768  			for i, c := range b.calls {
   769  				if err := safelyCall(c.fn, tx); err != nil {
   770  					failIdx = i
   771  					return err
   772  				}
   773  			}
   774  			return nil
   775  		})
   776  
   777  		if failIdx >= 0 {
   778  			c := b.calls[failIdx]
   779  			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
   780  			c.err <- trySolo
   781  			continue retry
   782  		}
   783  
   784  		for _, c := range b.calls {
   785  			c.err <- err
   786  		}
   787  		break retry
   788  	}
   789  }
   790  
// trySolo is the sentinel a batch sends to a caller whose function failed
// inside the batch; Batch reacts by re-running that function through Update.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
   792  
   793  type panicked struct {
   794  	reason interface{}
   795  }
   796  
   797  func (p panicked) Error() string {
   798  	if err, ok := p.reason.(error); ok {
   799  		return err.Error()
   800  	}
   801  	return fmt.Sprintf("panic: %v", p.reason)
   802  }
   803  
   804  func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
   805  	defer func() {
   806  		if p := recover(); p != nil {
   807  			err = panicked{p}
   808  		}
   809  	}()
   810  	return fn(tx)
   811  }
   812  
// Sync flushes the database file by calling fdatasync on it.
func (db *DB) Sync() error { return fdatasync(db) }
   814  
   815  func (db *DB) Stats() Stats {
   816  	db.statlock.RLock()
   817  	defer db.statlock.RUnlock()
   818  	return db.stats
   819  }
   820  
   821  func (db *DB) DiskSize() int64 {
   822  	if info, err := db.file.Stat(); err != nil {
   823  		return 0
   824  	} else {
   825  		return info.Size()
   826  	}
   827  }
   828  
   829  func (db *DB) TotalPage() int {
   830  	return int(db.DiskSize() / int64(db.pageSize))
   831  }
   832  
   833  func (db *DB) FreePage() int {
   834  	stat := db.Stats()
   835  	return stat.FreePageN
   836  }
   837  
// GetPageSize returns the page size, in bytes, that this database uses.
func (db *DB) GetPageSize() int {
	return db.pageSize
}
   841  
// Info returns the address of the first byte of the memory mapping together
// with the page size. It indexes db.data directly, so the database must be
// mapped when it is called.
func (db *DB) Info() *Info {
	return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
}
   845  
// page returns a pointer to the page with the given id inside the memory
// mapping, located at byte offset id*pageSize. The pointer aliases db.data.
func (db *DB) page(id pgid) *page {
	pos := id * pgid(db.pageSize)
	return (*page)(unsafe.Pointer(&db.data[pos]))
}
   850  
// pageInBuffer returns a pointer to the page with the given id inside b,
// located at byte offset id*pageSize. The pointer aliases b.
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
	return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
}
   854  
   855  func (db *DB) meta() *meta {
   856  	metaA := db.meta0
   857  	metaB := db.meta1
   858  	if db.meta1.txid > db.meta0.txid {
   859  		metaA = db.meta1
   860  		metaB = db.meta0
   861  	}
   862  
   863  	if err := metaA.validate(); err == nil {
   864  		return metaA
   865  	} else if err := metaB.validate(); err == nil {
   866  		return metaB
   867  	}
   868  
   869  	panic("bdb.DB.meta(): invalid meta pages")
   870  }
   871  
// allocate returns a buffer-backed page header for count contiguous pages.
//
// The buffer comes from pagePool for a single page and is freshly allocated
// otherwise. The page id is taken from the freelist when it can supply one
// (second result true). Otherwise the pages are placed at the write
// transaction's high-water mark (second result false), remapping first when
// the required size reaches the current mapping size, and the high-water
// mark is advanced by count. An error is returned only when remapping fails.
func (db *DB) allocate(txid txid, count int) (*page, bool, error) {
	var buf []byte
	if count == 1 {
		buf = db.pagePool.Get().([]byte)
	} else {
		buf = make([]byte, count*db.pageSize)
	}
	p := (*page)(unsafe.Pointer(&buf[0]))
	p.overflow = uint32(count - 1)

	// A zero id means the freelist could not satisfy the request.
	if p.id = db.freelist.allocate(txid, count); p.id != 0 {
		return p, true, nil
	}

	p.id = db.rwtx.meta.pgid
	var minsz = int((p.id+pgid(count))+1) * db.pageSize
	if minsz >= db.datasz {
		if err := db.mmap(minsz); err != nil {
			return nil, false, errors.Wrap(err, "mmap allocate err")
		}
	}

	db.rwtx.meta.pgid += pgid(count)

	return p, false, nil
}
   898  
   899  func (db *DB) grow(sz int) error {
   900  	if sz <= db.filesz {
   901  		return nil
   902  	}
   903  
   904  	if db.datasz < db.AllocSize {
   905  		sz = db.datasz
   906  	} else {
   907  		sz += db.AllocSize
   908  	}
   909  
   910  	if !db.NoGrowSync && !db.readOnly {
   911  		if runtime.GOOS != "windows" {
   912  			if err := db.file.Truncate(int64(sz)); err != nil {
   913  				return errors.Wrap(err, "file resize err")
   914  			}
   915  		}
   916  		if err := db.file.Sync(); err != nil {
   917  			return errors.Wrap(err, "file sync err")
   918  		}
   919  		if db.Mlock {
   920  			if err := db.mrelock(db.filesz, sz); err != nil {
   921  				return errors.Wrap(err, "mlock/munlock err")
   922  			}
   923  		}
   924  	}
   925  
   926  	db.filesz = sz
   927  	return nil
   928  }
   929  
// IsReadOnly reports whether the database was opened with the read-only
// option.
func (db *DB) IsReadOnly() bool {
	return db.readOnly
}
   933  
   934  func (db *DB) freepages() []pgid {
   935  	tx, err := db.beginTx()
   936  	defer func() {
   937  		err = tx.Rollback()
   938  		if err != nil {
   939  			panic("freepages: failed to rollback tx")
   940  		}
   941  	}()
   942  	if err != nil {
   943  		panic("freepages: failed to open read only tx")
   944  	}
   945  
   946  	reachable := make(map[pgid]*page)
   947  	nofreed := make(map[pgid]bool)
   948  	ech := make(chan error)
   949  	go func() {
   950  		for e := range ech {
   951  			panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
   952  		}
   953  	}()
   954  	tx.checkBucket(&tx.root, reachable, nofreed, ech)
   955  	close(ech)
   956  
   957  	var fids []pgid
   958  	for i := pgid(2); i < db.meta().pgid; i++ {
   959  		if _, ok := reachable[i]; !ok {
   960  			fids = append(fids, i)
   961  		}
   962  	}
   963  	return fids
   964  }
   965  
// Stats holds database statistics. In this file FreePageN is set by
// loadFreelist, TxN and OpenTxN are maintained by beginTx/removeTx, and
// TxStats accumulates the stats of finished read transactions.
type Stats struct {
	FreePageN     int
	PendingPageN  int
	FreeAlloc     int
	FreelistInuse int
	TxN           int
	OpenTxN       int
	TxStats       TxStats
}
   975  
   976  func (s *Stats) Sub(other *Stats) Stats {
   977  	if other == nil {
   978  		return *s
   979  	}
   980  	var diff Stats
   981  	diff.FreePageN = s.FreePageN
   982  	diff.PendingPageN = s.PendingPageN
   983  	diff.FreeAlloc = s.FreeAlloc
   984  	diff.FreelistInuse = s.FreelistInuse
   985  	diff.TxN = s.TxN - other.TxN
   986  	diff.TxStats = s.TxStats.Sub(&other.TxStats)
   987  	return diff
   988  }
   989  
// Info is returned by DB.Info: Data is the address of the start of the
// memory mapping and PageSize the database page size.
type Info struct {
	Data     uintptr
	PageSize int
}
   994  
// meta is the content of a meta page. The field order is significant:
// sum64 hashes the raw bytes that precede checksum, and write copies the
// struct into the page as-is.
type meta struct {
	magic    uint32
	version  uint32
	pageSize uint32
	flags    uint32
	root     bucket
	freelist pgid
	pgid     pgid
	txid     txid
	checksum uint64
}
  1006  
  1007  func (m *meta) validate() error {
  1008  	if m.magic != magic {
  1009  		return ErrInvalid
  1010  	} else if m.version != versionFreelistUint && m.version != versionFreelistBitmap {
  1011  		return ErrVersionMismatch
  1012  	} else if m.checksum != 0 && m.checksum != m.sum64() {
  1013  		return ErrChecksum
  1014  	}
  1015  	return nil
  1016  }
  1017  
// copy overwrites dest with the contents of m.
func (m *meta) copy(dest *meta) {
	*dest = *m
}
  1021  
// setVersion sets the version field. It does not update the checksum.
func (m *meta) setVersion(ver uint32) {
	m.version = ver
}
  1025  
  1026  func (m *meta) write(p *page) {
  1027  	if m.root.root >= m.pgid {
  1028  		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
  1029  	} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
  1030  		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
  1031  	}
  1032  
  1033  	p.id = pgid(m.txid % 2)
  1034  	p.flags |= metaPageFlag
  1035  
  1036  	m.checksum = m.sum64()
  1037  
  1038  	m.copy(p.meta())
  1039  }
  1040  
// sum64 returns the FNV-1a 64-bit hash of the raw bytes of m that precede
// the checksum field.
func (m *meta) sum64() uint64 {
	var h = fnv.New64a()
	_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
	return h.Sum64()
}
  1046  
  1047  func _assert(condition bool, msg string, v ...interface{}) {
  1048  	if !condition {
  1049  		panic(fmt.Sprintf("assertion failed: "+msg, v...))
  1050  	}
  1051  }