github.com/ooni/psiphon/tunnel-core@v0.0.0-20230105123940-fe12a24c96ee/oovendor/bolt/tx.go (about)

     1  package bolt
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"os"
     7  	"sort"
     8  	"strings"
     9  	"time"
    10  	"unsafe"
    11  )
    12  
// txid represents the internal transaction identifier.
// A writable transaction increments the identifier copied from the database
// meta page by one when it is initialized.
type txid uint64
    15  
// Tx represents a read-only or read/write transaction on the database.
// Read-only transactions can be used for retrieving values for keys and creating cursors.
// Read/write transactions can create and remove buckets and create and remove keys.
//
// IMPORTANT: You must commit or rollback transactions when you are done with
// them. Pages can not be reclaimed by the writer until no more transactions
// are using them. A long running read transaction can cause the database to
// quickly grow.
type Tx struct {
	// writable reports whether this is a read/write transaction. Only
	// writable transactions get a dirty page cache and a new transaction id.
	writable bool
	// managed marks a transaction on which Commit and Rollback are asserted
	// against; it is not set anywhere in this file.
	managed bool
	// db is the database the transaction belongs to. It is reset to nil when
	// the transaction is closed, which is how a closed transaction is detected.
	db *DB
	// meta is the transaction's private copy of the database meta page.
	meta *meta
	// root is the root bucket, initialized from the copied meta page.
	root Bucket
	// pages caches pages allocated by this transaction, keyed by page id.
	// It is nil for read-only transactions.
	pages map[pgid]*page
	// stats accumulates the statistics of this transaction.
	stats TxStats
	// commitHandlers are the callbacks registered with OnCommit.
	commitHandlers []func()

	// WriteFlag specifies the flag for write-related methods like WriteTo().
	// Tx opens the database file with the specified flag to copy the data.
	//
	// By default, the flag is unset, which works well for mostly in-memory
	// workloads. For databases that are much larger than available RAM,
	// set the flag to syscall.O_DIRECT to avoid thrashing the page cache.
	WriteFlag int
}
    42  
    43  // init initializes the transaction.
    44  func (tx *Tx) init(db *DB) {
    45  	tx.db = db
    46  	tx.pages = nil
    47  
    48  	// Copy the meta page since it can be changed by the writer.
    49  	tx.meta = &meta{}
    50  	db.meta().copy(tx.meta)
    51  
    52  	// Copy over the root bucket.
    53  	tx.root = newBucket(tx)
    54  	tx.root.bucket = &bucket{}
    55  	*tx.root.bucket = tx.meta.root
    56  
    57  	// Increment the transaction id and add a page cache for writable transactions.
    58  	if tx.writable {
    59  		tx.pages = make(map[pgid]*page)
    60  		tx.meta.txid += txid(1)
    61  	}
    62  }
    63  
    64  // ID returns the transaction id.
    65  func (tx *Tx) ID() int {
    66  	return int(tx.meta.txid)
    67  }
    68  
    69  // DB returns a reference to the database that created the transaction.
    70  func (tx *Tx) DB() *DB {
    71  	return tx.db
    72  }
    73  
    74  // Size returns current database size in bytes as seen by this transaction.
    75  func (tx *Tx) Size() int64 {
    76  	return int64(tx.meta.pgid) * int64(tx.db.pageSize)
    77  }
    78  
    79  // Writable returns whether the transaction can perform write operations.
    80  func (tx *Tx) Writable() bool {
    81  	return tx.writable
    82  }
    83  
    84  // Cursor creates a cursor associated with the root bucket.
    85  // All items in the cursor will return a nil value because all root bucket keys point to buckets.
    86  // The cursor is only valid as long as the transaction is open.
    87  // Do not use a cursor after the transaction is closed.
    88  func (tx *Tx) Cursor() *Cursor {
    89  	return tx.root.Cursor()
    90  }
    91  
    92  // Stats retrieves a copy of the current transaction statistics.
    93  func (tx *Tx) Stats() TxStats {
    94  	return tx.stats
    95  }
    96  
    97  // Bucket retrieves a bucket by name.
    98  // Returns nil if the bucket does not exist.
    99  // The bucket instance is only valid for the lifetime of the transaction.
   100  func (tx *Tx) Bucket(name []byte) *Bucket {
   101  	return tx.root.Bucket(name)
   102  }
   103  
   104  // CreateBucket creates a new bucket.
   105  // Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
   106  // The bucket instance is only valid for the lifetime of the transaction.
   107  func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
   108  	return tx.root.CreateBucket(name)
   109  }
   110  
   111  // CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
   112  // Returns an error if the bucket name is blank, or if the bucket name is too long.
   113  // The bucket instance is only valid for the lifetime of the transaction.
   114  func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
   115  	return tx.root.CreateBucketIfNotExists(name)
   116  }
   117  
   118  // DeleteBucket deletes a bucket.
   119  // Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
   120  func (tx *Tx) DeleteBucket(name []byte) error {
   121  	return tx.root.DeleteBucket(name)
   122  }
   123  
   124  // ForEach executes a function for each bucket in the root.
   125  // If the provided function returns an error then the iteration is stopped and
   126  // the error is returned to the caller.
   127  func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
   128  	return tx.root.ForEach(func(k, v []byte) error {
   129  		return fn(k, tx.root.Bucket(k))
   130  	})
   131  }
   132  
   133  // OnCommit adds a handler function to be executed after the transaction successfully commits.
   134  func (tx *Tx) OnCommit(fn func()) {
   135  	tx.commitHandlers = append(tx.commitHandlers, fn)
   136  }
   137  
   138  // Commit writes all changes to disk and updates the meta page.
   139  // Returns an error if a disk write error occurs, or if Commit is
   140  // called on a read-only transaction.
   141  func (tx *Tx) Commit() error {
   142  	_assert(!tx.managed, "managed tx commit not allowed")
   143  	if tx.db == nil {
   144  		return ErrTxClosed
   145  	} else if !tx.writable {
   146  		return ErrTxNotWritable
   147  	}
   148  
   149  	// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
   150  
   151  	// Rebalance nodes which have had deletions.
   152  	var startTime = time.Now()
   153  	tx.root.rebalance()
   154  	if tx.stats.Rebalance > 0 {
   155  		tx.stats.RebalanceTime += time.Since(startTime)
   156  	}
   157  
   158  	// spill data onto dirty pages.
   159  	startTime = time.Now()
   160  	if err := tx.root.spill(); err != nil {
   161  		tx.rollback()
   162  		return err
   163  	}
   164  	tx.stats.SpillTime += time.Since(startTime)
   165  
   166  	// Free the old root bucket.
   167  	tx.meta.root.root = tx.root.root
   168  
   169  	// Free the old freelist because commit writes out a fresh freelist.
   170  	if tx.meta.freelist != pgidNoFreelist {
   171  		tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
   172  	}
   173  
   174  	if !tx.db.NoFreelistSync {
   175  		err := tx.commitFreelist()
   176  		if err != nil {
   177  			return err
   178  		}
   179  	} else {
   180  		tx.meta.freelist = pgidNoFreelist
   181  	}
   182  
   183  	// Write dirty pages to disk.
   184  	startTime = time.Now()
   185  	if err := tx.write(); err != nil {
   186  		tx.rollback()
   187  		return err
   188  	}
   189  
   190  	// If strict mode is enabled then perform a consistency check.
   191  	// Only the first consistency error is reported in the panic.
   192  	if tx.db.StrictMode {
   193  		ch := tx.Check()
   194  		var errs []string
   195  		for {
   196  			err, ok := <-ch
   197  			if !ok {
   198  				break
   199  			}
   200  			errs = append(errs, err.Error())
   201  		}
   202  		if len(errs) > 0 {
   203  			panic("check fail: " + strings.Join(errs, "\n"))
   204  		}
   205  	}
   206  
   207  	// Write meta to disk.
   208  	if err := tx.writeMeta(); err != nil {
   209  		tx.rollback()
   210  		return err
   211  	}
   212  	tx.stats.WriteTime += time.Since(startTime)
   213  
   214  	// Finalize the transaction.
   215  	tx.close()
   216  
   217  	// Execute commit handlers now that the locks have been removed.
   218  	for _, fn := range tx.commitHandlers {
   219  		fn()
   220  	}
   221  
   222  	return nil
   223  }
   224  
   225  func (tx *Tx) commitFreelist() error {
   226  	// Allocate new pages for the new free list. This will overestimate
   227  	// the size of the freelist but not underestimate the size (which would be bad).
   228  	opgid := tx.meta.pgid
   229  	p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
   230  	if err != nil {
   231  		tx.rollback()
   232  		return err
   233  	}
   234  	if err := tx.db.freelist.write(p); err != nil {
   235  		tx.rollback()
   236  		return err
   237  	}
   238  	tx.meta.freelist = p.id
   239  	// If the high water mark has moved up then attempt to grow the database.
   240  	if tx.meta.pgid > opgid {
   241  		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
   242  			tx.rollback()
   243  			return err
   244  		}
   245  	}
   246  
   247  	return nil
   248  }
   249  
   250  // Rollback closes the transaction and ignores all previous updates. Read-only
   251  // transactions must be rolled back and not committed.
   252  func (tx *Tx) Rollback() error {
   253  	_assert(!tx.managed, "managed tx rollback not allowed")
   254  	if tx.db == nil {
   255  		return ErrTxClosed
   256  	}
   257  	tx.nonPhysicalRollback()
   258  	return nil
   259  }
   260  
   261  // nonPhysicalRollback is called when user calls Rollback directly, in this case we do not need to reload the free pages from disk.
   262  func (tx *Tx) nonPhysicalRollback() {
   263  	if tx.db == nil {
   264  		return
   265  	}
   266  	if tx.writable {
   267  		tx.db.freelist.rollback(tx.meta.txid)
   268  	}
   269  	tx.close()
   270  }
   271  
   272  // rollback needs to reload the free pages from disk in case some system error happens like fsync error.
   273  func (tx *Tx) rollback() {
   274  	if tx.db == nil {
   275  		return
   276  	}
   277  
   278  	// [Psiphon]
   279  	// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
   280  	// If the transaction failed due to mmap, rollback is futile.
   281  	if tx.db.mmapErr != nil {
   282  		tx.close()
   283  		return
   284  	}
   285  
   286  	if tx.writable {
   287  		tx.db.freelist.rollback(tx.meta.txid)
   288  		if !tx.db.hasSyncedFreelist() {
   289  			// Reconstruct free page list by scanning the DB to get the whole free page list.
   290  			// Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode.
   291  			tx.db.freelist.noSyncReload(tx.db.freepages())
   292  		} else {
   293  			// Read free page list from freelist page.
   294  			tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
   295  		}
   296  	}
   297  	tx.close()
   298  }
   299  
   300  func (tx *Tx) close() {
   301  	if tx.db == nil {
   302  		return
   303  	}
   304  	if tx.writable {
   305  		// Grab freelist stats.
   306  		var freelistFreeN = tx.db.freelist.free_count()
   307  		var freelistPendingN = tx.db.freelist.pending_count()
   308  		var freelistAlloc = tx.db.freelist.size()
   309  
   310  		// Remove transaction ref & writer lock.
   311  		tx.db.rwtx = nil
   312  		tx.db.rwlock.Unlock()
   313  
   314  		// Merge statistics.
   315  		tx.db.statlock.Lock()
   316  		tx.db.stats.FreePageN = freelistFreeN
   317  		tx.db.stats.PendingPageN = freelistPendingN
   318  		tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
   319  		tx.db.stats.FreelistInuse = freelistAlloc
   320  		tx.db.stats.TxStats.add(&tx.stats)
   321  		tx.db.statlock.Unlock()
   322  	} else {
   323  		tx.db.removeTx(tx)
   324  	}
   325  
   326  	// Clear all references.
   327  	tx.db = nil
   328  	tx.meta = nil
   329  	tx.root = Bucket{tx: tx}
   330  	tx.pages = nil
   331  }
   332  
   333  // Copy writes the entire database to a writer.
   334  // This function exists for backwards compatibility.
   335  //
   336  // Deprecated; Use WriteTo() instead.
   337  func (tx *Tx) Copy(w io.Writer) error {
   338  	_, err := tx.WriteTo(w)
   339  	return err
   340  }
   341  
   342  // WriteTo writes the entire database to a writer.
   343  // If err == nil then exactly tx.Size() bytes will be written into the writer.
   344  func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
   345  	// Attempt to open reader with WriteFlag
   346  	f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
   347  	if err != nil {
   348  		return 0, err
   349  	}
   350  	defer func() {
   351  		if cerr := f.Close(); err == nil {
   352  			err = cerr
   353  		}
   354  	}()
   355  
   356  	// Generate a meta page. We use the same page data for both meta pages.
   357  	buf := make([]byte, tx.db.pageSize)
   358  	page := (*page)(unsafe.Pointer(&buf[0]))
   359  	page.flags = metaPageFlag
   360  	*page.meta() = *tx.meta
   361  
   362  	// Write meta 0.
   363  	page.id = 0
   364  	page.meta().checksum = page.meta().sum64()
   365  	nn, err := w.Write(buf)
   366  	n += int64(nn)
   367  	if err != nil {
   368  		return n, fmt.Errorf("meta 0 copy: %s", err)
   369  	}
   370  
   371  	// Write meta 1 with a lower transaction id.
   372  	page.id = 1
   373  	page.meta().txid -= 1
   374  	page.meta().checksum = page.meta().sum64()
   375  	nn, err = w.Write(buf)
   376  	n += int64(nn)
   377  	if err != nil {
   378  		return n, fmt.Errorf("meta 1 copy: %s", err)
   379  	}
   380  
   381  	// Move past the meta pages in the file.
   382  	if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
   383  		return n, fmt.Errorf("seek: %s", err)
   384  	}
   385  
   386  	// Copy data pages.
   387  	wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
   388  	n += wn
   389  	if err != nil {
   390  		return n, err
   391  	}
   392  
   393  	return n, nil
   394  }
   395  
   396  // CopyFile copies the entire database to file at the given path.
   397  // A reader transaction is maintained during the copy so it is safe to continue
   398  // using the database while a copy is in progress.
   399  func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
   400  	f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
   401  	if err != nil {
   402  		return err
   403  	}
   404  
   405  	err = tx.Copy(f)
   406  	if err != nil {
   407  		_ = f.Close()
   408  		return err
   409  	}
   410  	return f.Close()
   411  }
   412  
   413  // Check performs several consistency checks on the database for this transaction.
   414  // An error is returned if any inconsistency is found.
   415  //
   416  // It can be safely run concurrently on a writable transaction. However, this
   417  // incurs a high cost for large databases and databases with a lot of subbuckets
   418  // because of caching. This overhead can be removed if running on a read-only
   419  // transaction, however, it is not safe to execute other writer transactions at
   420  // the same time.
   421  func (tx *Tx) Check() <-chan error {
   422  	ch := make(chan error)
   423  	// [Psiphon]
   424  	// This code is modified to use the single-error check function while
   425  	// preserving the existing bolt Check API.
   426  	go func() {
   427  		err := tx.check()
   428  		if err != nil {
   429  			ch <- err
   430  		}
   431  		close(ch)
   432  	}()
   433  	return ch
   434  }
   435  
   436  // [Psiphon]
   437  // SynchronousCheck performs the Check function in the current goroutine,
   438  // allowing the caller to recover from any panics or faults.
   439  func (tx *Tx) SynchronousCheck() error {
   440  	return tx.check()
   441  }
   442  
   443  // [Psiphon]
   444  // check is modified to stop and return on the first error. This prevents some
   445  // long running loops, perhaps due to looping based on corrupt data, that we
   446  // have observed when testing check against corrupted database files. Since
   447  // Psiphon will recover by resetting (deleting) the datastore on any error,
   448  // more than one error is not useful information in our case.
   449  func (tx *Tx) check() error {
   450  
   451  	// Force loading free list if opened in ReadOnly mode.
   452  	tx.db.loadFreelist()
   453  
   454  	// Check if any pages are double freed.
   455  	freed := make(map[pgid]bool)
   456  	all := make([]pgid, tx.db.freelist.count())
   457  	tx.db.freelist.copyall(all)
   458  	for _, id := range all {
   459  		if freed[id] {
   460  			return fmt.Errorf("page %d: already freed", id)
   461  		}
   462  		freed[id] = true
   463  	}
   464  
   465  	// Track every reachable page.
   466  	reachable := make(map[pgid]*page)
   467  	reachable[0] = tx.page(0) // meta0
   468  	reachable[1] = tx.page(1) // meta1
   469  	if tx.meta.freelist != pgidNoFreelist {
   470  		for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
   471  			reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
   472  		}
   473  	}
   474  
   475  	// Recursively check buckets.
   476  	err := tx.checkBucket(&tx.root, reachable, freed)
   477  	if err != nil {
   478  		return err
   479  	}
   480  
   481  	// Ensure all pages below high water mark are either reachable or freed.
   482  	for i := pgid(0); i < tx.meta.pgid; i++ {
   483  		_, isReachable := reachable[i]
   484  		if !isReachable && !freed[i] {
   485  			return fmt.Errorf("page %d: unreachable unfreed", int(i))
   486  		}
   487  	}
   488  
   489  	return nil
   490  }
   491  
   492  // [Psiphon]
   493  // checkBucket is modified to stop and return on the first error.
   494  func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool) error {
   495  	// Ignore inline buckets.
   496  	if b.root == 0 {
   497  		return nil
   498  	}
   499  
   500  	var err error
   501  
   502  	// Check every page used by this bucket.
   503  	b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
   504  		if p.id > tx.meta.pgid {
   505  			err = fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
   506  			return
   507  		}
   508  
   509  		// Ensure each page is only referenced once.
   510  		for i := pgid(0); i <= pgid(p.overflow); i++ {
   511  			var id = p.id + i
   512  			if _, ok := reachable[id]; ok {
   513  				err = fmt.Errorf("page %d: multiple references", int(id))
   514  				return
   515  			}
   516  			reachable[id] = p
   517  		}
   518  
   519  		// We should only encounter un-freed leaf and branch pages.
   520  		if freed[p.id] {
   521  			err = fmt.Errorf("page %d: reachable freed", int(p.id))
   522  			return
   523  		} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
   524  			err = fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
   525  			return
   526  		}
   527  	})
   528  
   529  	if err != nil {
   530  		return err
   531  	}
   532  
   533  	// Check each bucket within this bucket.
   534  	return b.ForEach(func(k, v []byte) error {
   535  		if child := b.Bucket(k); child != nil {
   536  			err := tx.checkBucket(child, reachable, freed)
   537  			if err != nil {
   538  				return err
   539  			}
   540  		}
   541  		return nil
   542  	})
   543  }
   544  
   545  // allocate returns a contiguous block of memory starting at a given page.
   546  func (tx *Tx) allocate(count int) (*page, error) {
   547  	p, err := tx.db.allocate(tx.meta.txid, count)
   548  	if err != nil {
   549  		return nil, err
   550  	}
   551  
   552  	// Save to our page cache.
   553  	tx.pages[p.id] = p
   554  
   555  	// Update statistics.
   556  	tx.stats.PageCount += count
   557  	tx.stats.PageAlloc += count * tx.db.pageSize
   558  
   559  	return p, nil
   560  }
   561  
   562  // write writes any dirty pages to disk.
   563  func (tx *Tx) write() error {
   564  	// Sort pages by id.
   565  	pages := make(pages, 0, len(tx.pages))
   566  	for _, p := range tx.pages {
   567  		pages = append(pages, p)
   568  	}
   569  	// Clear out page cache early.
   570  	tx.pages = make(map[pgid]*page)
   571  	sort.Sort(pages)
   572  
   573  	// Write pages to disk in order.
   574  	for _, p := range pages {
   575  		rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
   576  		offset := int64(p.id) * int64(tx.db.pageSize)
   577  		var written uintptr
   578  
   579  		// Write out page in "max allocation" sized chunks.
   580  		for {
   581  			sz := rem
   582  			if sz > maxAllocSize-1 {
   583  				sz = maxAllocSize - 1
   584  			}
   585  			buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
   586  
   587  			if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
   588  				return err
   589  			}
   590  
   591  			// Update statistics.
   592  			tx.stats.Write++
   593  
   594  			// Exit inner for loop if we've written all the chunks.
   595  			rem -= sz
   596  			if rem == 0 {
   597  				break
   598  			}
   599  
   600  			// Otherwise move offset forward and move pointer to next chunk.
   601  			offset += int64(sz)
   602  			written += uintptr(sz)
   603  		}
   604  	}
   605  
   606  	// Ignore file sync if flag is set on DB.
   607  	if !tx.db.NoSync || IgnoreNoSync {
   608  		if err := fdatasync(tx.db); err != nil {
   609  			return err
   610  		}
   611  	}
   612  
   613  	// Put small pages back to page pool.
   614  	for _, p := range pages {
   615  		// Ignore page sizes over 1 page.
   616  		// These are allocated using make() instead of the page pool.
   617  		if int(p.overflow) != 0 {
   618  			continue
   619  		}
   620  
   621  		buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
   622  
   623  		// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
   624  		for i := range buf {
   625  			buf[i] = 0
   626  		}
   627  		tx.db.pagePool.Put(buf)
   628  	}
   629  
   630  	return nil
   631  }
   632  
   633  // writeMeta writes the meta to the disk.
   634  func (tx *Tx) writeMeta() error {
   635  	// Create a temporary buffer for the meta page.
   636  	buf := make([]byte, tx.db.pageSize)
   637  	p := tx.db.pageInBuffer(buf, 0)
   638  	tx.meta.write(p)
   639  
   640  	// Write the meta page to file.
   641  	if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
   642  		return err
   643  	}
   644  	if !tx.db.NoSync || IgnoreNoSync {
   645  		if err := fdatasync(tx.db); err != nil {
   646  			return err
   647  		}
   648  	}
   649  
   650  	// Update statistics.
   651  	tx.stats.Write++
   652  
   653  	return nil
   654  }
   655  
   656  // page returns a reference to the page with a given id.
   657  // If page has been written to then a temporary buffered page is returned.
   658  func (tx *Tx) page(id pgid) *page {
   659  	// Check the dirty pages first.
   660  	if tx.pages != nil {
   661  		if p, ok := tx.pages[id]; ok {
   662  			return p
   663  		}
   664  	}
   665  
   666  	// Otherwise return directly from the mmap.
   667  	return tx.db.page(id)
   668  }
   669  
   670  // forEachPage iterates over every page within a given page and executes a function.
   671  func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
   672  	p := tx.page(pgid)
   673  
   674  	// Execute function.
   675  	fn(p, depth)
   676  
   677  	// Recursively loop over children.
   678  	if (p.flags & branchPageFlag) != 0 {
   679  		for i := 0; i < int(p.count); i++ {
   680  			elem := p.branchPageElement(uint16(i))
   681  			tx.forEachPage(elem.pgid, depth+1, fn)
   682  		}
   683  	}
   684  }
   685  
   686  // Page returns page information for a given page number.
   687  // This is only safe for concurrent use when used by a writable transaction.
   688  func (tx *Tx) Page(id int) (*PageInfo, error) {
   689  	if tx.db == nil {
   690  		return nil, ErrTxClosed
   691  	} else if pgid(id) >= tx.meta.pgid {
   692  		return nil, nil
   693  	}
   694  
   695  	// Build the page info.
   696  	p := tx.db.page(pgid(id))
   697  	info := &PageInfo{
   698  		ID:            id,
   699  		Count:         int(p.count),
   700  		OverflowCount: int(p.overflow),
   701  	}
   702  
   703  	// Determine the type (or if it's free).
   704  	if tx.db.freelist.freed(pgid(id)) {
   705  		info.Type = "free"
   706  	} else {
   707  		info.Type = p.typ()
   708  	}
   709  
   710  	return info, nil
   711  }
   712  
// TxStats represents statistics about the actions performed by the transaction.
// All fields are plain counters or accumulated durations, so two values can
// be combined with add or compared with Sub.
type TxStats struct {
	// Page statistics.
	PageCount int // number of page allocations
	PageAlloc int // total bytes allocated

	// Cursor statistics.
	CursorCount int // number of cursors created

	// Node statistics
	NodeCount int // number of node allocations
	NodeDeref int // number of node dereferences

	// Rebalance statistics.
	Rebalance     int           // number of node rebalances
	RebalanceTime time.Duration // total time spent rebalancing

	// Split/Spill statistics.
	Split     int           // number of nodes split
	Spill     int           // number of nodes spilled
	SpillTime time.Duration // total time spent spilling

	// Write statistics.
	Write     int           // number of writes performed
	WriteTime time.Duration // total time spent writing to disk
}
   739  
   740  func (s *TxStats) add(other *TxStats) {
   741  	s.PageCount += other.PageCount
   742  	s.PageAlloc += other.PageAlloc
   743  	s.CursorCount += other.CursorCount
   744  	s.NodeCount += other.NodeCount
   745  	s.NodeDeref += other.NodeDeref
   746  	s.Rebalance += other.Rebalance
   747  	s.RebalanceTime += other.RebalanceTime
   748  	s.Split += other.Split
   749  	s.Spill += other.Spill
   750  	s.SpillTime += other.SpillTime
   751  	s.Write += other.Write
   752  	s.WriteTime += other.WriteTime
   753  }
   754  
   755  // Sub calculates and returns the difference between two sets of transaction stats.
   756  // This is useful when obtaining stats at two different points and time and
   757  // you need the performance counters that occurred within that time span.
   758  func (s *TxStats) Sub(other *TxStats) TxStats {
   759  	var diff TxStats
   760  	diff.PageCount = s.PageCount - other.PageCount
   761  	diff.PageAlloc = s.PageAlloc - other.PageAlloc
   762  	diff.CursorCount = s.CursorCount - other.CursorCount
   763  	diff.NodeCount = s.NodeCount - other.NodeCount
   764  	diff.NodeDeref = s.NodeDeref - other.NodeDeref
   765  	diff.Rebalance = s.Rebalance - other.Rebalance
   766  	diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
   767  	diff.Split = s.Split - other.Split
   768  	diff.Spill = s.Spill - other.Spill
   769  	diff.SpillTime = s.SpillTime - other.SpillTime
   770  	diff.Write = s.Write - other.Write
   771  	diff.WriteTime = s.WriteTime - other.WriteTime
   772  	return diff
   773  }