github.com/bhojpur/cache@v0.0.4/pkg/memory/tx.go (about)

     1  package memory
     2  
     3  // Copyright (c) 2018 Bhojpur Consulting Private Limited, India. All rights reserved.
     4  
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  import (
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"sort"
    28  	"strings"
    29  	"time"
    30  	"unsafe"
    31  )
    32  
// txid represents the internal transaction identifier.
// Tx.init increments the identifier stored in its meta copy by one for every
// writable transaction.
type txid uint64
    35  
// Tx represents a read-only or read/write transaction on the database.
// Read-only transactions can be used for retrieving values for keys and creating cursors.
// Read/write transactions can create and remove buckets and create and remove keys.
//
// IMPORTANT: You must commit or rollback transactions when you are done with
// them. Pages can not be reclaimed by the writer until no more transactions
// are using them. A long running read transaction can cause the database to
// quickly grow.
type Tx struct {
	// writable is true for read/write transactions. Commit rejects
	// transactions without it, and only writable transactions get a dirty
	// page cache in init.
	writable       bool
	// managed marks a transaction on which calling Commit or Rollback
	// directly trips an assertion.
	managed        bool
	// db is the database that created the transaction. close() sets it to
	// nil, which is how the other methods detect a closed transaction.
	db             *DB
	// meta is this transaction's private copy of the database meta page,
	// taken in init.
	meta           *meta
	// root is the root bucket; the bucket-level methods on Tx delegate to it.
	root           Bucket
	// pages caches pages allocated by this transaction, keyed by page id.
	// It is nil for read-only transactions.
	pages          map[pgid]*page
	// stats accumulates the counters and timings of this transaction.
	stats          TxStats
	// commitHandlers are the callbacks registered through OnCommit; Commit
	// runs them after the transaction has been closed successfully.
	commitHandlers []func()

	// WriteFlag specifies the flag for write-related methods like WriteTo().
	// Tx opens the database file with the specified flag to copy the data.
	//
	// By default, the flag is unset, which works well for mostly in-memory
	// workloads. For databases that are much larger than available RAM,
	// set the flag to syscall.O_DIRECT to avoid thrashing the page cache.
	WriteFlag int
}
    62  
    63  // init initializes the transaction.
    64  func (tx *Tx) init(db *DB) {
    65  	tx.db = db
    66  	tx.pages = nil
    67  
    68  	// Copy the meta page since it can be changed by the writer.
    69  	tx.meta = &meta{}
    70  	db.meta().copy(tx.meta)
    71  
    72  	// Copy over the root bucket.
    73  	tx.root = newBucket(tx)
    74  	tx.root.bucket = &bucket{}
    75  	*tx.root.bucket = tx.meta.root
    76  
    77  	// Increment the transaction id and add a page cache for writable transactions.
    78  	if tx.writable {
    79  		tx.pages = make(map[pgid]*page)
    80  		tx.meta.txid += txid(1)
    81  	}
    82  }
    83  
    84  // ID returns the transaction id.
    85  func (tx *Tx) ID() int {
    86  	return int(tx.meta.txid)
    87  }
    88  
    89  // DB returns a reference to the database that created the transaction.
    90  func (tx *Tx) DB() *DB {
    91  	return tx.db
    92  }
    93  
    94  // Size returns current database size in bytes as seen by this transaction.
    95  func (tx *Tx) Size() int64 {
    96  	return int64(tx.meta.pgid) * int64(tx.db.pageSize)
    97  }
    98  
    99  // Writable returns whether the transaction can perform write operations.
   100  func (tx *Tx) Writable() bool {
   101  	return tx.writable
   102  }
   103  
   104  // Cursor creates a cursor associated with the root bucket.
   105  // All items in the cursor will return a nil value because all root bucket keys point to buckets.
   106  // The cursor is only valid as long as the transaction is open.
   107  // Do not use a cursor after the transaction is closed.
   108  func (tx *Tx) Cursor() *Cursor {
   109  	return tx.root.Cursor()
   110  }
   111  
   112  // Stats retrieves a copy of the current transaction statistics.
   113  func (tx *Tx) Stats() TxStats {
   114  	return tx.stats
   115  }
   116  
   117  // Bucket retrieves a bucket by name.
   118  // Returns nil if the bucket does not exist.
   119  // The bucket instance is only valid for the lifetime of the transaction.
   120  func (tx *Tx) Bucket(name []byte) *Bucket {
   121  	return tx.root.Bucket(name)
   122  }
   123  
   124  // CreateBucket creates a new bucket.
   125  // Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
   126  // The bucket instance is only valid for the lifetime of the transaction.
   127  func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
   128  	return tx.root.CreateBucket(name)
   129  }
   130  
   131  // CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
   132  // Returns an error if the bucket name is blank, or if the bucket name is too long.
   133  // The bucket instance is only valid for the lifetime of the transaction.
   134  func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
   135  	return tx.root.CreateBucketIfNotExists(name)
   136  }
   137  
   138  // DeleteBucket deletes a bucket.
   139  // Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
   140  func (tx *Tx) DeleteBucket(name []byte) error {
   141  	return tx.root.DeleteBucket(name)
   142  }
   143  
   144  // ForEach executes a function for each bucket in the root.
   145  // If the provided function returns an error then the iteration is stopped and
   146  // the error is returned to the caller.
   147  func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
   148  	return tx.root.ForEach(func(k, v []byte) error {
   149  		return fn(k, tx.root.Bucket(k))
   150  	})
   151  }
   152  
   153  // OnCommit adds a handler function to be executed after the transaction successfully commits.
   154  func (tx *Tx) OnCommit(fn func()) {
   155  	tx.commitHandlers = append(tx.commitHandlers, fn)
   156  }
   157  
   158  // Commit writes all changes to disk and updates the meta page.
   159  // Returns an error if a disk write error occurs, or if Commit is
   160  // called on a read-only transaction.
   161  func (tx *Tx) Commit() error {
   162  	_assert(!tx.managed, "managed tx commit not allowed")
   163  	if tx.db == nil {
   164  		return ErrTxClosed
   165  	} else if !tx.writable {
   166  		return ErrTxNotWritable
   167  	}
   168  
   169  	// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
   170  
   171  	// Rebalance nodes which have had deletions.
   172  	var startTime = time.Now()
   173  	tx.root.rebalance()
   174  	if tx.stats.Rebalance > 0 {
   175  		tx.stats.RebalanceTime += time.Since(startTime)
   176  	}
   177  
   178  	// spill data onto dirty pages.
   179  	startTime = time.Now()
   180  	if err := tx.root.spill(); err != nil {
   181  		tx.rollback()
   182  		return err
   183  	}
   184  	tx.stats.SpillTime += time.Since(startTime)
   185  
   186  	// Free the old root bucket.
   187  	tx.meta.root.root = tx.root.root
   188  
   189  	// Free the old freelist because commit writes out a fresh freelist.
   190  	if tx.meta.freelist != pgidNoFreelist {
   191  		tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
   192  	}
   193  
   194  	if !tx.db.NoFreelistSync {
   195  		err := tx.commitFreelist()
   196  		if err != nil {
   197  			return err
   198  		}
   199  	} else {
   200  		tx.meta.freelist = pgidNoFreelist
   201  	}
   202  
   203  	// Write dirty pages to disk.
   204  	startTime = time.Now()
   205  	if err := tx.write(); err != nil {
   206  		tx.rollback()
   207  		return err
   208  	}
   209  
   210  	// If strict mode is enabled then perform a consistency check.
   211  	if tx.db.StrictMode {
   212  		ch := tx.Check()
   213  		var errs []string
   214  		for {
   215  			err, ok := <-ch
   216  			if !ok {
   217  				break
   218  			}
   219  			errs = append(errs, err.Error())
   220  		}
   221  		if len(errs) > 0 {
   222  			panic("check fail: " + strings.Join(errs, "\n"))
   223  		}
   224  	}
   225  
   226  	// Write meta to disk.
   227  	if err := tx.writeMeta(); err != nil {
   228  		tx.rollback()
   229  		return err
   230  	}
   231  	tx.stats.WriteTime += time.Since(startTime)
   232  
   233  	// Finalize the transaction.
   234  	tx.close()
   235  
   236  	// Execute commit handlers now that the locks have been removed.
   237  	for _, fn := range tx.commitHandlers {
   238  		fn()
   239  	}
   240  
   241  	return nil
   242  }
   243  
   244  func (tx *Tx) commitFreelist() error {
   245  	// Allocate new pages for the new free list. This will overestimate
   246  	// the size of the freelist but not underestimate the size (which would be bad).
   247  	opgid := tx.meta.pgid
   248  	p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
   249  	if err != nil {
   250  		tx.rollback()
   251  		return err
   252  	}
   253  	if err := tx.db.freelist.write(p); err != nil {
   254  		tx.rollback()
   255  		return err
   256  	}
   257  	tx.meta.freelist = p.id
   258  	// If the high water mark has moved up then attempt to grow the database.
   259  	if tx.meta.pgid > opgid {
   260  		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
   261  			tx.rollback()
   262  			return err
   263  		}
   264  	}
   265  
   266  	return nil
   267  }
   268  
   269  // Rollback closes the transaction and ignores all previous updates. Read-only
   270  // transactions must be rolled back and not committed.
   271  func (tx *Tx) Rollback() error {
   272  	_assert(!tx.managed, "managed tx rollback not allowed")
   273  	if tx.db == nil {
   274  		return ErrTxClosed
   275  	}
   276  	tx.nonPhysicalRollback()
   277  	return nil
   278  }
   279  
   280  // nonPhysicalRollback is called when user calls Rollback directly, in this case we do not need to reload the free pages from disk.
   281  func (tx *Tx) nonPhysicalRollback() {
   282  	if tx.db == nil {
   283  		return
   284  	}
   285  	if tx.writable {
   286  		tx.db.freelist.rollback(tx.meta.txid)
   287  	}
   288  	tx.close()
   289  }
   290  
   291  // rollback needs to reload the free pages from disk in case some system error happens like fsync error.
   292  func (tx *Tx) rollback() {
   293  	if tx.db == nil {
   294  		return
   295  	}
   296  	if tx.writable {
   297  		tx.db.freelist.rollback(tx.meta.txid)
   298  		if !tx.db.hasSyncedFreelist() {
   299  			// Reconstruct free page list by scanning the DB to get the whole free page list.
   300  			// Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode.
   301  			tx.db.freelist.noSyncReload(tx.db.freepages())
   302  		} else {
   303  			// Read free page list from freelist page.
   304  			tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
   305  		}
   306  	}
   307  	tx.close()
   308  }
   309  
   310  func (tx *Tx) close() {
   311  	if tx.db == nil {
   312  		return
   313  	}
   314  	if tx.writable {
   315  		// Grab freelist stats.
   316  		var freelistFreeN = tx.db.freelist.free_count()
   317  		var freelistPendingN = tx.db.freelist.pending_count()
   318  		var freelistAlloc = tx.db.freelist.size()
   319  
   320  		// Remove transaction ref & writer lock.
   321  		tx.db.rwtx = nil
   322  		tx.db.rwlock.Unlock()
   323  
   324  		// Merge statistics.
   325  		tx.db.statlock.Lock()
   326  		tx.db.stats.FreePageN = freelistFreeN
   327  		tx.db.stats.PendingPageN = freelistPendingN
   328  		tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize
   329  		tx.db.stats.FreelistInuse = freelistAlloc
   330  		tx.db.stats.TxStats.add(&tx.stats)
   331  		tx.db.statlock.Unlock()
   332  	} else {
   333  		tx.db.removeTx(tx)
   334  	}
   335  
   336  	// Clear all references.
   337  	tx.db = nil
   338  	tx.meta = nil
   339  	tx.root = Bucket{tx: tx}
   340  	tx.pages = nil
   341  }
   342  
   343  // Copy writes the entire database to a writer.
   344  // This function exists for backwards compatibility.
   345  //
   346  // Deprecated; Use WriteTo() instead.
   347  func (tx *Tx) Copy(w io.Writer) error {
   348  	_, err := tx.WriteTo(w)
   349  	return err
   350  }
   351  
   352  // WriteTo writes the entire database to a writer.
   353  // If err == nil then exactly tx.Size() bytes will be written into the writer.
   354  func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
   355  	// Attempt to open reader with WriteFlag
   356  	f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
   357  	if err != nil {
   358  		return 0, err
   359  	}
   360  	defer func() {
   361  		if cerr := f.Close(); err == nil {
   362  			err = cerr
   363  		}
   364  	}()
   365  
   366  	// Generate a meta page. We use the same page data for both meta pages.
   367  	buf := make([]byte, tx.db.pageSize)
   368  	page := (*page)(unsafe.Pointer(&buf[0]))
   369  	page.flags = metaPageFlag
   370  	*page.meta() = *tx.meta
   371  
   372  	// Write meta 0.
   373  	page.id = 0
   374  	page.meta().checksum = page.meta().sum64()
   375  	nn, err := w.Write(buf)
   376  	n += int64(nn)
   377  	if err != nil {
   378  		return n, fmt.Errorf("meta 0 copy: %s", err)
   379  	}
   380  
   381  	// Write meta 1 with a lower transaction id.
   382  	page.id = 1
   383  	page.meta().txid -= 1
   384  	page.meta().checksum = page.meta().sum64()
   385  	nn, err = w.Write(buf)
   386  	n += int64(nn)
   387  	if err != nil {
   388  		return n, fmt.Errorf("meta 1 copy: %s", err)
   389  	}
   390  
   391  	// Move past the meta pages in the file.
   392  	if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
   393  		return n, fmt.Errorf("seek: %s", err)
   394  	}
   395  
   396  	// Copy data pages.
   397  	wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
   398  	n += wn
   399  	if err != nil {
   400  		return n, err
   401  	}
   402  
   403  	return n, nil
   404  }
   405  
   406  // CopyFile copies the entire database to file at the given path.
   407  // A reader transaction is maintained during the copy so it is safe to continue
   408  // using the database while a copy is in progress.
   409  func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
   410  	f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
   411  	if err != nil {
   412  		return err
   413  	}
   414  
   415  	_, err = tx.WriteTo(f)
   416  	if err != nil {
   417  		_ = f.Close()
   418  		return err
   419  	}
   420  	return f.Close()
   421  }
   422  
   423  // Check performs several consistency checks on the database for this transaction.
   424  // An error is returned if any inconsistency is found.
   425  //
   426  // It can be safely run concurrently on a writable transaction. However, this
   427  // incurs a high cost for large databases and databases with a lot of subbuckets
   428  // because of caching. This overhead can be removed if running on a read-only
   429  // transaction, however, it is not safe to execute other writer transactions at
   430  // the same time.
   431  func (tx *Tx) Check() <-chan error {
   432  	ch := make(chan error)
   433  	go tx.check(ch)
   434  	return ch
   435  }
   436  
   437  func (tx *Tx) check(ch chan error) {
   438  	// Force loading free list if opened in ReadOnly mode.
   439  	tx.db.loadFreelist()
   440  
   441  	// Check if any pages are double freed.
   442  	freed := make(map[pgid]bool)
   443  	all := make([]pgid, tx.db.freelist.count())
   444  	tx.db.freelist.copyall(all)
   445  	for _, id := range all {
   446  		if freed[id] {
   447  			ch <- fmt.Errorf("page %d: already freed", id)
   448  		}
   449  		freed[id] = true
   450  	}
   451  
   452  	// Track every reachable page.
   453  	reachable := make(map[pgid]*page)
   454  	reachable[0] = tx.page(0) // meta0
   455  	reachable[1] = tx.page(1) // meta1
   456  	if tx.meta.freelist != pgidNoFreelist {
   457  		for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
   458  			reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
   459  		}
   460  	}
   461  
   462  	// Recursively check buckets.
   463  	tx.checkBucket(&tx.root, reachable, freed, ch)
   464  
   465  	// Ensure all pages below high water mark are either reachable or freed.
   466  	for i := pgid(0); i < tx.meta.pgid; i++ {
   467  		_, isReachable := reachable[i]
   468  		if !isReachable && !freed[i] {
   469  			ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
   470  		}
   471  	}
   472  
   473  	// Close the channel to signal completion.
   474  	close(ch)
   475  }
   476  
   477  func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
   478  	// Ignore inline buckets.
   479  	if b.root == 0 {
   480  		return
   481  	}
   482  
   483  	// Check every page used by this bucket.
   484  	b.tx.forEachPage(b.root, 0, func(p *page, _ int) {
   485  		if p.id > tx.meta.pgid {
   486  			ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
   487  		}
   488  
   489  		// Ensure each page is only referenced once.
   490  		for i := pgid(0); i <= pgid(p.overflow); i++ {
   491  			var id = p.id + i
   492  			if _, ok := reachable[id]; ok {
   493  				ch <- fmt.Errorf("page %d: multiple references", int(id))
   494  			}
   495  			reachable[id] = p
   496  		}
   497  
   498  		// We should only encounter un-freed leaf and branch pages.
   499  		if freed[p.id] {
   500  			ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
   501  		} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
   502  			ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
   503  		}
   504  	})
   505  
   506  	// Check each bucket within this bucket.
   507  	_ = b.ForEach(func(k, v []byte) error {
   508  		if child := b.Bucket(k); child != nil {
   509  			tx.checkBucket(child, reachable, freed, ch)
   510  		}
   511  		return nil
   512  	})
   513  }
   514  
   515  // allocate returns a contiguous block of memory starting at a given page.
   516  func (tx *Tx) allocate(count int) (*page, error) {
   517  	p, err := tx.db.allocate(tx.meta.txid, count)
   518  	if err != nil {
   519  		return nil, err
   520  	}
   521  
   522  	// Save to our page cache.
   523  	tx.pages[p.id] = p
   524  
   525  	// Update statistics.
   526  	tx.stats.PageCount += count
   527  	tx.stats.PageAlloc += count * tx.db.pageSize
   528  
   529  	return p, nil
   530  }
   531  
   532  // write writes any dirty pages to disk.
   533  func (tx *Tx) write() error {
   534  	// Sort pages by id.
   535  	pages := make(pages, 0, len(tx.pages))
   536  	for _, p := range tx.pages {
   537  		pages = append(pages, p)
   538  	}
   539  	// Clear out page cache early.
   540  	tx.pages = make(map[pgid]*page)
   541  	sort.Sort(pages)
   542  
   543  	// Write pages to disk in order.
   544  	for _, p := range pages {
   545  		rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
   546  		offset := int64(p.id) * int64(tx.db.pageSize)
   547  		var written uintptr
   548  
   549  		// Write out page in "max allocation" sized chunks.
   550  		for {
   551  			sz := rem
   552  			if sz > maxAllocSize-1 {
   553  				sz = maxAllocSize - 1
   554  			}
   555  			buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
   556  
   557  			if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
   558  				return err
   559  			}
   560  
   561  			// Update statistics.
   562  			tx.stats.Write++
   563  
   564  			// Exit inner for loop if we've written all the chunks.
   565  			rem -= sz
   566  			if rem == 0 {
   567  				break
   568  			}
   569  
   570  			// Otherwise move offset forward and move pointer to next chunk.
   571  			offset += int64(sz)
   572  			written += uintptr(sz)
   573  		}
   574  	}
   575  
   576  	// Ignore file sync if flag is set on DB.
   577  	if !tx.db.NoSync || IgnoreNoSync {
   578  		if err := fdatasync(tx.db); err != nil {
   579  			return err
   580  		}
   581  	}
   582  
   583  	// Put small pages back to page pool.
   584  	for _, p := range pages {
   585  		// Ignore page sizes over 1 page.
   586  		// These are allocated using make() instead of the page pool.
   587  		if int(p.overflow) != 0 {
   588  			continue
   589  		}
   590  
   591  		buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
   592  
   593  		// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
   594  		for i := range buf {
   595  			buf[i] = 0
   596  		}
   597  		tx.db.pagePool.Put(buf)
   598  	}
   599  
   600  	return nil
   601  }
   602  
   603  // writeMeta writes the meta to the disk.
   604  func (tx *Tx) writeMeta() error {
   605  	// Create a temporary buffer for the meta page.
   606  	buf := make([]byte, tx.db.pageSize)
   607  	p := tx.db.pageInBuffer(buf, 0)
   608  	tx.meta.write(p)
   609  
   610  	// Write the meta page to file.
   611  	if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
   612  		return err
   613  	}
   614  	if !tx.db.NoSync || IgnoreNoSync {
   615  		if err := fdatasync(tx.db); err != nil {
   616  			return err
   617  		}
   618  	}
   619  
   620  	// Update statistics.
   621  	tx.stats.Write++
   622  
   623  	return nil
   624  }
   625  
   626  // page returns a reference to the page with a given id.
   627  // If page has been written to then a temporary buffered page is returned.
   628  func (tx *Tx) page(id pgid) *page {
   629  	// Check the dirty pages first.
   630  	if tx.pages != nil {
   631  		if p, ok := tx.pages[id]; ok {
   632  			return p
   633  		}
   634  	}
   635  
   636  	// Otherwise return directly from the mmap.
   637  	return tx.db.page(id)
   638  }
   639  
   640  // forEachPage iterates over every page within a given page and executes a function.
   641  func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
   642  	p := tx.page(pgid)
   643  
   644  	// Execute function.
   645  	fn(p, depth)
   646  
   647  	// Recursively loop over children.
   648  	if (p.flags & branchPageFlag) != 0 {
   649  		for i := 0; i < int(p.count); i++ {
   650  			elem := p.branchPageElement(uint16(i))
   651  			tx.forEachPage(elem.pgid, depth+1, fn)
   652  		}
   653  	}
   654  }
   655  
   656  // Page returns page information for a given page number.
   657  // This is only safe for concurrent use when used by a writable transaction.
   658  func (tx *Tx) Page(id int) (*PageInfo, error) {
   659  	if tx.db == nil {
   660  		return nil, ErrTxClosed
   661  	} else if pgid(id) >= tx.meta.pgid {
   662  		return nil, nil
   663  	}
   664  
   665  	// Build the page info.
   666  	p := tx.db.page(pgid(id))
   667  	info := &PageInfo{
   668  		ID:            id,
   669  		Count:         int(p.count),
   670  		OverflowCount: int(p.overflow),
   671  	}
   672  
   673  	// Determine the type (or if it's free).
   674  	if tx.db.freelist.freed(pgid(id)) {
   675  		info.Type = "free"
   676  	} else {
   677  		info.Type = p.typ()
   678  	}
   679  
   680  	return info, nil
   681  }
   682  
   683  // TxStats represents statistics about the actions performed by the transaction.
   684  type TxStats struct {
   685  	// Page statistics.
   686  	PageCount int // number of page allocations
   687  	PageAlloc int // total bytes allocated
   688  
   689  	// Cursor statistics.
   690  	CursorCount int // number of cursors created
   691  
   692  	// Node statistics
   693  	NodeCount int // number of node allocations
   694  	NodeDeref int // number of node dereferences
   695  
   696  	// Rebalance statistics.
   697  	Rebalance     int           // number of node rebalances
   698  	RebalanceTime time.Duration // total time spent rebalancing
   699  
   700  	// Split/Spill statistics.
   701  	Split     int           // number of nodes split
   702  	Spill     int           // number of nodes spilled
   703  	SpillTime time.Duration // total time spent spilling
   704  
   705  	// Write statistics.
   706  	Write     int           // number of writes performed
   707  	WriteTime time.Duration // total time spent writing to disk
   708  }
   709  
   710  func (s *TxStats) add(other *TxStats) {
   711  	s.PageCount += other.PageCount
   712  	s.PageAlloc += other.PageAlloc
   713  	s.CursorCount += other.CursorCount
   714  	s.NodeCount += other.NodeCount
   715  	s.NodeDeref += other.NodeDeref
   716  	s.Rebalance += other.Rebalance
   717  	s.RebalanceTime += other.RebalanceTime
   718  	s.Split += other.Split
   719  	s.Spill += other.Spill
   720  	s.SpillTime += other.SpillTime
   721  	s.Write += other.Write
   722  	s.WriteTime += other.WriteTime
   723  }
   724  
   725  // Sub calculates and returns the difference between two sets of transaction stats.
   726  // This is useful when obtaining stats at two different points and time and
   727  // you need the performance counters that occurred within that time span.
   728  func (s *TxStats) Sub(other *TxStats) TxStats {
   729  	var diff TxStats
   730  	diff.PageCount = s.PageCount - other.PageCount
   731  	diff.PageAlloc = s.PageAlloc - other.PageAlloc
   732  	diff.CursorCount = s.CursorCount - other.CursorCount
   733  	diff.NodeCount = s.NodeCount - other.NodeCount
   734  	diff.NodeDeref = s.NodeDeref - other.NodeDeref
   735  	diff.Rebalance = s.Rebalance - other.Rebalance
   736  	diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
   737  	diff.Split = s.Split - other.Split
   738  	diff.Spill = s.Spill - other.Spill
   739  	diff.SpillTime = s.SpillTime - other.SpillTime
   740  	diff.Write = s.Write - other.Write
   741  	diff.WriteTime = s.WriteTime - other.WriteTime
   742  	return diff
   743  }