github.com/ooni/psiphon/tunnel-core@v0.0.0-20230105123940-fe12a24c96ee/oovendor/bolt/bucket.go (about)

     1  package bolt
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"unsafe"
     7  )
     8  
     9  const (
    10  	// MaxKeySize is the maximum length of a key, in bytes.
    11  	MaxKeySize = 32768
    12  
    13  	// MaxValueSize is the maximum length of a value, in bytes.
    14  	MaxValueSize = (1 << 31) - 2
    15  )
    16  
// bucketHeaderSize is the size, in bytes, of the bucket header struct.
// For an inline bucket the embedded page starts at this offset within the
// bucket's value.
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
    18  
// Lower and upper bounds for a fill percentage.
// NOTE(review): these are not referenced in this file; presumably they clamp
// Bucket.FillPercent where nodes are split — confirm against the node code.
const (
	minFillPercent = 0.1
	maxFillPercent = 1.0
)

// DefaultFillPercent is the percentage that split pages are filled.
// It is the value newBucket and CreateBucket assign to Bucket.FillPercent;
// callers can override it per bucket by setting that field.
const DefaultFillPercent = 0.5
    27  
// Bucket represents a collection of key/value pairs inside the database.
// It embeds the bucket header and carries the per-transaction state (caches
// and materialized nodes) needed to read and modify the bucket.
type Bucket struct {
	*bucket
	tx       *Tx                // the associated transaction
	buckets  map[string]*Bucket // subbucket cache; only allocated by newBucket for writable transactions
	page     *page              // inline page reference; set when the bucket is inline (root == 0)
	rootNode *node              // materialized node for the root page.
	nodes    map[pgid]*node     // node cache; only allocated by newBucket for writable transactions

	// Sets the threshold for filling nodes when they split. By default,
	// the bucket will fill to 50% but it can be useful to increase this
	// amount if you know that your write workloads are mostly append-only.
	//
	// This is non-persisted across transactions so it must be set in every Tx.
	FillPercent float64
}
    44  
// bucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
//
// The struct is read from and written to raw bytes through unsafe pointer
// casts, so its field layout is part of the file format.
type bucket struct {
	root     pgid   // page id of the bucket's root-level page
	sequence uint64 // monotonically incrementing, used by NextSequence()
}
    53  
    54  // newBucket returns a new bucket associated with a transaction.
    55  func newBucket(tx *Tx) Bucket {
    56  	var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
    57  	if tx.writable {
    58  		b.buckets = make(map[string]*Bucket)
    59  		b.nodes = make(map[pgid]*node)
    60  	}
    61  	return b
    62  }
    63  
// Tx returns the transaction this bucket is associated with.
func (b *Bucket) Tx() *Tx {
	return b.tx
}
    68  
// Root returns the page id of the bucket's root-level page.
// The value is 0 for an inline bucket.
func (b *Bucket) Root() pgid {
	return b.root
}
    73  
// Writable returns whether the bucket is writable, i.e. whether its
// transaction is a writable one.
func (b *Bucket) Writable() bool {
	return b.tx.writable
}
    78  
    79  // Cursor creates a cursor associated with the bucket.
    80  // The cursor is only valid as long as the transaction is open.
    81  // Do not use a cursor after the transaction is closed.
    82  func (b *Bucket) Cursor() *Cursor {
    83  	// Update transaction statistics.
    84  	b.tx.stats.CursorCount++
    85  
    86  	// Allocate and return a cursor.
    87  	return &Cursor{
    88  		bucket: b,
    89  		stack:  make([]elemRef, 0),
    90  	}
    91  }
    92  
    93  // Bucket retrieves a nested bucket by name.
    94  // Returns nil if the bucket does not exist.
    95  // The bucket instance is only valid for the lifetime of the transaction.
    96  func (b *Bucket) Bucket(name []byte) *Bucket {
    97  	if b.buckets != nil {
    98  		if child := b.buckets[string(name)]; child != nil {
    99  			return child
   100  		}
   101  	}
   102  
   103  	// Move cursor to key.
   104  	c := b.Cursor()
   105  	k, v, flags := c.seek(name)
   106  
   107  	// Return nil if the key doesn't exist or it is not a bucket.
   108  	if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
   109  		return nil
   110  	}
   111  
   112  	// Otherwise create a bucket and cache it.
   113  	var child = b.openBucket(v)
   114  	if b.buckets != nil {
   115  		b.buckets[string(name)] = child
   116  	}
   117  
   118  	return child
   119  }
   120  
// openBucket re-interprets a sub-bucket value taken from a parent bucket
// as a Bucket bound to the parent's transaction.
//
// value is expected to begin with a bucket header, followed by the inline
// page when the header's root is 0. value must be non-empty: its first byte
// is addressed unconditionally.
func (b *Bucket) openBucket(value []byte) *Bucket {
	var child = newBucket(b.tx)

	// Unaligned access requires a copy to be made.
	// The mask is derived from the alignment of a struct embedding both the
	// bucket header and a page, the two types value is cast to below.
	const unalignedMask = unsafe.Alignof(struct {
		bucket
		page
	}{}) - 1
	unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0
	if unaligned {
		// NOTE(review): relies on the freshly allocated clone being
		// suitably aligned — confirm for all supported platforms.
		value = cloneBytes(value)
	}

	// If this is a writable transaction then we need to copy the bucket entry.
	// Read-only transactions can point directly at the mmap entry.
	// When value was cloned above it is already a private copy, so the header
	// can point into it directly even in a writable transaction.
	if b.tx.writable && !unaligned {
		child.bucket = &bucket{}
		*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
	} else {
		child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
	}

	// Save a reference to the inline page if the bucket is inline.
	// The inline page starts immediately after the bucket header.
	if child.root == 0 {
		child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
	}

	return &child
}
   152  
   153  // CreateBucket creates a new bucket at the given key and returns the new bucket.
   154  // Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
   155  // The bucket instance is only valid for the lifetime of the transaction.
   156  func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
   157  	if b.tx.db == nil {
   158  		return nil, ErrTxClosed
   159  	} else if !b.tx.writable {
   160  		return nil, ErrTxNotWritable
   161  	} else if len(key) == 0 {
   162  		return nil, ErrBucketNameRequired
   163  	}
   164  
   165  	// Move cursor to correct position.
   166  	c := b.Cursor()
   167  	k, _, flags := c.seek(key)
   168  
   169  	// Return an error if there is an existing key.
   170  	if bytes.Equal(key, k) {
   171  		if (flags & bucketLeafFlag) != 0 {
   172  			return nil, ErrBucketExists
   173  		}
   174  		return nil, ErrIncompatibleValue
   175  	}
   176  
   177  	// Create empty, inline bucket.
   178  	var bucket = Bucket{
   179  		bucket:      &bucket{},
   180  		rootNode:    &node{isLeaf: true},
   181  		FillPercent: DefaultFillPercent,
   182  	}
   183  	var value = bucket.write()
   184  
   185  	// Insert into node.
   186  	key = cloneBytes(key)
   187  	c.node().put(key, key, value, 0, bucketLeafFlag)
   188  
   189  	// Since subbuckets are not allowed on inline buckets, we need to
   190  	// dereference the inline page, if it exists. This will cause the bucket
   191  	// to be treated as a regular, non-inline bucket for the rest of the tx.
   192  	b.page = nil
   193  
   194  	return b.Bucket(key), nil
   195  }
   196  
   197  // CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
   198  // Returns an error if the bucket name is blank, or if the bucket name is too long.
   199  // The bucket instance is only valid for the lifetime of the transaction.
   200  func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
   201  	child, err := b.CreateBucket(key)
   202  	if err == ErrBucketExists {
   203  		return b.Bucket(key), nil
   204  	} else if err != nil {
   205  		return nil, err
   206  	}
   207  	return child, nil
   208  }
   209  
   210  // DeleteBucket deletes a bucket at the given key.
   211  // Returns an error if the bucket does not exist, or if the key represents a non-bucket value.
   212  func (b *Bucket) DeleteBucket(key []byte) error {
   213  	if b.tx.db == nil {
   214  		return ErrTxClosed
   215  	} else if !b.Writable() {
   216  		return ErrTxNotWritable
   217  	}
   218  
   219  	// Move cursor to correct position.
   220  	c := b.Cursor()
   221  	k, _, flags := c.seek(key)
   222  
   223  	// Return an error if bucket doesn't exist or is not a bucket.
   224  	if !bytes.Equal(key, k) {
   225  		return ErrBucketNotFound
   226  	} else if (flags & bucketLeafFlag) == 0 {
   227  		return ErrIncompatibleValue
   228  	}
   229  
   230  	// Recursively delete all child buckets.
   231  	child := b.Bucket(key)
   232  	err := child.ForEach(func(k, v []byte) error {
   233  		if _, _, childFlags := child.Cursor().seek(k); (childFlags & bucketLeafFlag) != 0 {
   234  			if err := child.DeleteBucket(k); err != nil {
   235  				return fmt.Errorf("delete bucket: %s", err)
   236  			}
   237  		}
   238  		return nil
   239  	})
   240  	if err != nil {
   241  		return err
   242  	}
   243  
   244  	// Remove cached copy.
   245  	delete(b.buckets, string(key))
   246  
   247  	// Release all bucket pages to freelist.
   248  	child.nodes = nil
   249  	child.rootNode = nil
   250  	child.free()
   251  
   252  	// Delete the node if we have a matching key.
   253  	c.node().del(key)
   254  
   255  	return nil
   256  }
   257  
   258  // Get retrieves the value for a key in the bucket.
   259  // Returns a nil value if the key does not exist or if the key is a nested bucket.
   260  // The returned value is only valid for the life of the transaction.
   261  func (b *Bucket) Get(key []byte) []byte {
   262  	k, v, flags := b.Cursor().seek(key)
   263  
   264  	// Return nil if this is a bucket.
   265  	if (flags & bucketLeafFlag) != 0 {
   266  		return nil
   267  	}
   268  
   269  	// If our target node isn't the same key as what's passed in then return nil.
   270  	if !bytes.Equal(key, k) {
   271  		return nil
   272  	}
   273  	return v
   274  }
   275  
   276  // Put sets the value for a key in the bucket.
   277  // If the key exist then its previous value will be overwritten.
   278  // Supplied value must remain valid for the life of the transaction.
   279  // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
   280  func (b *Bucket) Put(key []byte, value []byte) error {
   281  	if b.tx.db == nil {
   282  		return ErrTxClosed
   283  	} else if !b.Writable() {
   284  		return ErrTxNotWritable
   285  	} else if len(key) == 0 {
   286  		return ErrKeyRequired
   287  	} else if len(key) > MaxKeySize {
   288  		return ErrKeyTooLarge
   289  	} else if int64(len(value)) > MaxValueSize {
   290  		return ErrValueTooLarge
   291  	}
   292  
   293  	// Move cursor to correct position.
   294  	c := b.Cursor()
   295  	k, _, flags := c.seek(key)
   296  
   297  	// Return an error if there is an existing key with a bucket value.
   298  	if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
   299  		return ErrIncompatibleValue
   300  	}
   301  
   302  	// Insert into node.
   303  	key = cloneBytes(key)
   304  	c.node().put(key, key, value, 0, 0)
   305  
   306  	return nil
   307  }
   308  
   309  // Delete removes a key from the bucket.
   310  // If the key does not exist then nothing is done and a nil error is returned.
   311  // Returns an error if the bucket was created from a read-only transaction.
   312  func (b *Bucket) Delete(key []byte) error {
   313  	if b.tx.db == nil {
   314  		return ErrTxClosed
   315  	} else if !b.Writable() {
   316  		return ErrTxNotWritable
   317  	}
   318  
   319  	// Move cursor to correct position.
   320  	c := b.Cursor()
   321  	k, _, flags := c.seek(key)
   322  
   323  	// Return nil if the key doesn't exist.
   324  	if !bytes.Equal(key, k) {
   325  		return nil
   326  	}
   327  
   328  	// Return an error if there is already existing bucket value.
   329  	if (flags & bucketLeafFlag) != 0 {
   330  		return ErrIncompatibleValue
   331  	}
   332  
   333  	// Delete the node if we have a matching key.
   334  	c.node().del(key)
   335  
   336  	return nil
   337  }
   338  
// Sequence returns the current integer for the bucket without incrementing it.
// The value is read from the bucket header's sequence field.
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
   341  
   342  // SetSequence updates the sequence number for the bucket.
   343  func (b *Bucket) SetSequence(v uint64) error {
   344  	if b.tx.db == nil {
   345  		return ErrTxClosed
   346  	} else if !b.Writable() {
   347  		return ErrTxNotWritable
   348  	}
   349  
   350  	// Materialize the root node if it hasn't been already so that the
   351  	// bucket will be saved during commit.
   352  	if b.rootNode == nil {
   353  		_ = b.node(b.root, nil)
   354  	}
   355  
   356  	// Increment and return the sequence.
   357  	b.bucket.sequence = v
   358  	return nil
   359  }
   360  
   361  // NextSequence returns an autoincrementing integer for the bucket.
   362  func (b *Bucket) NextSequence() (uint64, error) {
   363  	if b.tx.db == nil {
   364  		return 0, ErrTxClosed
   365  	} else if !b.Writable() {
   366  		return 0, ErrTxNotWritable
   367  	}
   368  
   369  	// Materialize the root node if it hasn't been already so that the
   370  	// bucket will be saved during commit.
   371  	if b.rootNode == nil {
   372  		_ = b.node(b.root, nil)
   373  	}
   374  
   375  	// Increment and return the sequence.
   376  	b.bucket.sequence++
   377  	return b.bucket.sequence, nil
   378  }
   379  
   380  // ForEach executes a function for each key/value pair in a bucket.
   381  // If the provided function returns an error then the iteration is stopped and
   382  // the error is returned to the caller. The provided function must not modify
   383  // the bucket; this will result in undefined behavior.
   384  func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
   385  	if b.tx.db == nil {
   386  		return ErrTxClosed
   387  	}
   388  	c := b.Cursor()
   389  	for k, v := c.First(); k != nil; k, v = c.Next() {
   390  		if err := fn(k, v); err != nil {
   391  			return err
   392  		}
   393  	}
   394  	return nil
   395  }
   396  
// Stats returns stats on a bucket.
// It visits every page of the bucket through forEachPage, accumulates page,
// key and byte-usage counters, and recursively folds in the stats of any
// sub-buckets found on non-inline leaf pages.
func (b *Bucket) Stats() BucketStats {
	// s holds this bucket's own stats; subStats accumulates all sub-buckets.
	var s, subStats BucketStats
	pageSize := b.tx.db.pageSize
	s.BucketN += 1
	if b.root == 0 {
		s.InlineBucketN += 1
	}
	b.forEachPage(func(p *page, depth int) {
		if (p.flags & leafPageFlag) != 0 {
			s.KeyN += int(p.count)

			// used totals the used bytes for the page
			used := pageHeaderSize

			if p.count != 0 {
				// If page has any elements, add all element headers.
				used += leafPageElementSize * uintptr(p.count-1)

				// Add all element key, value sizes.
				// The computation takes advantage of the fact that the position
				// of the last element's key/value equals to the total of the sizes
				// of all previous elements' keys and values.
				// It also includes the last element's header.
				lastElement := p.leafPageElement(p.count - 1)
				used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize)
			}

			if b.root == 0 {
				// For inlined bucket just update the inline stats
				s.InlineBucketInuse += int(used)
			} else {
				// For non-inlined bucket update all the leaf stats
				s.LeafPageN++
				s.LeafInuse += int(used)
				s.LeafOverflowN += int(p.overflow)

				// Collect stats from sub-buckets.
				// Do that by iterating over all element headers
				// looking for the ones with the bucketLeafFlag.
				for i := uint16(0); i < p.count; i++ {
					e := p.leafPageElement(i)
					if (e.flags & bucketLeafFlag) != 0 {
						// For any bucket element, open the element value
						// and recursively call Stats on the contained bucket.
						subStats.Add(b.openBucket(e.value()).Stats())
					}
				}
			}
		} else if (p.flags & branchPageFlag) != 0 {
			s.BranchPageN++
			// NOTE(review): p.count-1 assumes a branch page always holds at
			// least one element — confirm this invariant.
			lastElement := p.branchPageElement(p.count - 1)

			// used totals the used bytes for the page
			// Add header and all element headers.
			used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1))

			// Add size of all keys and values.
			// Again, use the fact that last element's position equals to
			// the total of key, value sizes of all previous elements.
			used += uintptr(lastElement.pos + lastElement.ksize)
			s.BranchInuse += int(used)
			s.BranchOverflowN += int(p.overflow)
		}

		// Keep track of maximum page depth.
		if depth+1 > s.Depth {
			s.Depth = (depth + 1)
		}
	})

	// Alloc stats can be computed from page counts and pageSize.
	s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
	s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize

	// Add the max depth of sub-buckets to get total nested depth.
	// This must happen before s.Add below, which only keeps the larger of
	// the two depths.
	s.Depth += subStats.Depth
	// Add the stats for all sub-buckets
	s.Add(subStats)
	return s
}
   478  
   479  // forEachPage iterates over every page in a bucket, including inline pages.
   480  func (b *Bucket) forEachPage(fn func(*page, int)) {
   481  	// If we have an inline page then just use that.
   482  	if b.page != nil {
   483  		fn(b.page, 0)
   484  		return
   485  	}
   486  
   487  	// Otherwise traverse the page hierarchy.
   488  	b.tx.forEachPage(b.root, 0, fn)
   489  }
   490  
   491  // forEachPageNode iterates over every page (or node) in a bucket.
   492  // This also includes inline pages.
   493  func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
   494  	// If we have an inline page or root node then just use that.
   495  	if b.page != nil {
   496  		fn(b.page, nil, 0)
   497  		return
   498  	}
   499  	b._forEachPageNode(b.root, 0, fn)
   500  }
   501  
   502  func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
   503  	var p, n = b.pageNode(pgid)
   504  
   505  	// Execute function.
   506  	fn(p, n, depth)
   507  
   508  	// Recursively loop over children.
   509  	if p != nil {
   510  		if (p.flags & branchPageFlag) != 0 {
   511  			for i := 0; i < int(p.count); i++ {
   512  				elem := p.branchPageElement(uint16(i))
   513  				b._forEachPageNode(elem.pgid, depth+1, fn)
   514  			}
   515  		}
   516  	} else {
   517  		if !n.isLeaf {
   518  			for _, inode := range n.inodes {
   519  				b._forEachPageNode(inode.pgid, depth+1, fn)
   520  			}
   521  		}
   522  	}
   523  }
   524  
// spill writes all the nodes for this bucket to dirty pages.
// Cached child buckets are handled first (written inline or spilled
// recursively) and their headers are updated in this bucket; then this
// bucket's own node tree is spilled and its root page id is refreshed.
// It returns the first error reported by a child or node spill, and panics
// if an internal invariant is violated.
func (b *Bucket) spill() error {
	// Spill all child buckets first.
	for name, child := range b.buckets {
		// If the child bucket is small enough and it has no child buckets then
		// write it inline into the parent bucket's page. Otherwise spill it
		// like a normal bucket and make the parent value a pointer to the page.
		var value []byte
		if child.inlineable() {
			// Release any pages the child owned before it became inline.
			child.free()
			value = child.write()
		} else {
			if err := child.spill(); err != nil {
				return err
			}

			// Update the child bucket header in this bucket.
			// Only the header is stored; the data lives in the child's pages.
			value = make([]byte, unsafe.Sizeof(bucket{}))
			var bucket = (*bucket)(unsafe.Pointer(&value[0]))
			*bucket = *child.bucket
		}

		// Skip writing the bucket if there are no materialized nodes.
		if child.rootNode == nil {
			continue
		}

		// Update parent node.
		// The child's key must already exist here and be flagged as a bucket.
		var c = b.Cursor()
		k, _, flags := c.seek([]byte(name))
		if !bytes.Equal([]byte(name), k) {
			panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
		}
		if flags&bucketLeafFlag == 0 {
			panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
		}
		c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
	}

	// Ignore if there's not a materialized root node.
	if b.rootNode == nil {
		return nil
	}

	// Spill nodes.
	if err := b.rootNode.spill(); err != nil {
		return err
	}
	// Re-resolve the root afterwards, since the node's root() may differ
	// from the node that was spilled.
	b.rootNode = b.rootNode.root()

	// Update the root node for this bucket.
	// The new root page id must lie below the transaction's high water mark.
	if b.rootNode.pgid >= b.tx.meta.pgid {
		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
	}
	b.root = b.rootNode.pgid

	return nil
}
   583  
   584  // inlineable returns true if a bucket is small enough to be written inline
   585  // and if it contains no subbuckets. Otherwise returns false.
   586  func (b *Bucket) inlineable() bool {
   587  	var n = b.rootNode
   588  
   589  	// Bucket must only contain a single leaf node.
   590  	if n == nil || !n.isLeaf {
   591  		return false
   592  	}
   593  
   594  	// Bucket is not inlineable if it contains subbuckets or if it goes beyond
   595  	// our threshold for inline bucket size.
   596  	var size = pageHeaderSize
   597  	for _, inode := range n.inodes {
   598  		size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
   599  
   600  		if inode.flags&bucketLeafFlag != 0 {
   601  			return false
   602  		} else if size > b.maxInlineBucketSize() {
   603  			return false
   604  		}
   605  	}
   606  
   607  	return true
   608  }
   609  
// maxInlineBucketSize returns the maximum total size of a bucket to make it a
// candidate for inlining: one quarter of the database page size.
func (b *Bucket) maxInlineBucketSize() uintptr {
	return uintptr(b.tx.db.pageSize / 4)
}
   614  
   615  // write allocates and writes a bucket to a byte slice.
   616  func (b *Bucket) write() []byte {
   617  	// Allocate the appropriate size.
   618  	var n = b.rootNode
   619  	var value = make([]byte, bucketHeaderSize+n.size())
   620  
   621  	// Write a bucket header.
   622  	var bucket = (*bucket)(unsafe.Pointer(&value[0]))
   623  	*bucket = *b.bucket
   624  
   625  	// Convert byte slice to a fake page and write the root node.
   626  	var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
   627  	n.write(p)
   628  
   629  	return value
   630  }
   631  
   632  // rebalance attempts to balance all nodes.
   633  func (b *Bucket) rebalance() {
   634  	for _, n := range b.nodes {
   635  		n.rebalance()
   636  	}
   637  	for _, child := range b.buckets {
   638  		child.rebalance()
   639  	}
   640  }
   641  
   642  // node creates a node from a page and associates it with a given parent.
   643  func (b *Bucket) node(pgid pgid, parent *node) *node {
   644  	_assert(b.nodes != nil, "nodes map expected")
   645  
   646  	// Retrieve node if it's already been created.
   647  	if n := b.nodes[pgid]; n != nil {
   648  		return n
   649  	}
   650  
   651  	// Otherwise create a node and cache it.
   652  	n := &node{bucket: b, parent: parent}
   653  	if parent == nil {
   654  		b.rootNode = n
   655  	} else {
   656  		parent.children = append(parent.children, n)
   657  	}
   658  
   659  	// Use the inline page if this is an inline bucket.
   660  	var p = b.page
   661  	if p == nil {
   662  		p = b.tx.page(pgid)
   663  	}
   664  
   665  	// Read the page into the node and cache it.
   666  	n.read(p)
   667  	b.nodes[pgid] = n
   668  
   669  	// Update statistics.
   670  	b.tx.stats.NodeCount++
   671  
   672  	return n
   673  }
   674  
   675  // free recursively frees all pages in the bucket.
   676  func (b *Bucket) free() {
   677  	if b.root == 0 {
   678  		return
   679  	}
   680  
   681  	var tx = b.tx
   682  	b.forEachPageNode(func(p *page, n *node, _ int) {
   683  		if p != nil {
   684  			tx.db.freelist.free(tx.meta.txid, p)
   685  		} else {
   686  			n.free()
   687  		}
   688  	})
   689  	b.root = 0
   690  }
   691  
   692  // dereference removes all references to the old mmap.
   693  func (b *Bucket) dereference() {
   694  	if b.rootNode != nil {
   695  		b.rootNode.root().dereference()
   696  	}
   697  
   698  	for _, child := range b.buckets {
   699  		child.dereference()
   700  	}
   701  }
   702  
   703  // pageNode returns the in-memory node, if it exists.
   704  // Otherwise returns the underlying page.
   705  func (b *Bucket) pageNode(id pgid) (*page, *node) {
   706  	// Inline buckets have a fake page embedded in their value so treat them
   707  	// differently. We'll return the rootNode (if available) or the fake page.
   708  	if b.root == 0 {
   709  		if id != 0 {
   710  			panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
   711  		}
   712  		if b.rootNode != nil {
   713  			return nil, b.rootNode
   714  		}
   715  		return b.page, nil
   716  	}
   717  
   718  	// Check the node cache for non-inline buckets.
   719  	if b.nodes != nil {
   720  		if n := b.nodes[id]; n != nil {
   721  			return nil, n
   722  		}
   723  	}
   724  
   725  	// Finally lookup the page from the transaction if no node is materialized.
   726  	return b.tx.page(id), nil
   727  }
   728  
   729  // BucketStats records statistics about resources used by a bucket.
   730  type BucketStats struct {
   731  	// Page count statistics.
   732  	BranchPageN     int // number of logical branch pages
   733  	BranchOverflowN int // number of physical branch overflow pages
   734  	LeafPageN       int // number of logical leaf pages
   735  	LeafOverflowN   int // number of physical leaf overflow pages
   736  
   737  	// Tree statistics.
   738  	KeyN  int // number of keys/value pairs
   739  	Depth int // number of levels in B+tree
   740  
   741  	// Page size utilization.
   742  	BranchAlloc int // bytes allocated for physical branch pages
   743  	BranchInuse int // bytes actually used for branch data
   744  	LeafAlloc   int // bytes allocated for physical leaf pages
   745  	LeafInuse   int // bytes actually used for leaf data
   746  
   747  	// Bucket statistics
   748  	BucketN           int // total number of buckets including the top bucket
   749  	InlineBucketN     int // total number on inlined buckets
   750  	InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
   751  }
   752  
   753  func (s *BucketStats) Add(other BucketStats) {
   754  	s.BranchPageN += other.BranchPageN
   755  	s.BranchOverflowN += other.BranchOverflowN
   756  	s.LeafPageN += other.LeafPageN
   757  	s.LeafOverflowN += other.LeafOverflowN
   758  	s.KeyN += other.KeyN
   759  	if s.Depth < other.Depth {
   760  		s.Depth = other.Depth
   761  	}
   762  	s.BranchAlloc += other.BranchAlloc
   763  	s.BranchInuse += other.BranchInuse
   764  	s.LeafAlloc += other.LeafAlloc
   765  	s.LeafInuse += other.LeafInuse
   766  
   767  	s.BucketN += other.BucketN
   768  	s.InlineBucketN += other.InlineBucketN
   769  	s.InlineBucketInuse += other.InlineBucketInuse
   770  }
   771  
   772  // cloneBytes returns a copy of a given slice.
   773  func cloneBytes(v []byte) []byte {
   774  	var clone = make([]byte, len(v))
   775  	copy(clone, v)
   776  	return clone
   777  }