github.com/decred/dcrlnd@v0.7.6/kvdb/bolt_compact.go (about)

     1  // The code in this file is an adapted version of the bbolt compact command
     2  // implemented in this file:
     3  // https://github.com/etcd-io/bbolt/blob/master/cmd/bbolt/main.go
     4  
     5  //go:build !js
     6  // +build !js
     7  
     8  package kvdb
     9  
    10  import (
    11  	"encoding/hex"
    12  	"fmt"
    13  	"os"
    14  	"path"
    15  	"time"
    16  
    17  	"github.com/decred/dcrlnd/healthcheck"
    18  	bolt "go.etcd.io/bbolt"
    19  )
    20  
const (
	// defaultResultFileSizeMultiplier is the default multiplier we apply to
	// the current database size to calculate how big it could possibly get
	// after compacting, in case the database is already at its optimal size
	// and compaction causes it to grow. This should normally not be the
	// case but we really want to avoid not having enough disk space for the
	// compaction, so we apply a safety margin of 10%.
	defaultResultFileSizeMultiplier = float64(1.1)

	// defaultTxMaxSize is the default maximum number of bytes (the sum of
	// key and value lengths) that are copied within a single destination
	// transaction before it is committed and a new one is started.
	defaultTxMaxSize = 65536

	// bucketFillSize is the fill percent setting that is used for each
	// bucket that is written to in the compacted database. This setting is
	// not persisted and is therefore only effective for the compaction
	// itself. Because during the compaction we only append data, a fill
	// percent of 100% is optimal for performance.
	bucketFillSize = 1.0
)
    41  
// compacter holds the configuration for a single compaction run that copies
// the contents of a source bolt database file into a fresh destination file.
type compacter struct {
	// srcPath and dstPath locate the database to compact and the file the
	// compacted copy is written to. txMaxSize caps the number of bytes
	// copied per destination transaction (0 means defaultTxMaxSize).
	srcPath   string
	dstPath   string
	txMaxSize int64

	// dbTimeout specifies the timeout value used when opening the db.
	dbTimeout time.Duration
}
    50  
    51  // execute opens the source and destination databases and then compacts the
    52  // source into destination and returns the size of both files as a result.
    53  func (cmd *compacter) execute() (int64, int64, error) {
    54  	if cmd.txMaxSize == 0 {
    55  		cmd.txMaxSize = defaultTxMaxSize
    56  	}
    57  
    58  	// Ensure source file exists.
    59  	fi, err := os.Stat(cmd.srcPath)
    60  	if err != nil {
    61  		return 0, 0, fmt.Errorf("error determining source database "+
    62  			"size: %v", err)
    63  	}
    64  	initialSize := fi.Size()
    65  	marginSize := float64(initialSize) * defaultResultFileSizeMultiplier
    66  
    67  	// Before opening any of the databases, let's first make sure we have
    68  	// enough free space on the destination file system to create a full
    69  	// copy of the source DB (worst-case scenario if the compaction doesn't
    70  	// actually shrink the file size).
    71  	destFolder := path.Dir(cmd.dstPath)
    72  	freeSpace, err := healthcheck.AvailableDiskSpace(destFolder)
    73  	if err != nil {
    74  		return 0, 0, fmt.Errorf("error determining free disk space on "+
    75  			"%s: %v", destFolder, err)
    76  	}
    77  	log.Debugf("Free disk space on compaction destination file system: "+
    78  		"%d bytes", freeSpace)
    79  	if freeSpace < uint64(marginSize) {
    80  		return 0, 0, fmt.Errorf("could not start compaction, "+
    81  			"destination folder %s only has %d bytes of free disk "+
    82  			"space available while we need at least %d for worst-"+
    83  			"case compaction", destFolder, freeSpace, uint64(marginSize))
    84  	}
    85  
    86  	// Open source database. We open it in read only mode to avoid (and fix)
    87  	// possible freelist sync problems.
    88  	src, err := bolt.Open(cmd.srcPath, 0444, &bolt.Options{
    89  		ReadOnly: true,
    90  		Timeout:  cmd.dbTimeout,
    91  	})
    92  	if err != nil {
    93  		return 0, 0, fmt.Errorf("error opening source database: %v",
    94  			err)
    95  	}
    96  	defer func() {
    97  		if err := src.Close(); err != nil {
    98  			log.Errorf("Compact error: closing source DB: %v", err)
    99  		}
   100  	}()
   101  
   102  	// Open destination database.
   103  	dst, err := bolt.Open(cmd.dstPath, fi.Mode(), &bolt.Options{
   104  		Timeout: cmd.dbTimeout,
   105  	})
   106  	if err != nil {
   107  		return 0, 0, fmt.Errorf("error opening destination database: "+
   108  			"%v", err)
   109  	}
   110  	defer func() {
   111  		if err := dst.Close(); err != nil {
   112  			log.Errorf("Compact error: closing dest DB: %v", err)
   113  		}
   114  	}()
   115  
   116  	// Run compaction.
   117  	if err := cmd.compact(dst, src); err != nil {
   118  		return 0, 0, fmt.Errorf("error running compaction: %v", err)
   119  	}
   120  
   121  	// Report stats on new size.
   122  	fi, err = os.Stat(cmd.dstPath)
   123  	if err != nil {
   124  		return 0, 0, fmt.Errorf("error determining destination "+
   125  			"database size: %v", err)
   126  	} else if fi.Size() == 0 {
   127  		return 0, 0, fmt.Errorf("zero db size")
   128  	}
   129  
   130  	return initialSize, fi.Size(), nil
   131  }
   132  
// compact tries to create a compacted copy of the source database in a new
// destination database by walking every bucket and key/value pair of src and
// re-inserting them into dst in batched read-write transactions.
func (cmd *compacter) compact(dst, src *bolt.DB) error {
	// Commit regularly, or we'll run out of memory for large datasets if
	// using one transaction.
	var size int64
	tx, err := dst.Begin(true)
	if err != nil {
		return err
	}
	defer func() {
		// The closure reads tx at function exit, so it always rolls
		// back the most recently started transaction. If that
		// transaction was already committed below, Rollback errors
		// and the error is deliberately ignored.
		_ = tx.Rollback()
	}()

	if err := cmd.walk(src, func(keys [][]byte, k, v []byte, seq uint64) error {
		// On each key/value, check if we have exceeded tx size.
		sz := int64(len(k) + len(v))
		if size+sz > cmd.txMaxSize && cmd.txMaxSize != 0 {
			// Commit previous transaction.
			if err := tx.Commit(); err != nil {
				return err
			}

			// Start new transaction. Note this reassigns the tx
			// captured by the deferred Rollback above, keeping it
			// pointed at the live transaction.
			tx, err = dst.Begin(true)
			if err != nil {
				return err
			}
			size = 0
		}
		size += sz

		// Create bucket on the root transaction if this is the first
		// level. The source bucket's sequence number is carried over.
		nk := len(keys)
		if nk == 0 {
			bkt, err := tx.CreateBucket(k)
			if err != nil {
				return err
			}
			if err := bkt.SetSequence(seq); err != nil {
				return err
			}
			return nil
		}

		// Create buckets on subsequent levels, if necessary. The
		// bucket chain is re-resolved from the root on every call,
		// which keeps the handles valid across the tx restarts above.
		b := tx.Bucket(keys[0])
		if nk > 1 {
			for _, k := range keys[1:] {
				b = b.Bucket(k)
			}
		}

		// Fill the entire page for best compaction. This setting is
		// per-handle and not persisted to disk.
		b.FillPercent = bucketFillSize

		// If there is no value then this is a bucket call.
		if v == nil {
			bkt, err := b.CreateBucket(k)
			if err != nil {
				return err
			}
			if err := bkt.SetSequence(seq); err != nil {
				return err
			}
			return nil
		}

		// Otherwise treat it as a key/value pair.
		return b.Put(k, v)
	}); err != nil {
		return err
	}

	// Commit whatever remains in the final (possibly only) transaction.
	return tx.Commit()
}
   210  
// walkFunc is the type of the function called for keys (buckets and "normal"
// values) discovered by walk. keys is the list of keys to descend to the
// bucket owning the discovered key/value pair k/v; it is empty for top level
// buckets. v is nil when k names a (sub-)bucket, in which case seq carries
// that bucket's sequence number.
type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error
   215  
   216  // walk walks recursively the bolt database db, calling walkFn for each key it
   217  // finds.
   218  func (cmd *compacter) walk(db *bolt.DB, walkFn walkFunc) error {
   219  	return db.View(func(tx *bolt.Tx) error {
   220  		return tx.ForEach(func(name []byte, b *bolt.Bucket) error {
   221  			// This will log the top level buckets only to give the
   222  			// user some sense of progress.
   223  			log.Debugf("Compacting top level bucket '%s'",
   224  				LoggableKeyName(name))
   225  
   226  			return cmd.walkBucket(
   227  				b, nil, name, nil, b.Sequence(), walkFn,
   228  			)
   229  		})
   230  	})
   231  }
   232  
   233  // LoggableKeyName returns a printable name of the given key.
   234  func LoggableKeyName(key []byte) string {
   235  	strKey := string(key)
   236  	if hasSpecialChars(strKey) {
   237  		return hex.EncodeToString(key)
   238  	}
   239  
   240  	return strKey
   241  }
   242  
   243  // hasSpecialChars returns true if any of the characters in the given string
   244  // cannot be printed.
   245  func hasSpecialChars(s string) bool {
   246  	for _, b := range s {
   247  		if !(b >= 'a' && b <= 'z') && !(b >= 'A' && b <= 'Z') &&
   248  			!(b >= '0' && b <= '9') && b != '-' && b != '_' {
   249  
   250  			return true
   251  		}
   252  	}
   253  
   254  	return false
   255  }
   256  
// walkBucket recursively walks through a bucket, invoking fn first for the
// bucket (or key/value pair) itself and then, depth-first, for each of its
// children.
//
// b is the bucket being visited, keyPath the chain of bucket names leading to
// b's parent, k the key under which b (or the plain value v) is stored and
// seq the bucket's sequence number. v is nil when the call describes a
// bucket rather than a key/value pair.
func (cmd *compacter) walkBucket(b *bolt.Bucket, keyPath [][]byte, k, v []byte,
	seq uint64, fn walkFunc) error {

	// Execute callback.
	if err := fn(keyPath, k, v, seq); err != nil {
		return err
	}

	// If this is not a bucket then stop.
	if v != nil {
		return nil
	}

	// Iterate over each child key/value.
	//
	// NOTE(review): the append may let sibling calls at the same depth
	// share one backing array; this looks safe because the traversal is
	// strictly depth-first, so a slot is fully consumed before a sibling
	// overwrites it — confirm before reordering the recursion.
	keyPath = append(keyPath, k)
	return b.ForEach(func(k, v []byte) error {
		if v == nil {
			// Child is a nested bucket: descend with its own
			// sequence number.
			bkt := b.Bucket(k)
			return cmd.walkBucket(
				bkt, keyPath, k, nil, bkt.Sequence(), fn,
			)
		}
		// Child is a plain key/value pair owned by b.
		return cmd.walkBucket(b, keyPath, k, v, b.Sequence(), fn)
	})
}