github.com/decred/dcrlnd@v0.7.6/kvdb/bolt_compact.go (about) 1 // The code in this file is an adapted version of the bbolt compact command 2 // implemented in this file: 3 // https://github.com/etcd-io/bbolt/blob/master/cmd/bbolt/main.go 4 5 //go:build !js 6 // +build !js 7 8 package kvdb 9 10 import ( 11 "encoding/hex" 12 "fmt" 13 "os" 14 "path" 15 "time" 16 17 "github.com/decred/dcrlnd/healthcheck" 18 bolt "go.etcd.io/bbolt" 19 ) 20 21 const ( 22 // defaultResultFileSizeMultiplier is the default multiplier we apply to 23 // the current database size to calculate how big it could possibly get 24 // after compacting, in case the database is already at its optimal size 25 // and compaction causes it to grow. This should normally not be the 26 // case but we really want to avoid not having enough disk space for the 27 // compaction, so we apply a safety margin of 10%. 28 defaultResultFileSizeMultiplier = float64(1.1) 29 30 // defaultTxMaxSize is the default maximum number of operations that 31 // are allowed to be executed in a single transaction. 32 defaultTxMaxSize = 65536 33 34 // bucketFillSize is the fill size setting that is used for each new 35 // bucket that is created in the compacted database. This setting is not 36 // persisted and is therefore only effective for the compaction itself. 37 // Because during the compaction we only append data a fill percent of 38 // 100% is optimal for performance. 39 bucketFillSize = 1.0 40 ) 41 42 type compacter struct { 43 srcPath string 44 dstPath string 45 txMaxSize int64 46 47 // dbTimeout specifies the timeout value used when opening the db. 48 dbTimeout time.Duration 49 } 50 51 // execute opens the source and destination databases and then compacts the 52 // source into destination and returns the size of both files as a result. 53 func (cmd *compacter) execute() (int64, int64, error) { 54 if cmd.txMaxSize == 0 { 55 cmd.txMaxSize = defaultTxMaxSize 56 } 57 58 // Ensure source file exists. 59 fi, err := os.Stat(cmd.srcPath) 60 if err != nil { 61 return 0, 0, fmt.Errorf("error determining source database "+ 62 "size: %v", err) 63 } 64 initialSize := fi.Size() 65 marginSize := float64(initialSize) * defaultResultFileSizeMultiplier 66 67 // Before opening any of the databases, let's first make sure we have 68 // enough free space on the destination file system to create a full 69 // copy of the source DB (worst-case scenario if the compaction doesn't 70 // actually shrink the file size). 71 destFolder := path.Dir(cmd.dstPath) 72 freeSpace, err := healthcheck.AvailableDiskSpace(destFolder) 73 if err != nil { 74 return 0, 0, fmt.Errorf("error determining free disk space on "+ 75 "%s: %v", destFolder, err) 76 } 77 log.Debugf("Free disk space on compaction destination file system: "+ 78 "%d bytes", freeSpace) 79 if freeSpace < uint64(marginSize) { 80 return 0, 0, fmt.Errorf("could not start compaction, "+ 81 "destination folder %s only has %d bytes of free disk "+ 82 "space available while we need at least %d for worst-"+ 83 "case compaction", destFolder, freeSpace, uint64(marginSize)) 84 } 85 86 // Open source database. We open it in read only mode to avoid (and fix) 87 // possible freelist sync problems. 88 src, err := bolt.Open(cmd.srcPath, 0444, &bolt.Options{ 89 ReadOnly: true, 90 Timeout: cmd.dbTimeout, 91 }) 92 if err != nil { 93 return 0, 0, fmt.Errorf("error opening source database: %v", 94 err) 95 } 96 defer func() { 97 if err := src.Close(); err != nil { 98 log.Errorf("Compact error: closing source DB: %v", err) 99 } 100 }() 101 102 // Open destination database. 103 dst, err := bolt.Open(cmd.dstPath, fi.Mode(), &bolt.Options{ 104 Timeout: cmd.dbTimeout, 105 }) 106 if err != nil { 107 return 0, 0, fmt.Errorf("error opening destination database: "+ 108 "%v", err) 109 } 110 defer func() { 111 if err := dst.Close(); err != nil { 112 log.Errorf("Compact error: closing dest DB: %v", err) 113 } 114 }() 115 116 // Run compaction. 117 if err := cmd.compact(dst, src); err != nil { 118 return 0, 0, fmt.Errorf("error running compaction: %v", err) 119 } 120 121 // Report stats on new size. 122 fi, err = os.Stat(cmd.dstPath) 123 if err != nil { 124 return 0, 0, fmt.Errorf("error determining destination "+ 125 "database size: %v", err) 126 } else if fi.Size() == 0 { 127 return 0, 0, fmt.Errorf("zero db size") 128 } 129 130 return initialSize, fi.Size(), nil 131 } 132 133 // compact tries to create a compacted copy of the source database in a new 134 // destination database. 135 func (cmd *compacter) compact(dst, src *bolt.DB) error { 136 // Commit regularly, or we'll run out of memory for large datasets if 137 // using one transaction. 138 var size int64 139 tx, err := dst.Begin(true) 140 if err != nil { 141 return err 142 } 143 defer func() { 144 _ = tx.Rollback() 145 }() 146 147 if err := cmd.walk(src, func(keys [][]byte, k, v []byte, seq uint64) error { 148 // On each key/value, check if we have exceeded tx size. 149 sz := int64(len(k) + len(v)) 150 if size+sz > cmd.txMaxSize && cmd.txMaxSize != 0 { 151 // Commit previous transaction. 152 if err := tx.Commit(); err != nil { 153 return err 154 } 155 156 // Start new transaction. 157 tx, err = dst.Begin(true) 158 if err != nil { 159 return err 160 } 161 size = 0 162 } 163 size += sz 164 165 // Create bucket on the root transaction if this is the first 166 // level. 167 nk := len(keys) 168 if nk == 0 { 169 bkt, err := tx.CreateBucket(k) 170 if err != nil { 171 return err 172 } 173 if err := bkt.SetSequence(seq); err != nil { 174 return err 175 } 176 return nil 177 } 178 179 // Create buckets on subsequent levels, if necessary. 180 b := tx.Bucket(keys[0]) 181 if nk > 1 { 182 for _, k := range keys[1:] { 183 b = b.Bucket(k) 184 } 185 } 186 187 // Fill the entire page for best compaction. 188 b.FillPercent = bucketFillSize 189 190 // If there is no value then this is a bucket call. 191 if v == nil { 192 bkt, err := b.CreateBucket(k) 193 if err != nil { 194 return err 195 } 196 if err := bkt.SetSequence(seq); err != nil { 197 return err 198 } 199 return nil 200 } 201 202 // Otherwise treat it as a key/value pair. 203 return b.Put(k, v) 204 }); err != nil { 205 return err 206 } 207 208 return tx.Commit() 209 } 210 211 // walkFunc is the type of the function called for keys (buckets and "normal" 212 // values) discovered by Walk. keys is the list of keys to descend to the bucket 213 // owning the discovered key/value pair k/v. 214 type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error 215 216 // walk walks recursively the bolt database db, calling walkFn for each key it 217 // finds. 218 func (cmd *compacter) walk(db *bolt.DB, walkFn walkFunc) error { 219 return db.View(func(tx *bolt.Tx) error { 220 return tx.ForEach(func(name []byte, b *bolt.Bucket) error { 221 // This will log the top level buckets only to give the 222 // user some sense of progress. 223 log.Debugf("Compacting top level bucket '%s'", 224 LoggableKeyName(name)) 225 226 return cmd.walkBucket( 227 b, nil, name, nil, b.Sequence(), walkFn, 228 ) 229 }) 230 }) 231 } 232 233 // LoggableKeyName returns a printable name of the given key. 234 func LoggableKeyName(key []byte) string { 235 strKey := string(key) 236 if hasSpecialChars(strKey) { 237 return hex.EncodeToString(key) 238 } 239 240 return strKey 241 } 242 243 // hasSpecialChars returns true if any of the characters in the given string 244 // cannot be printed. 245 func hasSpecialChars(s string) bool { 246 for _, b := range s { 247 if !(b >= 'a' && b <= 'z') && !(b >= 'A' && b <= 'Z') && 248 !(b >= '0' && b <= '9') && b != '-' && b != '_' { 249 250 return true 251 } 252 } 253 254 return false 255 } 256 257 // walkBucket recursively walks through a bucket. 258 func (cmd *compacter) walkBucket(b *bolt.Bucket, keyPath [][]byte, k, v []byte, 259 seq uint64, fn walkFunc) error { 260 261 // Execute callback. 262 if err := fn(keyPath, k, v, seq); err != nil { 263 return err 264 } 265 266 // If this is not a bucket then stop. 267 if v != nil { 268 return nil 269 } 270 271 // Iterate over each child key/value. 272 keyPath = append(keyPath, k) 273 return b.ForEach(func(k, v []byte) error { 274 if v == nil { 275 bkt := b.Bucket(k) 276 return cmd.walkBucket( 277 bkt, keyPath, k, nil, bkt.Sequence(), fn, 278 ) 279 } 280 return cmd.walkBucket(b, keyPath, k, v, b.Sequence(), fn) 281 }) 282 }