// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

/*
Counting the number of items in the garbage collection index

The number of items in the garbage collection index is not the same as the number
of chunks in the retrieval index (total number of stored chunks). A chunk can be
garbage collected only when it is set to a synced state by ModSetSync, and only
then can it be counted into the garbage collection size, which determines whether
a number of chunks should be removed from the storage by the garbage collection.
This opens a possibility that the storage size exceeds the limit if files are
locally uploaded and the node is not connected to other nodes or there is a
problem with syncing.

Tracking of garbage collection size (gcSize) is focused on performance. Key
points:

1. counting the number of key/value pairs in LevelDB takes around 0.7s for 1e6
   on a very fast ssd (an unacceptably long time in reality)
2. locking leveldb batch writes with a global mutex (serial batch writes) is
   not acceptable, we should use locking per chunk address

Because of point 1, we cannot count the number of items in the garbage collection
index in the New constructor, as it could take very long for realistic scenarios
where the limit is 5e6 and nodes are running on slower hdd disks or cloud
providers with low IOPS.

Point 2 is a performance optimization to allow parallel batch writes with
getters, putters and setters. Every single batch that they create contains only
information related to a single chunk, with no relations to other chunks or
shared statistical data (like gcSize). This approach avoids race conditions on
writing batches in parallel, but creates a problem of synchronizing statistical
data values like gcSize. With a global mutex lock, any data could be written by
any batch, but it would not utilize the full potential of leveldb parallel
writes.

To mitigate these two problems, the implementation of counting and persisting
gcSize is split into two parts. One is the in-memory value (gcSize) that is fast
to read and write with a dedicated mutex (gcSizeMu) if the batch which adds or
removes items from the garbage collection index is successful. The second part
is the reliable persistence of this value to the leveldb database, as the
storedGCSize field. This database field is saved by the writeGCSizeWorker and
writeGCSize functions when the in-memory gcSize variable is changed, but not too
often, to avoid very frequent database writes. These database writes are
triggered by writeGCSizeTrigger when a call is made to the incGCSize function.
The trigger ensures that database writes are done only when gcSize is changed
(in contrast to simpler periodic writes or checks). A backoff of 10s in
writeGCSizeWorker ensures that batch writes are not made too frequently. Saving
storedGCSize in the database Close function ensures that the in-memory gcSize is
persisted when the database is closed.

This persistence must be resilient to failures like panics. For this purpose, a
collection of hashes that are added to the garbage collection index, but still
not persisted to storedGCSize, must be tracked to count them in when the DB is
constructed again with the New function after the failure (swarm node restarts).
On every batch write that adds a new item to the garbage collection index, the
same hash is added to gcUncountedHashesIndex. This ensures that there is
persisted information about which hashes were added to the garbage collection
index. But, when storedGCSize is saved by the writeGCSize function, these values
are removed in the same batch in which storedGCSize is changed, to ensure
consistency. If a panic happens, or the database Close method is not called, the
in-memory gcSize is not saved, but the database storage still contains all the
information needed to reliably and efficiently get the correct number of items
in the garbage collection index. This is performed in the New function when all
hashes in gcUncountedHashesIndex are counted, added to storedGCSize and saved to
disk before the database is constructed again. Index gcUncountedHashesIndex acts
as a dirty bit for recovery that provides information about what needs to be
corrected. With a simple dirty bit, the whole garbage collection index would
have to be counted on recovery instead of only the items in
gcUncountedHashesIndex. Because of the triggering mechanism of writeGCSizeWorker
and the relatively short backoff time, the number of hashes in
gcUncountedHashesIndex should be low and it should take a very short time to
recover from the previous failure. If there was no failure and
gcUncountedHashesIndex is empty, which is the usual case, the New function will
take minimal time to return.
*/

package localstore

import (
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/swarm/shed"
	"github.com/syndtr/goleveldb/leveldb"
)

var (
	// gcTargetRatio defines the target number of items
	// in the garbage collection index that will not be removed
	// on garbage collection. The target number of items
	// is calculated by the gcTarget function. This value must be
	// in range (0,1]. For example, with a 0.9 value,
	// garbage collection will leave 90% of defined capacity
	// in the database after its run. This prevents frequent
	// garbage collection runs.
	gcTargetRatio = 0.9
	// gcBatchSize limits the number of chunks in a single
	// leveldb batch on garbage collection.
	gcBatchSize int64 = 1000
)

// collectGarbageWorker is a long running function that waits for
// the collectGarbageTrigger channel to signal a garbage collection
// run. A GC run iterates on gcIndex and removes older items
// from retrieval and other indexes.
func (db *DB) collectGarbageWorker() {
	defer close(db.collectGarbageWorkerDone)

	for {
		select {
		case <-db.collectGarbageTrigger:
			// run a single collect garbage run and
			// if done is false, gcBatchSize is reached and
			// another collect garbage run is needed
			collectedCount, done, err := db.collectGarbage()
			if err != nil {
				log.Error("localstore collect garbage", "err", err)
			}
			// check if another gc run is needed
			if !done {
				db.triggerGarbageCollection()
			}

			if collectedCount > 0 && testHookCollectGarbage != nil {
				testHookCollectGarbage(collectedCount)
			}
		case <-db.close:
			return
		}
	}
}

// collectGarbage removes chunks from retrieval and other
// indexes if the maximal number of chunks in the database is reached.
// This function returns the number of removed chunks. If done
// is false, another call to this function is needed to collect
// the rest of the garbage as the batch size limit is reached.
// This function is called in collectGarbageWorker.
func (db *DB) collectGarbage() (collectedCount int64, done bool, err error) {
	batch := new(leveldb.Batch)
	target := db.gcTarget()

	done = true
	err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
		// protect parallel updates
		unlock, err := db.lockAddr(item.Address)
		if err != nil {
			return false, err
		}
		defer unlock()

		gcSize := db.getGCSize()
		if gcSize-collectedCount <= target {
			return true, nil
		}
		// delete from retrieve, pull, gc
		db.retrievalDataIndex.DeleteInBatch(batch, item)
		db.retrievalAccessIndex.DeleteInBatch(batch, item)
		db.pullIndex.DeleteInBatch(batch, item)
		db.gcIndex.DeleteInBatch(batch, item)
		collectedCount++
		if collectedCount >= gcBatchSize {
			// batch size limit reached,
			// another gc run is needed
			done = false
			return true, nil
		}
		return false, nil
	}, nil)
	if err != nil {
		return 0, false, err
	}

	err = db.shed.WriteBatch(batch)
	if err != nil {
		return 0, false, err
	}
	// batch is written, decrement gcSize
	db.incGCSize(-collectedCount)
	return collectedCount, done, nil
}

// gcTarget returns the absolute value for the garbage collection
// target, calculated from db.capacity and gcTargetRatio.
func (db *DB) gcTarget() (target int64) {
	return int64(float64(db.capacity) * gcTargetRatio)
}

// incGCSize increments gcSize by the provided number.
// If count is negative, it will decrement gcSize.
func (db *DB) incGCSize(count int64) {
	if count == 0 {
		return
	}

	db.gcSizeMu.Lock()
	new := db.gcSize + count
	db.gcSize = new
	db.gcSizeMu.Unlock()

	select {
	case db.writeGCSizeTrigger <- struct{}{}:
	default:
	}
	if new >= db.capacity {
		db.triggerGarbageCollection()
	}
}

// getGCSize returns the gcSize value by locking it
// with the gcSizeMu mutex.
func (db *DB) getGCSize() (count int64) {
	db.gcSizeMu.RLock()
	count = db.gcSize
	db.gcSizeMu.RUnlock()
	return count
}

// triggerGarbageCollection signals collectGarbageWorker
// to call collectGarbage.
func (db *DB) triggerGarbageCollection() {
	select {
	case db.collectGarbageTrigger <- struct{}{}:
	case <-db.close:
	default:
	}
}

// writeGCSizeWorker writes gcSize on trigger event
// and waits 10 seconds after each write.
// This fixed backoff avoids very frequent
// database operations.
func (db *DB) writeGCSizeWorker() {
	defer close(db.writeGCSizeWorkerDone)

	for {
		select {
		case <-db.writeGCSizeTrigger:
			err := db.writeGCSize(db.getGCSize())
			if err != nil {
				log.Error("localstore write gc size", "err", err)
			}
			// Wait some time before writing gc size in the next
			// iteration. This prevents frequent I/O operations.
			select {
			case <-time.After(10 * time.Second):
			case <-db.close:
				return
			}
		case <-db.close:
			return
		}
	}
}

// writeGCSize stores the number of items in gcIndex.
// It removes all hashes from gcUncountedHashesIndex
// so that they are not included on the next DB initialization
// (New function) when gcSize is counted.
func (db *DB) writeGCSize(gcSize int64) (err error) {
	const maxBatchSize = 1000

	batch := new(leveldb.Batch)
	db.storedGCSize.PutInBatch(batch, uint64(gcSize))
	batchSize := 1

	// use only one iterator as it acquires its snapshot
	// not to remove hashes from the index that are added
	// after the stored gc size is written
	err = db.gcUncountedHashesIndex.Iterate(func(item shed.Item) (stop bool, err error) {
		db.gcUncountedHashesIndex.DeleteInBatch(batch, item)
		batchSize++
		if batchSize >= maxBatchSize {
			err = db.shed.WriteBatch(batch)
			if err != nil {
				return false, err
			}
			batch.Reset()
			batchSize = 0
		}
		return false, nil
	}, nil)
	if err != nil {
		return err
	}
	return db.shed.WriteBatch(batch)
}

// testHookCollectGarbage is a hook that can provide
// information when a garbage collection run is done
// and how many items it removed.
var testHookCollectGarbage func(collectedCount int64)
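
// The following method is an illustrative sketch and was not part of the
// original file. It shows how the recovery described in the package comment
// could be performed by the New constructor: read the persisted storedGCSize,
// count the hashes still present in gcUncountedHashesIndex, and persist the
// corrected value again via writeGCSize. It assumes that the shed Uint64Field
// and Index types provide Get and Count methods; the method name
// initGCSizeSketch is hypothetical.
func (db *DB) initGCSizeSketch() error {
	// the last reliably persisted garbage collection size
	stored, err := db.storedGCSize.Get()
	if err != nil {
		return err
	}
	// hashes added to gcIndex but not yet counted into storedGCSize
	// before the previous shutdown or failure
	uncounted, err := db.gcUncountedHashesIndex.Count()
	if err != nil {
		return err
	}
	gcSize := int64(stored) + int64(uncounted)
	// persist the corrected value and clear gcUncountedHashesIndex
	// in the same batch, exactly as writeGCSize does on trigger
	if err := db.writeGCSize(gcSize); err != nil {
		return err
	}
	// set the in-memory value used by getGCSize and incGCSize
	db.gcSizeMu.Lock()
	db.gcSize = gcSize
	db.gcSizeMu.Unlock()
	return nil
}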