// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package localstore

import (
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/swarm/shed"
	"github.com/syndtr/goleveldb/leveldb"
)

var (
	// gcTargetRatio defines the target number of items
	// in garbage collection index that will not be removed
	// on garbage collection. The target number of items
	// is calculated by gcTarget function. This value must be
	// in range (0,1]. For example, with 0.9 value,
	// garbage collection will leave 90% of defined capacity
	// in database after its run. This prevents frequent
	// garbage collection runs.
	gcTargetRatio = 0.9
	// gcBatchSize limits the number of chunks in a single
	// leveldb batch on garbage collection.
	gcBatchSize uint64 = 1000
)

// collectGarbageWorker is a long running function that waits for
// collectGarbageTrigger channel to signal a garbage collection
// run. GC run iterates on gcIndex and removes older items
// from retrieval and other indexes.
44 func (db *DB) collectGarbageWorker() { 45 defer close(db.collectGarbageWorkerDone) 46 47 for { 48 select { 49 case <-db.collectGarbageTrigger: 50 // run a single collect garbage run and 51 // if done is false, gcBatchSize is reached and 52 // another collect garbage run is needed 53 collectedCount, done, err := db.collectGarbage() 54 if err != nil { 55 log.Error("localstore collect garbage", "err", err) 56 } 57 // check if another gc run is needed 58 if !done { 59 db.triggerGarbageCollection() 60 } 61 62 if collectedCount > 0 && testHookCollectGarbage != nil { 63 testHookCollectGarbage(collectedCount) 64 } 65 case <-db.close: 66 return 67 } 68 } 69 } 70 71 // collectGarbage removes chunks from retrieval and other 72 // indexes if maximal number of chunks in database is reached. 73 // This function returns the number of removed chunks. If done 74 // is false, another call to this function is needed to collect 75 // the rest of the garbage as the batch size limit is reached. 76 // This function is called in collectGarbageWorker. 
77 func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) { 78 batch := new(leveldb.Batch) 79 target := db.gcTarget() 80 81 // protect database from changing idexes and gcSize 82 db.batchMu.Lock() 83 defer db.batchMu.Unlock() 84 85 gcSize, err := db.gcSize.Get() 86 if err != nil { 87 return 0, true, err 88 } 89 90 done = true 91 err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) { 92 if gcSize-collectedCount <= target { 93 return true, nil 94 } 95 // delete from retrieve, pull, gc 96 db.retrievalDataIndex.DeleteInBatch(batch, item) 97 db.retrievalAccessIndex.DeleteInBatch(batch, item) 98 db.pullIndex.DeleteInBatch(batch, item) 99 db.gcIndex.DeleteInBatch(batch, item) 100 collectedCount++ 101 if collectedCount >= gcBatchSize { 102 // bach size limit reached, 103 // another gc run is needed 104 done = false 105 return true, nil 106 } 107 return false, nil 108 }, nil) 109 if err != nil { 110 return 0, false, err 111 } 112 113 db.gcSize.PutInBatch(batch, gcSize-collectedCount) 114 115 err = db.shed.WriteBatch(batch) 116 if err != nil { 117 return 0, false, err 118 } 119 return collectedCount, done, nil 120 } 121 122 // gcTrigger retruns the absolute value for garbage collection 123 // target value, calculated from db.capacity and gcTargetRatio. 124 func (db *DB) gcTarget() (target uint64) { 125 return uint64(float64(db.capacity) * gcTargetRatio) 126 } 127 128 // triggerGarbageCollection signals collectGarbageWorker 129 // to call collectGarbage. 130 func (db *DB) triggerGarbageCollection() { 131 select { 132 case db.collectGarbageTrigger <- struct{}{}: 133 case <-db.close: 134 default: 135 } 136 } 137 138 // incGCSizeInBatch changes gcSize field value 139 // by change which can be negative. This function 140 // must be called under batchMu lock. 
141 func (db *DB) incGCSizeInBatch(batch *leveldb.Batch, change int64) (err error) { 142 if change == 0 { 143 return nil 144 } 145 gcSize, err := db.gcSize.Get() 146 if err != nil { 147 return err 148 } 149 150 var new uint64 151 if change > 0 { 152 new = gcSize + uint64(change) 153 } else { 154 // 'change' is an int64 and is negative 155 // a conversion is needed with correct sign 156 c := uint64(-change) 157 if c > gcSize { 158 // protect uint64 undeflow 159 return nil 160 } 161 new = gcSize - c 162 } 163 db.gcSize.PutInBatch(batch, new) 164 165 // trigger garbage collection if we reached the capacity 166 if new >= db.capacity { 167 db.triggerGarbageCollection() 168 } 169 return nil 170 } 171 172 // testHookCollectGarbage is a hook that can provide 173 // information when a garbage collection run is done 174 // and how many items it removed. 175 var testHookCollectGarbage func(collectedCount uint64)