// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

/*
Counting the number of items in the garbage collection index

The number of items in the garbage collection index is not the same as the number
of chunks in the retrieval index (total number of stored chunks). A chunk can be
garbage collected only when it is set to a synced state by ModSetSync, and only
then can it be counted into the garbage collection size, which determines whether
a number of chunks should be removed from the storage by the garbage collection.
This opens a possibility that the storage size exceeds the limit if files are
locally uploaded and the node is not connected to other nodes or there is a
problem with syncing.

Tracking of garbage collection size (gcSize) is focused on performance. Key
points:

1. counting the number of key/value pairs in LevelDB takes around 0.7s for 1e6
   on a very fast ssd (an unacceptably long time in reality)
2. locking leveldb batch writes with a global mutex (serial batch writes) is
   not acceptable, we should use locking per chunk address

Because of point 1, we cannot count the number of items in the garbage collection
index in the New constructor, as it could take very long for realistic scenarios
where the limit is 5e6 and nodes are running on slower hdd disks or cloud
providers with low IOPS.

Point 2 is a performance optimization to allow parallel batch writes with
getters, putters and setters. Every single batch that they create contains only
information related to a single chunk, with no relations to other chunks or
shared statistical data (like gcSize). This approach avoids race conditions on
writing batches in parallel, but creates a problem of synchronizing statistical
data values like gcSize. With a global mutex lock, any data could be written by
any batch, but it would not utilize the full potential of leveldb parallel
writes.

To mitigate these two problems, the implementation of counting and persisting
gcSize is split into two parts. One is the in-memory value (gcSize) that is fast
to read and write with a dedicated mutex (gcSizeMu) if the batch which adds or
removes items from the garbage collection index is successful. The second part
is the reliable persistence of this value to the leveldb database, as the
storedGCSize field. This database field is saved by the writeGCSizeWorker and
writeGCSize functions when the in-memory gcSize variable is changed, but not too
often, to avoid very frequent database writes. These database writes are
triggered by writeGCSizeTrigger when a call is made to the incGCSize function.
The trigger ensures that database writes are done only when gcSize is changed
(in contrast to simpler periodic writes or checks). A backoff of 10s in
writeGCSizeWorker ensures that batch writes are not made too frequently. Saving
storedGCSize in the database Close function ensures that the in-memory gcSize is
persisted when the database is closed.

This persistence must be resilient to failures like panics. For this purpose, a
collection of hashes that are added to the garbage collection index, but still
not persisted to storedGCSize, must be tracked to count them in when the DB is
constructed again with the New function after the failure (swarm node restarts).
On every batch write that adds a new item to the garbage collection index, the
same hash is added to gcUncountedHashesIndex. This ensures that there is
persisted information about which hashes were added to the garbage collection
index. But, when storedGCSize is saved by the writeGCSize function, these values
are removed in the same batch in which storedGCSize is changed, to ensure
consistency. If a panic happens, or the database Close method is not called, the
in-memory gcSize is not saved, but the database storage still contains all the
information needed to reliably and efficiently get the correct number of items
in the garbage collection index. This is performed in the New function when all
hashes in gcUncountedHashesIndex are counted, added to storedGCSize and saved to
disk before the database is constructed again. Index gcUncountedHashesIndex acts
as a dirty bit for recovery that provides information about what needs to be
corrected. With a simple dirty bit, the whole garbage collection index would
have to be counted on recovery instead of only the items in
gcUncountedHashesIndex. Because of the triggering mechanism of writeGCSizeWorker
and the relatively short backoff time, the number of hashes in
gcUncountedHashesIndex should be low and it should take a very short time to
recover from the previous failure. If there was no failure and
gcUncountedHashesIndex is empty, which is the usual case, the New function will
take minimal time to return.
*/

package localstore

import (
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/swarm/shed"
	"github.com/syndtr/goleveldb/leveldb"
)

var (
	// gcTargetRatio defines the target number of items
	// in the garbage collection index that will not be removed
	// on garbage collection. The target number of items
	// is calculated by the gcTarget function. This value must be
	// in range (0,1]. For example, with a 0.9 value,
	// garbage collection will leave 90% of defined capacity
	// in the database after its run. This prevents frequent
	// garbage collection runs.
	gcTargetRatio = 0.9
	// gcBatchSize limits the number of chunks in a single
	// leveldb batch on garbage collection.
	gcBatchSize int64 = 1000
)

// collectGarbageWorker is a long running function that waits for
// the collectGarbageTrigger channel to signal a garbage collection
// run. A GC run iterates on gcIndex and removes older items
// from retrieval and other indexes.
func (db *DB) collectGarbageWorker() {
	defer close(db.collectGarbageWorkerDone)

	for {
		select {
		case <-db.collectGarbageTrigger:
			// run a single collect garbage run and
			// if done is false, gcBatchSize is reached and
			// another collect garbage run is needed
			collectedCount, done, err := db.collectGarbage()
			if err != nil {
				log.Error("localstore collect garbage", "err", err)
			}
			// check if another gc run is needed
			if !done {
				db.triggerGarbageCollection()
			}

			if collectedCount > 0 && testHookCollectGarbage != nil {
				testHookCollectGarbage(collectedCount)
			}
		case <-db.close:
			return
		}
	}
}

// collectGarbage removes chunks from retrieval and other
// indexes if the maximal number of chunks in the database is reached.
// This function returns the number of removed chunks. If done
// is false, another call to this function is needed to collect
// the rest of the garbage as the batch size limit is reached.
// This function is called in collectGarbageWorker.
func (db *DB) collectGarbage() (collectedCount int64, done bool, err error) {
	batch := new(leveldb.Batch)
	target := db.gcTarget()

	done = true
	err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
		// protect parallel updates
		unlock, err := db.lockAddr(item.Address)
		if err != nil {
			return false, err
		}
		defer unlock()

		gcSize := db.getGCSize()
		if gcSize-collectedCount <= target {
			return true, nil
		}
		// delete from retrieve, pull, gc
		db.retrievalDataIndex.DeleteInBatch(batch, item)
		db.retrievalAccessIndex.DeleteInBatch(batch, item)
		db.pullIndex.DeleteInBatch(batch, item)
		db.gcIndex.DeleteInBatch(batch, item)
		collectedCount++
		if collectedCount >= gcBatchSize {
			// batch size limit reached,
			// another gc run is needed
			done = false
			return true, nil
		}
		return false, nil
	}, nil)
	if err != nil {
		return 0, false, err
	}

	err = db.shed.WriteBatch(batch)
	if err != nil {
		return 0, false, err
	}
	// batch is written, decrement gcSize
	db.incGCSize(-collectedCount)
	return collectedCount, done, nil
}

// gcTarget returns the absolute value for the garbage collection
// target, calculated from db.capacity and gcTargetRatio.
func (db *DB) gcTarget() (target int64) {
	return int64(float64(db.capacity) * gcTargetRatio)
}

// incGCSize increments gcSize by the provided number.
// If count is negative, it will decrement gcSize.
func (db *DB) incGCSize(count int64) {
	if count == 0 {
		return
	}

	db.gcSizeMu.Lock()
	new := db.gcSize + count
	db.gcSize = new
	db.gcSizeMu.Unlock()

	select {
	case db.writeGCSizeTrigger <- struct{}{}:
	default:
	}
	if new >= db.capacity {
		db.triggerGarbageCollection()
	}
}

// getGCSize returns the gcSize value by locking it
// with the gcSizeMu mutex.
func (db *DB) getGCSize() (count int64) {
	db.gcSizeMu.RLock()
	count = db.gcSize
	db.gcSizeMu.RUnlock()
	return count
}

// triggerGarbageCollection signals collectGarbageWorker
// to call collectGarbage.
func (db *DB) triggerGarbageCollection() {
	select {
	case db.collectGarbageTrigger <- struct{}{}:
	case <-db.close:
	default:
	}
}

// writeGCSizeWorker writes gcSize on trigger event
// and waits 10 seconds after each write.
// This fixed backoff avoids very frequent
// database operations.
func (db *DB) writeGCSizeWorker() {
	defer close(db.writeGCSizeWorkerDone)

	for {
		select {
		case <-db.writeGCSizeTrigger:
			err := db.writeGCSize(db.getGCSize())
			if err != nil {
				log.Error("localstore write gc size", "err", err)
			}
			// Wait some time before writing gc size in the next
			// iteration. This prevents frequent I/O operations.
			select {
			case <-time.After(10 * time.Second):
			case <-db.close:
				return
			}
		case <-db.close:
			return
		}
	}
}

// writeGCSize stores the number of items in gcIndex.
// It removes all hashes from gcUncountedHashesIndex
// so that they are not included on the next DB initialization
// (New function) when gcSize is counted.
func (db *DB) writeGCSize(gcSize int64) (err error) {
	const maxBatchSize = 1000

	batch := new(leveldb.Batch)
	db.storedGCSize.PutInBatch(batch, uint64(gcSize))
	batchSize := 1

	// use only one iterator as it acquires its snapshot
	// not to remove hashes from the index that are added
	// after the stored gc size is written
	err = db.gcUncountedHashesIndex.Iterate(func(item shed.Item) (stop bool, err error) {
		db.gcUncountedHashesIndex.DeleteInBatch(batch, item)
		batchSize++
		if batchSize >= maxBatchSize {
			err = db.shed.WriteBatch(batch)
			if err != nil {
				return false, err
			}
			batch.Reset()
			batchSize = 0
		}
		return false, nil
	}, nil)
	if err != nil {
		return err
	}
	return db.shed.WriteBatch(batch)
}

// testHookCollectGarbage is a hook that can provide
// information when a garbage collection run is done
// and how many items it removed.
var testHookCollectGarbage func(collectedCount int64)
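
// The following method is an illustrative sketch and was not part of the
// original file. It shows how the recovery described in the package comment
// could be performed by the New constructor: read the persisted storedGCSize,
// count the hashes still present in gcUncountedHashesIndex, and persist the
// corrected value again via writeGCSize. It assumes that the shed Uint64Field
// and Index types provide Get and Count methods; the method name
// initGCSizeSketch is hypothetical.
func (db *DB) initGCSizeSketch() error {
	// the last reliably persisted garbage collection size
	stored, err := db.storedGCSize.Get()
	if err != nil {
		return err
	}
	// hashes added to gcIndex but not yet counted into storedGCSize
	// before the previous shutdown or failure
	uncounted, err := db.gcUncountedHashesIndex.Count()
	if err != nil {
		return err
	}
	gcSize := int64(stored) + int64(uncounted)
	// persist the corrected value and clear gcUncountedHashesIndex
	// in the same batch, exactly as writeGCSize does on trigger
	if err := db.writeGCSize(gcSize); err != nil {
		return err
	}
	// set the in-memory value used by getGCSize and incGCSize
	db.gcSizeMu.Lock()
	db.gcSize = gcSize
	db.gcSizeMu.Unlock()
	return nil
}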