github.com/jincm/wesharechain@v0.0.0-20210122032815-1537409ce26a/chain/swarm/storage/localstore/localstore.go (about)

     1  // Copyright 2018 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package localstore
    18  
    19  import (
    20  	"encoding/binary"
    21  	"encoding/hex"
    22  	"errors"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/ethereum/go-ethereum/log"
    27  	"github.com/ethereum/go-ethereum/swarm/chunk"
    28  	"github.com/ethereum/go-ethereum/swarm/shed"
    29  	"github.com/ethereum/go-ethereum/swarm/storage/mock"
    30  )
    31  
    32  var (
    33  	// ErrInvalidMode is returned when an unknown Mode
    34  	// is provided to the function.
    35  	ErrInvalidMode = errors.New("invalid mode")
    36  	// ErrAddressLockTimeout is returned when the same chunk
    37  	// is updated in parallel and one of the updates
    38  	// takes longer than the configured timeout duration.
    39  	ErrAddressLockTimeout = errors.New("address lock timeout")
    40  )
    41  
    42  var (
    43  	// defaultCapacity is used by New when Options.Capacity is zero or negative.
    44  	defaultCapacity int64 = 5000000
    45  	// maxParallelUpdateGC limits the number of goroutines created by Getters
    46  	// that call updateGC function. Value 0 sets no limit.
    47  	maxParallelUpdateGC = 1000
    48  )
    49  
    50  // DB is the local store implementation and holds
    51  // database related objects.
    52  type DB struct {
    53  	shed *shed.DB
    54  
    55  	// schema name of loaded data
    56  	schemaName shed.StringField
    57  	// field that stores number of items in gc index
    58  	storedGCSize shed.Uint64Field
    59  
    60  	// retrieval indexes
    61  	retrievalDataIndex   shed.Index
    62  	retrievalAccessIndex shed.Index
    63  	// push syncing index
    64  	pushIndex shed.Index
    65  	// push syncing subscriptions triggers
    66  	pushTriggers   []chan struct{}
    67  	pushTriggersMu sync.RWMutex
    68  
    69  	// pull syncing index
    70  	pullIndex shed.Index
    71  	// pull syncing subscriptions triggers per bin
    72  	pullTriggers   map[uint8][]chan struct{}
    73  	pullTriggersMu sync.RWMutex
    74  
    75  	// garbage collection index
    76  	gcIndex shed.Index
    77  	// index that stores hashes that are not
    78  	// counted in and saved to storedGCSize
    79  	gcUncountedHashesIndex shed.Index
    80  
    81  	// number of elements in garbage collection index;
    82  	// it must always be read by getGCSize and
    83  	// set with incGCSize, which lock gcSizeMu
    84  	gcSize   int64
    85  	gcSizeMu sync.RWMutex
    86  	// garbage collection is triggered when gcSize exceeds
    87  	// the capacity value
    88  	capacity int64
    89  
    90  	// triggers garbage collection event loop
    91  	collectGarbageTrigger chan struct{}
    92  	// triggers write gc size event loop
    93  	writeGCSizeTrigger chan struct{}
    94  
    95  	// a buffered channel acting as a semaphore
    96  	// to limit the maximal number of goroutines
    97  	// created by Getters to call updateGC function
    98  	updateGCSem chan struct{}
    99  	// a wait group to ensure all updateGC goroutines
   100  	// are done before closing the database
   101  	updateGCWG sync.WaitGroup
   102  
   103  	baseKey []byte
   104  
   105  	addressLocks sync.Map
   106  
   107  	// this channel is closed when Close method is called
   108  	// to terminate other goroutines
   109  	close chan struct{}
   110  
   111  	// protect Close method from exiting before
   112  	// garbage collection and gc size write workers
   113  	// are done
   114  	collectGarbageWorkerDone chan struct{}
   115  	writeGCSizeWorkerDone    chan struct{}
   116  }
   117  
   118  // Options struct holds optional parameters for configuring DB.
   119  type Options struct {
   120  	// MockStore is a mock node store that is used to store
   121  	// chunk data in a central store. It can be used to reduce
   122  	// total storage space requirements in testing large number
   123  	// of swarm nodes with chunk data deduplication provided by
   124  	// the mock global store.
   125  	MockStore *mock.NodeStore
   126  	// Capacity is a limit that triggers garbage collection when number of items
   127  	// in gcIndex equals or exceeds it. Zero or negative selects the default capacity.
   128  	Capacity int64
   129  	// MetricsPrefix defines a prefix for metrics names; it is passed to shed.NewDB.
   130  	MetricsPrefix string
   131  }
   132  
   133  // New returns a new DB.  All fields and indexes are initialized
   134  // and possible conflicts with schema from existing database is checked.
   135  // One goroutine for writing batches is created.
   136  func New(path string, baseKey []byte, o *Options) (db *DB, err error) {
   137  	if o == nil {
   138  		o = new(Options)
   139  	}
   140  	db = &DB{
   141  		capacity: o.Capacity,
   142  		baseKey:  baseKey,
   143  		// channels collectGarbageTrigger and writeGCSizeTrigger
   144  		// need to be buffered with the size of 1
   145  		// to signal another event if it
   146  		// is triggered during already running function
   147  		collectGarbageTrigger:    make(chan struct{}, 1),
   148  		writeGCSizeTrigger:       make(chan struct{}, 1),
   149  		close:                    make(chan struct{}),
   150  		collectGarbageWorkerDone: make(chan struct{}),
   151  		writeGCSizeWorkerDone:    make(chan struct{}),
   152  	}
   153  	if db.capacity <= 0 {
   154  		db.capacity = defaultCapacity
   155  	}
   156  	if maxParallelUpdateGC > 0 {
   157  		db.updateGCSem = make(chan struct{}, maxParallelUpdateGC)
   158  	}
   159  
   160  	db.shed, err = shed.NewDB(path, o.MetricsPrefix)
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  	// Identify current storage schema by arbitrary name.
   165  	db.schemaName, err = db.shed.NewStringField("schema-name")
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  	// Persist gc size.
   170  	db.storedGCSize, err = db.shed.NewUint64Field("gc-size")
   171  	if err != nil {
   172  		return nil, err
   173  	}
   174  	// Functions for retrieval data index.
   175  	var (
   176  		encodeValueFunc func(fields shed.Item) (value []byte, err error)
   177  		decodeValueFunc func(keyItem shed.Item, value []byte) (e shed.Item, err error)
   178  	)
   179  	if o.MockStore != nil {
   180  		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
   181  			b := make([]byte, 8)
   182  			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
   183  			err = o.MockStore.Put(fields.Address, fields.Data)
   184  			if err != nil {
   185  				return nil, err
   186  			}
   187  			return b, nil
   188  		}
   189  		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   190  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
   191  			e.Data, err = o.MockStore.Get(keyItem.Address)
   192  			return e, err
   193  		}
   194  	} else {
   195  		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
   196  			b := make([]byte, 8)
   197  			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
   198  			value = append(b, fields.Data...)
   199  			return value, nil
   200  		}
   201  		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   202  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
   203  			e.Data = value[8:]
   204  			return e, nil
   205  		}
   206  	}
   207  	// Index storing actual chunk address, data and store timestamp.
   208  	db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|Data", shed.IndexFuncs{
   209  		EncodeKey: func(fields shed.Item) (key []byte, err error) {
   210  			return fields.Address, nil
   211  		},
   212  		DecodeKey: func(key []byte) (e shed.Item, err error) {
   213  			e.Address = key
   214  			return e, nil
   215  		},
   216  		EncodeValue: encodeValueFunc,
   217  		DecodeValue: decodeValueFunc,
   218  	})
   219  	if err != nil {
   220  		return nil, err
   221  	}
   222  	// Index storing access timestamp for a particular address.
   223  	// It is needed in order to update gc index keys for iteration order.
   224  	db.retrievalAccessIndex, err = db.shed.NewIndex("Address->AccessTimestamp", shed.IndexFuncs{
   225  		EncodeKey: func(fields shed.Item) (key []byte, err error) {
   226  			return fields.Address, nil
   227  		},
   228  		DecodeKey: func(key []byte) (e shed.Item, err error) {
   229  			e.Address = key
   230  			return e, nil
   231  		},
   232  		EncodeValue: func(fields shed.Item) (value []byte, err error) {
   233  			b := make([]byte, 8)
   234  			binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp))
   235  			return b, nil
   236  		},
   237  		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   238  			e.AccessTimestamp = int64(binary.BigEndian.Uint64(value))
   239  			return e, nil
   240  		},
   241  	})
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  	// pull index allows history and live syncing per po bin
   246  	db.pullIndex, err = db.shed.NewIndex("PO|StoredTimestamp|Hash->nil", shed.IndexFuncs{
   247  		EncodeKey: func(fields shed.Item) (key []byte, err error) {
   248  			key = make([]byte, 41)
   249  			key[0] = db.po(fields.Address)
   250  			binary.BigEndian.PutUint64(key[1:9], uint64(fields.StoreTimestamp))
   251  			copy(key[9:], fields.Address[:])
   252  			return key, nil
   253  		},
   254  		DecodeKey: func(key []byte) (e shed.Item, err error) {
   255  			e.Address = key[9:]
   256  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[1:9]))
   257  			return e, nil
   258  		},
   259  		EncodeValue: func(fields shed.Item) (value []byte, err error) {
   260  			return nil, nil
   261  		},
   262  		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   263  			return e, nil
   264  		},
   265  	})
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  	// create a pull syncing triggers used by SubscribePull function
   270  	db.pullTriggers = make(map[uint8][]chan struct{})
   271  	// push index contains as yet unsynced chunks
   272  	db.pushIndex, err = db.shed.NewIndex("StoredTimestamp|Hash->nil", shed.IndexFuncs{
   273  		EncodeKey: func(fields shed.Item) (key []byte, err error) {
   274  			key = make([]byte, 40)
   275  			binary.BigEndian.PutUint64(key[:8], uint64(fields.StoreTimestamp))
   276  			copy(key[8:], fields.Address[:])
   277  			return key, nil
   278  		},
   279  		DecodeKey: func(key []byte) (e shed.Item, err error) {
   280  			e.Address = key[8:]
   281  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
   282  			return e, nil
   283  		},
   284  		EncodeValue: func(fields shed.Item) (value []byte, err error) {
   285  			return nil, nil
   286  		},
   287  		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   288  			return e, nil
   289  		},
   290  	})
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  	// create a push syncing triggers used by SubscribePush function
   295  	db.pushTriggers = make([]chan struct{}, 0)
   296  	// gc index for removable chunk ordered by ascending last access time
   297  	db.gcIndex, err = db.shed.NewIndex("AccessTimestamp|StoredTimestamp|Hash->nil", shed.IndexFuncs{
   298  		EncodeKey: func(fields shed.Item) (key []byte, err error) {
   299  			b := make([]byte, 16, 16+len(fields.Address))
   300  			binary.BigEndian.PutUint64(b[:8], uint64(fields.AccessTimestamp))
   301  			binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
   302  			key = append(b, fields.Address...)
   303  			return key, nil
   304  		},
   305  		DecodeKey: func(key []byte) (e shed.Item, err error) {
   306  			e.AccessTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
   307  			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[8:16]))
   308  			e.Address = key[16:]
   309  			return e, nil
   310  		},
   311  		EncodeValue: func(fields shed.Item) (value []byte, err error) {
   312  			return nil, nil
   313  		},
   314  		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   315  			return e, nil
   316  		},
   317  	})
   318  	if err != nil {
   319  		return nil, err
   320  	}
   321  	// gc uncounted hashes index keeps hashes that are in gc index
   322  	// but not counted in and saved to storedGCSize
   323  	db.gcUncountedHashesIndex, err = db.shed.NewIndex("Hash->nil", shed.IndexFuncs{
   324  		EncodeKey: func(fields shed.Item) (key []byte, err error) {
   325  			return fields.Address, nil
   326  		},
   327  		DecodeKey: func(key []byte) (e shed.Item, err error) {
   328  			e.Address = key
   329  			return e, nil
   330  		},
   331  		EncodeValue: func(fields shed.Item) (value []byte, err error) {
   332  			return nil, nil
   333  		},
   334  		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
   335  			return e, nil
   336  		},
   337  	})
   338  	if err != nil {
   339  		return nil, err
   340  	}
   341  
   342  	// count number of elements in garbage collection index
   343  	gcSize, err := db.storedGCSize.Get()
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  	// get number of uncounted hashes
   348  	gcUncountedSize, err := db.gcUncountedHashesIndex.Count()
   349  	if err != nil {
   350  		return nil, err
   351  	}
   352  	gcSize += uint64(gcUncountedSize)
   353  	// remove uncounted hashes from the index and
   354  	// save the total gcSize after uncounted hashes are removed
   355  	err = db.writeGCSize(int64(gcSize))
   356  	if err != nil {
   357  		return nil, err
   358  	}
   359  	db.incGCSize(int64(gcSize))
   360  
   361  	// start worker to write gc size
   362  	go db.writeGCSizeWorker()
   363  	// start garbage collection worker
   364  	go db.collectGarbageWorker()
   365  	return db, nil
   366  }
   367  
   368  // Close closes the underlying database.
   369  func (db *DB) Close() (err error) {
   370  	close(db.close)
   371  	db.updateGCWG.Wait()
   372  
   373  	// wait for gc worker and gc size write workers to
   374  	// return before closing the shed
   375  	timeout := time.After(5 * time.Second)
   376  	select {
   377  	case <-db.collectGarbageWorkerDone:
   378  	case <-timeout:
   379  		log.Error("localstore: collect garbage worker did not return after db close")
   380  	}
   381  	select {
   382  	case <-db.writeGCSizeWorkerDone:
   383  	case <-timeout:
   384  		log.Error("localstore: write gc size worker did not return after db close")
   385  	}
   386  
   387  	if err := db.writeGCSize(db.getGCSize()); err != nil {
   388  		log.Error("localstore: write gc size", "err", err)
   389  	}
   390  	return db.shed.Close()
   391  }
   392  
   393  // po computes the proximity order between the address
   394  // and database base key.
   395  func (db *DB) po(addr chunk.Address) (bin uint8) {
   396  	return uint8(chunk.Proximity(db.baseKey, addr))
   397  }
   398  
   399  var (
   400  	// addressLockTimeout is the maximal time for lockAddr
   401  	// to wait until it returns ErrAddressLockTimeout.
   402  	addressLockTimeout = 3 * time.Second
   403  	// addressLockCheckDelay is the duration between two lock checks in lockAddr.
   404  	addressLockCheckDelay = 30 * time.Microsecond
   405  )
   406  
   407  // lockAddr sets the lock on a particular address
   408  // using addressLocks sync.Map and returns unlock function.
   409  // If the address is locked this function will check it
   410  // in a for loop for addressLockTimeout time, after which
   411  // it will return ErrAddressLockTimeout error.
   412  func (db *DB) lockAddr(addr chunk.Address) (unlock func(), err error) {
   413  	start := time.Now()
   414  	lockKey := hex.EncodeToString(addr)
   415  	for {
   416  		_, loaded := db.addressLocks.LoadOrStore(lockKey, struct{}{})
   417  		if !loaded {
   418  			break
   419  		}
   420  		time.Sleep(addressLockCheckDelay)
   421  		if time.Since(start) > addressLockTimeout {
   422  			return nil, ErrAddressLockTimeout
   423  		}
   424  	}
   425  	return func() { db.addressLocks.Delete(lockKey) }, nil
   426  }
   427  
   428  // chunkToItem creates new Item with data provided by the Chunk.
   429  func chunkToItem(ch chunk.Chunk) shed.Item {
   430  	return shed.Item{
   431  		Address: ch.Address(),
   432  		Data:    ch.Data(),
   433  	}
   434  }
   435  
   436  // addressToItem creates new Item with a provided address.
   437  func addressToItem(addr chunk.Address) shed.Item {
   438  	return shed.Item{
   439  		Address: addr,
   440  	}
   441  }
   442  
   443  // now is a helper function that returns a current unix timestamp
   444  // in UTC timezone.
   445  // It is set in the init function for usage in production, and
   446  // optionally overridden in tests for data validation.
   447  var now func() int64
   448  
   449  func init() {
   450  	// set the now function
   451  	now = func() (t int64) {
   452  		return time.Now().UTC().UnixNano()
   453  	}
   454  }