github.com/codingfuture/orig-energi3@v0.8.4/swarm/storage/localstore/localstore.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package localstore

import (
	"encoding/binary"
	"encoding/hex"
	"errors"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/swarm/shed"
	"github.com/ethereum/go-ethereum/swarm/storage"
	"github.com/ethereum/go-ethereum/swarm/storage/mock"
)

var (
	// ErrInvalidMode is returned when an unknown Mode
	// is provided to the function.
	ErrInvalidMode = errors.New("invalid mode")
	// ErrAddressLockTimeout is returned when the same chunk
	// is updated in parallel and one of the updates
	// takes longer than the configured timeout duration.
	ErrAddressLockTimeout = errors.New("address lock timeout")
)

var (
	// Default value for the Capacity DB option.
	defaultCapacity int64 = 5000000
	// Limits the number of goroutines created by Getters
	// that call the updateGC function. A value of 0 sets no limit.
	maxParallelUpdateGC = 1000
)

// DB is the local store implementation and holds
// database-related objects.
type DB struct {
	shed *shed.DB

	// schema name of loaded data
	schemaName shed.StringField
	// field that stores the number of items in the gc index
	storedGCSize shed.Uint64Field

	// retrieval indexes
	retrievalDataIndex   shed.Index
	retrievalAccessIndex shed.Index
	// push syncing index
	pushIndex shed.Index
	// push syncing subscriptions triggers
	pushTriggers   []chan struct{}
	pushTriggersMu sync.RWMutex

	// pull syncing index
	pullIndex shed.Index
	// pull syncing subscriptions triggers per bin
	pullTriggers   map[uint8][]chan struct{}
	pullTriggersMu sync.RWMutex

	// garbage collection index
	gcIndex shed.Index
	// index that stores hashes that are not yet
	// counted in and saved to storedGCSize
	gcUncountedHashesIndex shed.Index

	// number of elements in the garbage collection index;
	// it must always be read with getGCSize and
	// updated with incGCSize, both of which lock gcSizeMu
	gcSize   int64
	gcSizeMu sync.RWMutex
	// garbage collection is triggered when gcSize exceeds
	// the capacity value
	capacity int64

	// triggers garbage collection event loop
	collectGarbageTrigger chan struct{}
	// triggers write gc size event loop
	writeGCSizeTrigger chan struct{}

	// a buffered channel acting as a semaphore
	// to limit the maximal number of goroutines
	// created by Getters to call the updateGC function
	updateGCSem chan struct{}
	// a wait group to ensure all updateGC goroutines
	// are done before closing the database
	updateGCWG sync.WaitGroup

	baseKey []byte

	addressLocks sync.Map

	// this channel is closed when the Close function is called
	// to terminate other goroutines
	close chan struct{}
}

// Options struct holds optional parameters for configuring DB.
type Options struct {
	// MockStore is a mock node store that is used to store
	// chunk data in a central store. It can be used to reduce
	// total storage space requirements when testing a large
	// number of swarm nodes with chunk data deduplication
	// provided by the mock global store.
	MockStore *mock.NodeStore
	// Capacity is a limit that triggers garbage collection when
	// the number of items in gcIndex equals or exceeds it.
	Capacity int64
	// MetricsPrefix defines a prefix for metrics names.
	MetricsPrefix string
}

// New returns a new DB. All fields and indexes are initialized
// and possible conflicts with the schema of an existing database
// are checked. One goroutine for writing batches is created.
func New(path string, baseKey []byte, o *Options) (db *DB, err error) {
	if o == nil {
		o = new(Options)
	}
	db = &DB{
		capacity: o.Capacity,
		baseKey:  baseKey,
		// channels collectGarbageTrigger and writeGCSizeTrigger
		// need to be buffered with a size of 1 to signal
		// another event if one is triggered while the
		// function is already running
		collectGarbageTrigger: make(chan struct{}, 1),
		writeGCSizeTrigger:    make(chan struct{}, 1),
		close:                 make(chan struct{}),
	}
	if db.capacity <= 0 {
		db.capacity = defaultCapacity
	}
	if maxParallelUpdateGC > 0 {
		db.updateGCSem = make(chan struct{}, maxParallelUpdateGC)
	}

	db.shed, err = shed.NewDB(path, o.MetricsPrefix)
	if err != nil {
		return nil, err
	}
	// Identify current storage schema by arbitrary name.
	db.schemaName, err = db.shed.NewStringField("schema-name")
	if err != nil {
		return nil, err
	}
	// Persist gc size.
	db.storedGCSize, err = db.shed.NewUint64Field("gc-size")
	if err != nil {
		return nil, err
	}
	// Functions for retrieval data index.
	var (
		encodeValueFunc func(fields shed.Item) (value []byte, err error)
		decodeValueFunc func(keyItem shed.Item, value []byte) (e shed.Item, err error)
	)
	if o.MockStore != nil {
		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
			err = o.MockStore.Put(fields.Address, fields.Data)
			if err != nil {
				return nil, err
			}
			return b, nil
		}
		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
			e.Data, err = o.MockStore.Get(keyItem.Address)
			return e, err
		}
	} else {
		encodeValueFunc = func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.StoreTimestamp))
			value = append(b, fields.Data...)
			return value, nil
		}
		decodeValueFunc = func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(value[:8]))
			e.Data = value[8:]
			return e, nil
		}
	}
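	// In both cases the encoded value layout is
	//
	//	StoreTimestamp (8 bytes, big-endian) || chunk data
	//
	// except that with a mock store the chunk data part is kept in
	// the mock global store and only the timestamp is stored locally.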
	// Index storing actual chunk address, data and store timestamp.
	db.retrievalDataIndex, err = db.shed.NewIndex("Address->StoreTimestamp|Data", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: encodeValueFunc,
		DecodeValue: decodeValueFunc,
	})
	if err != nil {
		return nil, err
	}
	// Index storing access timestamp for a particular address.
	// It is needed in order to update gc index keys for iteration order.
	db.retrievalAccessIndex, err = db.shed.NewIndex("Address->AccessTimestamp", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			b := make([]byte, 8)
			binary.BigEndian.PutUint64(b, uint64(fields.AccessTimestamp))
			return b, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			e.AccessTimestamp = int64(binary.BigEndian.Uint64(value))
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// pull index allows history and live syncing per po bin
	db.pullIndex, err = db.shed.NewIndex("PO|StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			key = make([]byte, 41)
			key[0] = db.po(fields.Address)
			binary.BigEndian.PutUint64(key[1:9], uint64(fields.StoreTimestamp))
			copy(key[9:], fields.Address[:])
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key[9:]
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[1:9]))
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
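	// The pull index key is 41 bytes:
	//
	//	PO (1 byte) || StoreTimestamp (8 bytes, big-endian) || Address (32 bytes)
	//
	// so iterating over a single proximity order bin yields chunks
	// in the order they were stored.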
	// create pull syncing triggers used by the SubscribePull function
	db.pullTriggers = make(map[uint8][]chan struct{})
	// push index contains as yet unsynced chunks
	db.pushIndex, err = db.shed.NewIndex("StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			key = make([]byte, 40)
			binary.BigEndian.PutUint64(key[:8], uint64(fields.StoreTimestamp))
			copy(key[8:], fields.Address[:])
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key[8:]
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
	// create push syncing triggers used by the SubscribePush function
	db.pushTriggers = make([]chan struct{}, 0)
	// gc index for removable chunks ordered by ascending last access time
	db.gcIndex, err = db.shed.NewIndex("AccessTimestamp|StoredTimestamp|Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			b := make([]byte, 16, 16+len(fields.Address))
			binary.BigEndian.PutUint64(b[:8], uint64(fields.AccessTimestamp))
			binary.BigEndian.PutUint64(b[8:16], uint64(fields.StoreTimestamp))
			key = append(b, fields.Address...)
			return key, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.AccessTimestamp = int64(binary.BigEndian.Uint64(key[:8]))
			e.StoreTimestamp = int64(binary.BigEndian.Uint64(key[8:16]))
			e.Address = key[16:]
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}
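	// The gc index key orders entries by access time first:
	//
	//	AccessTimestamp (8 bytes) || StoreTimestamp (8 bytes) || Address
	//
	// so iterating from the start visits the least recently accessed
	// chunks first, which is the order in which garbage collection
	// evicts them.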
	// gc uncounted hashes index keeps hashes that are in the gc index
	// but not yet counted in and saved to storedGCSize
	db.gcUncountedHashesIndex, err = db.shed.NewIndex("Hash->nil", shed.IndexFuncs{
		EncodeKey: func(fields shed.Item) (key []byte, err error) {
			return fields.Address, nil
		},
		DecodeKey: func(key []byte) (e shed.Item, err error) {
			e.Address = key
			return e, nil
		},
		EncodeValue: func(fields shed.Item) (value []byte, err error) {
			return nil, nil
		},
		DecodeValue: func(keyItem shed.Item, value []byte) (e shed.Item, err error) {
			return e, nil
		},
	})
	if err != nil {
		return nil, err
	}

	// count number of elements in garbage collection index
	gcSize, err := db.storedGCSize.Get()
	if err != nil {
		return nil, err
	}
	// get number of uncounted hashes
	gcUncountedSize, err := db.gcUncountedHashesIndex.Count()
	if err != nil {
		return nil, err
	}
	gcSize += uint64(gcUncountedSize)
	// remove uncounted hashes from the index and
	// save the total gcSize after uncounted hashes are removed
	err = db.writeGCSize(int64(gcSize))
	if err != nil {
		return nil, err
	}
	db.incGCSize(int64(gcSize))

	// start worker to write gc size
	go db.writeGCSizeWorker()
	// start garbage collection worker
	go db.collectGarbageWorker()
	return db, nil
}

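// A minimal usage sketch (the path and base key below are
// placeholders; baseKey is normally the node's 32-byte overlay
// address):
//
//	db, err := localstore.New("/var/swarm/localstore", make([]byte, 32), nil)
//	if err != nil {
//		// handle error
//	}
//	defer db.Close()
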
// Close closes the underlying database.
func (db *DB) Close() (err error) {
	close(db.close)
	db.updateGCWG.Wait()
	if err := db.writeGCSize(db.getGCSize()); err != nil {
		log.Error("localstore: write gc size", "err", err)
	}
	return db.shed.Close()
}

// po computes the proximity order between the address
// and database base key.
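// Informally, the proximity order is the number of leading bits the
// two byte sequences share (as computed by storage.Proximity), so
// higher values mean addresses closer to this node.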
func (db *DB) po(addr storage.Address) (bin uint8) {
	return uint8(storage.Proximity(db.baseKey, addr))
}

var (
	// Maximal time for lockAddr to wait until it
	// returns an error.
	addressLockTimeout = 3 * time.Second
	// Duration between two lock checks in lockAddr.
	addressLockCheckDelay = 30 * time.Microsecond
)

// lockAddr sets the lock on a particular address
// using the addressLocks sync.Map and returns an unlock function.
// If the address is already locked, this function polls it
// in a loop for up to addressLockTimeout, after which
// it returns an ErrAddressLockTimeout error.
func (db *DB) lockAddr(addr storage.Address) (unlock func(), err error) {
	start := time.Now()
	lockKey := hex.EncodeToString(addr)
	for {
		_, loaded := db.addressLocks.LoadOrStore(lockKey, struct{}{})
		if !loaded {
			break
		}
		time.Sleep(addressLockCheckDelay)
		if time.Since(start) > addressLockTimeout {
			return nil, ErrAddressLockTimeout
		}
	}
	return func() { db.addressLocks.Delete(lockKey) }, nil
}

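// A typical caller pattern (a minimal sketch; chunk stands for a
// storage.Chunk in scope):
//
//	unlock, err := db.lockAddr(chunk.Address())
//	if err != nil {
//		return err
//	}
//	defer unlock()
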
// chunkToItem creates a new Item with data provided by the Chunk.
func chunkToItem(ch storage.Chunk) shed.Item {
	return shed.Item{
		Address: ch.Address(),
		Data:    ch.Data(),
	}
}

// addressToItem creates a new Item with a provided address.
func addressToItem(addr storage.Address) shed.Item {
	return shed.Item{
		Address: addr,
	}
}

// now is a helper function that returns the current Unix timestamp
// in nanoseconds, in the UTC timezone.
// It is set in the init function for usage in production, and
// optionally overridden in tests for data validation.
var now func() int64

func init() {
	// set the now function
	now = func() (t int64) {
		return time.Now().UTC().UnixNano()
	}
}
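
// In tests, now can be temporarily replaced with a deterministic
// clock, for example (a sketch, not a helper defined in this file):
//
//	defer func(orig func() int64) { now = orig }(now)
//	now = func() (t int64) { return 42 }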