github.com/digdeepmining/go-atheios@v1.5.13-0.20180902133602-d5687a2e6f43/swarm/storage/dbstore.go (about)

     1  // Copyright 2016 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
// Disk storage layer for the package bzz.
// DbStore implements the ChunkStore interface and is used by the DPA as
// persistent storage of chunks.
// It implements purging based on access count, allowing for external control
// of max capacity.
    22  
    23  package storage
    24  
    25  import (
    26  	"bytes"
    27  	"encoding/binary"
    28  	"fmt"
    29  	"sync"
    30  
    31  	"github.com/atheioschain/go-atheios/logger"
    32  	"github.com/atheioschain/go-atheios/logger/glog"
    33  	"github.com/atheioschain/go-atheios/rlp"
    34  	"github.com/syndtr/goleveldb/leveldb"
    35  	"github.com/syndtr/goleveldb/leveldb/iterator"
    36  )
    37  
    38  const (
    39  	defaultDbCapacity = 5000000
    40  	defaultRadius     = 0 // not yet used
    41  
    42  	gcArraySize      = 10000
    43  	gcArrayFreeRatio = 0.1
    44  
    45  	// key prefixes for leveldb storage
    46  	kpIndex = 0
    47  	kpData  = 1
    48  )
    49  
    50  var (
    51  	keyAccessCnt = []byte{2}
    52  	keyEntryCnt  = []byte{3}
    53  	keyDataIdx   = []byte{4}
    54  	keyGCPos     = []byte{5}
    55  )
    56  
    57  type gcItem struct {
    58  	idx    uint64
    59  	value  uint64
    60  	idxKey []byte
    61  }
    62  
    63  type DbStore struct {
    64  	db *LDBDatabase
    65  
    66  	// this should be stored in db, accessed transactionally
    67  	entryCnt, accessCnt, dataIdx, capacity uint64
    68  
    69  	gcPos, gcStartPos []byte
    70  	gcArray           []*gcItem
    71  
    72  	hashfunc Hasher
    73  
    74  	lock sync.Mutex
    75  }
    76  
    77  func NewDbStore(path string, hash Hasher, capacity uint64, radius int) (s *DbStore, err error) {
    78  	s = new(DbStore)
    79  
    80  	s.hashfunc = hash
    81  
    82  	s.db, err = NewLDBDatabase(path)
    83  	if err != nil {
    84  		return
    85  	}
    86  
    87  	s.setCapacity(capacity)
    88  
    89  	s.gcStartPos = make([]byte, 1)
    90  	s.gcStartPos[0] = kpIndex
    91  	s.gcArray = make([]*gcItem, gcArraySize)
    92  
    93  	data, _ := s.db.Get(keyEntryCnt)
    94  	s.entryCnt = BytesToU64(data)
    95  	data, _ = s.db.Get(keyAccessCnt)
    96  	s.accessCnt = BytesToU64(data)
    97  	data, _ = s.db.Get(keyDataIdx)
    98  	s.dataIdx = BytesToU64(data)
    99  	s.gcPos, _ = s.db.Get(keyGCPos)
   100  	if s.gcPos == nil {
   101  		s.gcPos = s.gcStartPos
   102  	}
   103  	return
   104  }
   105  
   106  type dpaDBIndex struct {
   107  	Idx    uint64
   108  	Access uint64
   109  }
   110  
   111  func BytesToU64(data []byte) uint64 {
   112  	if len(data) < 8 {
   113  		return 0
   114  	}
   115  	return binary.LittleEndian.Uint64(data)
   116  }
   117  
   118  func U64ToBytes(val uint64) []byte {
   119  	data := make([]byte, 8)
   120  	binary.LittleEndian.PutUint64(data, val)
   121  	return data
   122  }
   123  
   124  func getIndexGCValue(index *dpaDBIndex) uint64 {
   125  	return index.Access
   126  }
   127  
   128  func (s *DbStore) updateIndexAccess(index *dpaDBIndex) {
   129  	index.Access = s.accessCnt
   130  }
   131  
   132  func getIndexKey(hash Key) []byte {
   133  	HashSize := len(hash)
   134  	key := make([]byte, HashSize+1)
   135  	key[0] = 0
   136  	copy(key[1:], hash[:])
   137  	return key
   138  }
   139  
   140  func getDataKey(idx uint64) []byte {
   141  	key := make([]byte, 9)
   142  	key[0] = 1
   143  	binary.BigEndian.PutUint64(key[1:9], idx)
   144  
   145  	return key
   146  }
   147  
   148  func encodeIndex(index *dpaDBIndex) []byte {
   149  	data, _ := rlp.EncodeToBytes(index)
   150  	return data
   151  }
   152  
   153  func encodeData(chunk *Chunk) []byte {
   154  	return chunk.SData
   155  }
   156  
   157  func decodeIndex(data []byte, index *dpaDBIndex) {
   158  	dec := rlp.NewStream(bytes.NewReader(data), 0)
   159  	dec.Decode(index)
   160  }
   161  
   162  func decodeData(data []byte, chunk *Chunk) {
   163  	chunk.SData = data
   164  	chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8]))
   165  }
   166  
   167  func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int {
   168  	pivotValue := list[pivotIndex].value
   169  	dd := list[pivotIndex]
   170  	list[pivotIndex] = list[right]
   171  	list[right] = dd
   172  	storeIndex := left
   173  	for i := left; i < right; i++ {
   174  		if list[i].value < pivotValue {
   175  			dd = list[storeIndex]
   176  			list[storeIndex] = list[i]
   177  			list[i] = dd
   178  			storeIndex++
   179  		}
   180  	}
   181  	dd = list[storeIndex]
   182  	list[storeIndex] = list[right]
   183  	list[right] = dd
   184  	return storeIndex
   185  }
   186  
   187  func gcListSelect(list []*gcItem, left int, right int, n int) int {
   188  	if left == right {
   189  		return left
   190  	}
   191  	pivotIndex := (left + right) / 2
   192  	pivotIndex = gcListPartition(list, left, right, pivotIndex)
   193  	if n == pivotIndex {
   194  		return n
   195  	} else {
   196  		if n < pivotIndex {
   197  			return gcListSelect(list, left, pivotIndex-1, n)
   198  		} else {
   199  			return gcListSelect(list, pivotIndex+1, right, n)
   200  		}
   201  	}
   202  }
   203  
   204  func (s *DbStore) collectGarbage(ratio float32) {
   205  	it := s.db.NewIterator()
   206  	it.Seek(s.gcPos)
   207  	if it.Valid() {
   208  		s.gcPos = it.Key()
   209  	} else {
   210  		s.gcPos = nil
   211  	}
   212  	gcnt := 0
   213  
   214  	for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) {
   215  
   216  		if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) {
   217  			it.Seek(s.gcStartPos)
   218  			if it.Valid() {
   219  				s.gcPos = it.Key()
   220  			} else {
   221  				s.gcPos = nil
   222  			}
   223  		}
   224  
   225  		if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) {
   226  			break
   227  		}
   228  
   229  		gci := new(gcItem)
   230  		gci.idxKey = s.gcPos
   231  		var index dpaDBIndex
   232  		decodeIndex(it.Value(), &index)
   233  		gci.idx = index.Idx
   234  		// the smaller, the more likely to be gc'd
   235  		gci.value = getIndexGCValue(&index)
   236  		s.gcArray[gcnt] = gci
   237  		gcnt++
   238  		it.Next()
   239  		if it.Valid() {
   240  			s.gcPos = it.Key()
   241  		} else {
   242  			s.gcPos = nil
   243  		}
   244  	}
   245  	it.Release()
   246  
   247  	cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio))
   248  	cutval := s.gcArray[cutidx].value
   249  
   250  	// fmt.Print(gcnt, " ", s.entryCnt, " ")
   251  
   252  	// actual gc
   253  	for i := 0; i < gcnt; i++ {
   254  		if s.gcArray[i].value <= cutval {
   255  			s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey)
   256  		}
   257  	}
   258  
   259  	// fmt.Println(s.entryCnt)
   260  
   261  	s.db.Put(keyGCPos, s.gcPos)
   262  }
   263  
   264  func (s *DbStore) Cleanup() {
   265  	//Iterates over the database and checks that there are no faulty chunks
   266  	it := s.db.NewIterator()
   267  	startPosition := []byte{kpIndex}
   268  	it.Seek(startPosition)
   269  	var key []byte
   270  	var errorsFound, total int
   271  	for it.Valid() {
   272  		key = it.Key()
   273  		if (key == nil) || (key[0] != kpIndex) {
   274  			break
   275  		}
   276  		total++
   277  		var index dpaDBIndex
   278  		decodeIndex(it.Value(), &index)
   279  
   280  		data, err := s.db.Get(getDataKey(index.Idx))
   281  		if err != nil {
   282  			glog.V(logger.Warn).Infof("Chunk %x found but could not be accessed: %v", key[:], err)
   283  			s.delete(index.Idx, getIndexKey(key[1:]))
   284  			errorsFound++
   285  		} else {
   286  			hasher := s.hashfunc()
   287  			hasher.Write(data)
   288  			hash := hasher.Sum(nil)
   289  			if !bytes.Equal(hash, key[1:]) {
   290  				glog.V(logger.Warn).Infof("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:])
   291  				s.delete(index.Idx, getIndexKey(key[1:]))
   292  				errorsFound++
   293  			}
   294  		}
   295  		it.Next()
   296  	}
   297  	it.Release()
   298  	glog.V(logger.Warn).Infof("Found %v errors out of %v entries", errorsFound, total)
   299  }
   300  
   301  func (s *DbStore) delete(idx uint64, idxKey []byte) {
   302  	batch := new(leveldb.Batch)
   303  	batch.Delete(idxKey)
   304  	batch.Delete(getDataKey(idx))
   305  	s.entryCnt--
   306  	batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt))
   307  	s.db.Write(batch)
   308  }
   309  
   310  func (s *DbStore) Counter() uint64 {
   311  	s.lock.Lock()
   312  	defer s.lock.Unlock()
   313  	return s.dataIdx
   314  }
   315  
   316  func (s *DbStore) Put(chunk *Chunk) {
   317  	s.lock.Lock()
   318  	defer s.lock.Unlock()
   319  
   320  	ikey := getIndexKey(chunk.Key)
   321  	var index dpaDBIndex
   322  
   323  	if s.tryAccessIdx(ikey, &index) {
   324  		if chunk.dbStored != nil {
   325  			close(chunk.dbStored)
   326  		}
   327  		glog.V(logger.Detail).Infof("Storing to DB: chunk already exists, only update access")
   328  		return // already exists, only update access
   329  	}
   330  
   331  	data := encodeData(chunk)
   332  	//data := ethutil.Encode([]interface{}{entry})
   333  
   334  	if s.entryCnt >= s.capacity {
   335  		s.collectGarbage(gcArrayFreeRatio)
   336  	}
   337  
   338  	batch := new(leveldb.Batch)
   339  
   340  	batch.Put(getDataKey(s.dataIdx), data)
   341  
   342  	index.Idx = s.dataIdx
   343  	s.updateIndexAccess(&index)
   344  
   345  	idata := encodeIndex(&index)
   346  	batch.Put(ikey, idata)
   347  
   348  	batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt))
   349  	s.entryCnt++
   350  	batch.Put(keyDataIdx, U64ToBytes(s.dataIdx))
   351  	s.dataIdx++
   352  	batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt))
   353  	s.accessCnt++
   354  
   355  	s.db.Write(batch)
   356  	if chunk.dbStored != nil {
   357  		close(chunk.dbStored)
   358  	}
   359  	glog.V(logger.Detail).Infof("DbStore.Put: %v. db storage counter: %v ", chunk.Key.Log(), s.dataIdx)
   360  }
   361  
   362  // try to find index; if found, update access cnt and return true
   363  func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool {
   364  	idata, err := s.db.Get(ikey)
   365  	if err != nil {
   366  		return false
   367  	}
   368  	decodeIndex(idata, index)
   369  
   370  	batch := new(leveldb.Batch)
   371  
   372  	batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt))
   373  	s.accessCnt++
   374  	s.updateIndexAccess(index)
   375  	idata = encodeIndex(index)
   376  	batch.Put(ikey, idata)
   377  
   378  	s.db.Write(batch)
   379  
   380  	return true
   381  }
   382  
   383  func (s *DbStore) Get(key Key) (chunk *Chunk, err error) {
   384  	s.lock.Lock()
   385  	defer s.lock.Unlock()
   386  
   387  	var index dpaDBIndex
   388  
   389  	if s.tryAccessIdx(getIndexKey(key), &index) {
   390  		var data []byte
   391  		data, err = s.db.Get(getDataKey(index.Idx))
   392  		if err != nil {
   393  			glog.V(logger.Detail).Infof("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err)
   394  			s.delete(index.Idx, getIndexKey(key))
   395  			return
   396  		}
   397  
   398  		hasher := s.hashfunc()
   399  		hasher.Write(data)
   400  		hash := hasher.Sum(nil)
   401  		if !bytes.Equal(hash, key) {
   402  			s.delete(index.Idx, getIndexKey(key))
   403  			panic("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'")
   404  		}
   405  
   406  		chunk = &Chunk{
   407  			Key: key,
   408  		}
   409  		decodeData(data, chunk)
   410  	} else {
   411  		err = notFound
   412  	}
   413  
   414  	return
   415  
   416  }
   417  
   418  func (s *DbStore) updateAccessCnt(key Key) {
   419  
   420  	s.lock.Lock()
   421  	defer s.lock.Unlock()
   422  
   423  	var index dpaDBIndex
   424  	s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt
   425  
   426  }
   427  
   428  func (s *DbStore) setCapacity(c uint64) {
   429  
   430  	s.lock.Lock()
   431  	defer s.lock.Unlock()
   432  
   433  	s.capacity = c
   434  
   435  	if s.entryCnt > c {
   436  		var ratio float32
   437  		ratio = float32(1.01) - float32(c)/float32(s.entryCnt)
   438  		if ratio < gcArrayFreeRatio {
   439  			ratio = gcArrayFreeRatio
   440  		}
   441  		if ratio > 1 {
   442  			ratio = 1
   443  		}
   444  		for s.entryCnt > c {
   445  			s.collectGarbage(ratio)
   446  		}
   447  	}
   448  }
   449  
   450  func (s *DbStore) getEntryCnt() uint64 {
   451  	return s.entryCnt
   452  }
   453  
   454  func (s *DbStore) Close() {
   455  	s.db.Close()
   456  }
   457  
   458  //  describes a section of the DbStore representing the unsynced
   459  // domain relevant to a peer
   460  // Start - Stop designate a continuous area Keys in an address space
   461  // typically the addresses closer to us than to the peer but not closer
   462  // another closer peer in between
   463  // From - To designates a time interval typically from the last disconnect
   464  // till the latest connection (real time traffic is relayed)
   465  type DbSyncState struct {
   466  	Start, Stop Key
   467  	First, Last uint64
   468  }
   469  
   470  // implements the syncer iterator interface
   471  // iterates by storage index (~ time of storage = first entry to db)
   472  type dbSyncIterator struct {
   473  	it iterator.Iterator
   474  	DbSyncState
   475  }
   476  
   477  // initialises a sync iterator from a syncToken (passed in with the handshake)
   478  func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) {
   479  	if state.First > state.Last {
   480  		return nil, fmt.Errorf("no entries found")
   481  	}
   482  	si = &dbSyncIterator{
   483  		it:          self.db.NewIterator(),
   484  		DbSyncState: state,
   485  	}
   486  	si.it.Seek(getIndexKey(state.Start))
   487  	return si, nil
   488  }
   489  
   490  // walk the area from Start to Stop and returns items within time interval
   491  // First to Last
   492  func (self *dbSyncIterator) Next() (key Key) {
   493  	for self.it.Valid() {
   494  		dbkey := self.it.Key()
   495  		if dbkey[0] != 0 {
   496  			break
   497  		}
   498  		key = Key(make([]byte, len(dbkey)-1))
   499  		copy(key[:], dbkey[1:])
   500  		if bytes.Compare(key[:], self.Start) <= 0 {
   501  			self.it.Next()
   502  			continue
   503  		}
   504  		if bytes.Compare(key[:], self.Stop) > 0 {
   505  			break
   506  		}
   507  		var index dpaDBIndex
   508  		decodeIndex(self.it.Value(), &index)
   509  		self.it.Next()
   510  		if (index.Idx >= self.First) && (index.Idx < self.Last) {
   511  			return
   512  		}
   513  	}
   514  	self.it.Release()
   515  	return nil
   516  }