github.com/alanchchen/go-ethereum@v1.6.6-0.20170601190819-6171d01b1195/swarm/storage/dbstore.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // disk storage layer for the package bzz 18 // DbStore implements the ChunkStore interface and is used by the DPA as 19 // persistent storage of chunks 20 // it implements purging based on access count allowing for external control of 21 // max capacity 22 23 package storage 24 25 import ( 26 "bytes" 27 "encoding/binary" 28 "fmt" 29 "sync" 30 31 "github.com/ethereum/go-ethereum/log" 32 "github.com/ethereum/go-ethereum/rlp" 33 "github.com/syndtr/goleveldb/leveldb" 34 "github.com/syndtr/goleveldb/leveldb/iterator" 35 ) 36 37 const ( 38 defaultDbCapacity = 5000000 39 defaultRadius = 0 // not yet used 40 41 gcArraySize = 10000 42 gcArrayFreeRatio = 0.1 43 44 // key prefixes for leveldb storage 45 kpIndex = 0 46 kpData = 1 47 ) 48 49 var ( 50 keyAccessCnt = []byte{2} 51 keyEntryCnt = []byte{3} 52 keyDataIdx = []byte{4} 53 keyGCPos = []byte{5} 54 ) 55 56 type gcItem struct { 57 idx uint64 58 value uint64 59 idxKey []byte 60 } 61 62 type DbStore struct { 63 db *LDBDatabase 64 65 // this should be stored in db, accessed transactionally 66 entryCnt, accessCnt, dataIdx, capacity uint64 67 68 gcPos, gcStartPos []byte 69 gcArray []*gcItem 70 71 hashfunc Hasher 72 73 lock sync.Mutex 74 } 75 76 func NewDbStore(path string, hash Hasher, capacity uint64, radius int) (s *DbStore, err error) { 77 s = new(DbStore) 78 79 s.hashfunc = hash 80 81 s.db, err = NewLDBDatabase(path) 82 if err != nil { 83 return 84 } 85 86 s.setCapacity(capacity) 87 88 s.gcStartPos = make([]byte, 1) 89 s.gcStartPos[0] = kpIndex 90 s.gcArray = make([]*gcItem, gcArraySize) 91 92 data, _ := s.db.Get(keyEntryCnt) 93 s.entryCnt = BytesToU64(data) 94 data, _ = s.db.Get(keyAccessCnt) 95 s.accessCnt = BytesToU64(data) 96 data, _ = s.db.Get(keyDataIdx) 97 s.dataIdx = BytesToU64(data) 98 s.gcPos, _ = s.db.Get(keyGCPos) 99 if s.gcPos == nil { 100 s.gcPos = s.gcStartPos 101 } 102 return 103 } 104 105 type dpaDBIndex struct { 106 Idx uint64 107 Access uint64 108 } 109 110 func BytesToU64(data []byte) uint64 { 111 if len(data) < 8 { 112 return 0 113 } 114 return binary.LittleEndian.Uint64(data) 115 } 116 117 func U64ToBytes(val uint64) []byte { 118 data := make([]byte, 8) 119 binary.LittleEndian.PutUint64(data, val) 120 return data 121 } 122 123 func getIndexGCValue(index *dpaDBIndex) uint64 { 124 return index.Access 125 } 126 127 func (s *DbStore) updateIndexAccess(index *dpaDBIndex) { 128 index.Access = s.accessCnt 129 } 130 131 func getIndexKey(hash Key) []byte { 132 HashSize := len(hash) 133 key := make([]byte, HashSize+1) 134 key[0] = 0 135 copy(key[1:], hash[:]) 136 return key 137 } 138 139 func getDataKey(idx uint64) []byte { 140 key := make([]byte, 9) 141 key[0] = 1 142 binary.BigEndian.PutUint64(key[1:9], idx) 143 144 return key 145 } 146 147 func encodeIndex(index *dpaDBIndex) []byte { 148 data, _ := rlp.EncodeToBytes(index) 149 return data 150 } 151 152 func encodeData(chunk *Chunk) []byte { 153 return chunk.SData 154 } 155 156 func decodeIndex(data []byte, index *dpaDBIndex) { 157 dec := rlp.NewStream(bytes.NewReader(data), 0) 158 dec.Decode(index) 159 } 160 161 func decodeData(data []byte, chunk *Chunk) { 162 chunk.SData = data 163 chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8])) 164 } 165 166 func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int { 167 pivotValue := list[pivotIndex].value 168 dd := list[pivotIndex] 169 list[pivotIndex] = list[right] 170 list[right] = dd 171 storeIndex := left 172 for i := left; i < right; i++ { 173 if list[i].value < pivotValue { 174 dd = list[storeIndex] 175 list[storeIndex] = list[i] 176 list[i] = dd 177 storeIndex++ 178 } 179 } 180 dd = list[storeIndex] 181 list[storeIndex] = list[right] 182 list[right] = dd 183 return storeIndex 184 } 185 186 func gcListSelect(list []*gcItem, left int, right int, n int) int { 187 if left == right { 188 return left 189 } 190 pivotIndex := (left + right) / 2 191 pivotIndex = gcListPartition(list, left, right, pivotIndex) 192 if n == pivotIndex { 193 return n 194 } else { 195 if n < pivotIndex { 196 return gcListSelect(list, left, pivotIndex-1, n) 197 } else { 198 return gcListSelect(list, pivotIndex+1, right, n) 199 } 200 } 201 } 202 203 func (s *DbStore) collectGarbage(ratio float32) { 204 it := s.db.NewIterator() 205 it.Seek(s.gcPos) 206 if it.Valid() { 207 s.gcPos = it.Key() 208 } else { 209 s.gcPos = nil 210 } 211 gcnt := 0 212 213 for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) { 214 215 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 216 it.Seek(s.gcStartPos) 217 if it.Valid() { 218 s.gcPos = it.Key() 219 } else { 220 s.gcPos = nil 221 } 222 } 223 224 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 225 break 226 } 227 228 gci := new(gcItem) 229 gci.idxKey = s.gcPos 230 var index dpaDBIndex 231 decodeIndex(it.Value(), &index) 232 gci.idx = index.Idx 233 // the smaller, the more likely to be gc'd 234 gci.value = getIndexGCValue(&index) 235 s.gcArray[gcnt] = gci 236 gcnt++ 237 it.Next() 238 if it.Valid() { 239 s.gcPos = it.Key() 240 } else { 241 s.gcPos = nil 242 } 243 } 244 it.Release() 245 246 cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio)) 247 cutval := s.gcArray[cutidx].value 248 249 // fmt.Print(gcnt, " ", s.entryCnt, " ") 250 251 // actual gc 252 for i := 0; i < gcnt; i++ { 253 if s.gcArray[i].value <= cutval { 254 s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey) 255 } 256 } 257 258 // fmt.Println(s.entryCnt) 259 260 s.db.Put(keyGCPos, s.gcPos) 261 } 262 263 func (s *DbStore) Cleanup() { 264 //Iterates over the database and checks that there are no faulty chunks 265 it := s.db.NewIterator() 266 startPosition := []byte{kpIndex} 267 it.Seek(startPosition) 268 var key []byte 269 var errorsFound, total int 270 for it.Valid() { 271 key = it.Key() 272 if (key == nil) || (key[0] != kpIndex) { 273 break 274 } 275 total++ 276 var index dpaDBIndex 277 decodeIndex(it.Value(), &index) 278 279 data, err := s.db.Get(getDataKey(index.Idx)) 280 if err != nil { 281 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 282 s.delete(index.Idx, getIndexKey(key[1:])) 283 errorsFound++ 284 } else { 285 hasher := s.hashfunc() 286 hasher.Write(data) 287 hash := hasher.Sum(nil) 288 if !bytes.Equal(hash, key[1:]) { 289 log.Warn(fmt.Sprintf("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:])) 290 s.delete(index.Idx, getIndexKey(key[1:])) 291 errorsFound++ 292 } 293 } 294 it.Next() 295 } 296 it.Release() 297 log.Warn(fmt.Sprintf("Found %v errors out of %v entries", errorsFound, total)) 298 } 299 300 func (s *DbStore) delete(idx uint64, idxKey []byte) { 301 batch := new(leveldb.Batch) 302 batch.Delete(idxKey) 303 batch.Delete(getDataKey(idx)) 304 s.entryCnt-- 305 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 306 s.db.Write(batch) 307 } 308 309 func (s *DbStore) Counter() uint64 { 310 s.lock.Lock() 311 defer s.lock.Unlock() 312 return s.dataIdx 313 } 314 315 func (s *DbStore) Put(chunk *Chunk) { 316 s.lock.Lock() 317 defer s.lock.Unlock() 318 319 ikey := getIndexKey(chunk.Key) 320 var index dpaDBIndex 321 322 if s.tryAccessIdx(ikey, &index) { 323 if chunk.dbStored != nil { 324 close(chunk.dbStored) 325 } 326 log.Trace(fmt.Sprintf("Storing to DB: chunk already exists, only update access")) 327 return // already exists, only update access 328 } 329 330 data := encodeData(chunk) 331 //data := ethutil.Encode([]interface{}{entry}) 332 333 if s.entryCnt >= s.capacity { 334 s.collectGarbage(gcArrayFreeRatio) 335 } 336 337 batch := new(leveldb.Batch) 338 339 batch.Put(getDataKey(s.dataIdx), data) 340 341 index.Idx = s.dataIdx 342 s.updateIndexAccess(&index) 343 344 idata := encodeIndex(&index) 345 batch.Put(ikey, idata) 346 347 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 348 s.entryCnt++ 349 batch.Put(keyDataIdx, U64ToBytes(s.dataIdx)) 350 s.dataIdx++ 351 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 352 s.accessCnt++ 353 354 s.db.Write(batch) 355 if chunk.dbStored != nil { 356 close(chunk.dbStored) 357 } 358 log.Trace(fmt.Sprintf("DbStore.Put: %v. db storage counter: %v ", chunk.Key.Log(), s.dataIdx)) 359 } 360 361 // try to find index; if found, update access cnt and return true 362 func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool { 363 idata, err := s.db.Get(ikey) 364 if err != nil { 365 return false 366 } 367 decodeIndex(idata, index) 368 369 batch := new(leveldb.Batch) 370 371 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 372 s.accessCnt++ 373 s.updateIndexAccess(index) 374 idata = encodeIndex(index) 375 batch.Put(ikey, idata) 376 377 s.db.Write(batch) 378 379 return true 380 } 381 382 func (s *DbStore) Get(key Key) (chunk *Chunk, err error) { 383 s.lock.Lock() 384 defer s.lock.Unlock() 385 386 var index dpaDBIndex 387 388 if s.tryAccessIdx(getIndexKey(key), &index) { 389 var data []byte 390 data, err = s.db.Get(getDataKey(index.Idx)) 391 if err != nil { 392 log.Trace(fmt.Sprintf("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err)) 393 s.delete(index.Idx, getIndexKey(key)) 394 return 395 } 396 397 hasher := s.hashfunc() 398 hasher.Write(data) 399 hash := hasher.Sum(nil) 400 if !bytes.Equal(hash, key) { 401 s.delete(index.Idx, getIndexKey(key)) 402 panic("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'") 403 } 404 405 chunk = &Chunk{ 406 Key: key, 407 } 408 decodeData(data, chunk) 409 } else { 410 err = notFound 411 } 412 413 return 414 415 } 416 417 func (s *DbStore) updateAccessCnt(key Key) { 418 419 s.lock.Lock() 420 defer s.lock.Unlock() 421 422 var index dpaDBIndex 423 s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt 424 425 } 426 427 func (s *DbStore) setCapacity(c uint64) { 428 429 s.lock.Lock() 430 defer s.lock.Unlock() 431 432 s.capacity = c 433 434 if s.entryCnt > c { 435 var ratio float32 436 ratio = float32(1.01) - float32(c)/float32(s.entryCnt) 437 if ratio < gcArrayFreeRatio { 438 ratio = gcArrayFreeRatio 439 } 440 if ratio > 1 { 441 ratio = 1 442 } 443 for s.entryCnt > c { 444 s.collectGarbage(ratio) 445 } 446 } 447 } 448 449 func (s *DbStore) getEntryCnt() uint64 { 450 return s.entryCnt 451 } 452 453 func (s *DbStore) Close() { 454 s.db.Close() 455 } 456 457 // describes a section of the DbStore representing the unsynced 458 // domain relevant to a peer 459 // Start - Stop designate a continuous area Keys in an address space 460 // typically the addresses closer to us than to the peer but not closer 461 // another closer peer in between 462 // From - To designates a time interval typically from the last disconnect 463 // till the latest connection (real time traffic is relayed) 464 type DbSyncState struct { 465 Start, Stop Key 466 First, Last uint64 467 } 468 469 // implements the syncer iterator interface 470 // iterates by storage index (~ time of storage = first entry to db) 471 type dbSyncIterator struct { 472 it iterator.Iterator 473 DbSyncState 474 } 475 476 // initialises a sync iterator from a syncToken (passed in with the handshake) 477 func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) { 478 if state.First > state.Last { 479 return nil, fmt.Errorf("no entries found") 480 } 481 si = &dbSyncIterator{ 482 it: self.db.NewIterator(), 483 DbSyncState: state, 484 } 485 si.it.Seek(getIndexKey(state.Start)) 486 return si, nil 487 } 488 489 // walk the area from Start to Stop and returns items within time interval 490 // First to Last 491 func (self *dbSyncIterator) Next() (key Key) { 492 for self.it.Valid() { 493 dbkey := self.it.Key() 494 if dbkey[0] != 0 { 495 break 496 } 497 key = Key(make([]byte, len(dbkey)-1)) 498 copy(key[:], dbkey[1:]) 499 if bytes.Compare(key[:], self.Start) <= 0 { 500 self.it.Next() 501 continue 502 } 503 if bytes.Compare(key[:], self.Stop) > 0 { 504 break 505 } 506 var index dpaDBIndex 507 decodeIndex(self.it.Value(), &index) 508 self.it.Next() 509 if (index.Idx >= self.First) && (index.Idx < self.Last) { 510 return 511 } 512 } 513 self.it.Release() 514 return nil 515 }