github.com/samgwo/go-ethereum@v1.8.2-0.20180302101319-49bcb5fbd55e/swarm/storage/dbstore.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // disk storage layer for the package bzz 18 // DbStore implements the ChunkStore interface and is used by the DPA as 19 // persistent storage of chunks 20 // it implements purging based on access count allowing for external control of 21 // max capacity 22 23 package storage 24 25 import ( 26 "archive/tar" 27 "bytes" 28 "encoding/binary" 29 "encoding/hex" 30 "fmt" 31 "io" 32 "io/ioutil" 33 "sync" 34 35 "github.com/ethereum/go-ethereum/log" 36 "github.com/ethereum/go-ethereum/metrics" 37 "github.com/ethereum/go-ethereum/rlp" 38 "github.com/syndtr/goleveldb/leveldb" 39 "github.com/syndtr/goleveldb/leveldb/iterator" 40 ) 41 42 //metrics variables 43 var ( 44 gcCounter = metrics.NewRegisteredCounter("storage.db.dbstore.gc.count", nil) 45 dbStoreDeleteCounter = metrics.NewRegisteredCounter("storage.db.dbstore.rm.count", nil) 46 ) 47 48 const ( 49 defaultDbCapacity = 5000000 50 defaultRadius = 0 // not yet used 51 52 gcArraySize = 10000 53 gcArrayFreeRatio = 0.1 54 55 // key prefixes for leveldb storage 56 kpIndex = 0 57 kpData = 1 58 ) 59 60 var ( 61 keyAccessCnt = []byte{2} 62 keyEntryCnt = []byte{3} 63 keyDataIdx = []byte{4} 64 keyGCPos = []byte{5} 65 ) 66 67 type gcItem struct { 68 idx uint64 69 value uint64 70 idxKey []byte 71 } 72 73 type DbStore struct { 74 db *LDBDatabase 75 76 // this should be stored in db, accessed transactionally 77 entryCnt, accessCnt, dataIdx, capacity uint64 78 79 gcPos, gcStartPos []byte 80 gcArray []*gcItem 81 82 hashfunc SwarmHasher 83 84 lock sync.Mutex 85 } 86 87 func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) { 88 s = new(DbStore) 89 90 s.hashfunc = hash 91 92 s.db, err = NewLDBDatabase(path) 93 if err != nil { 94 return 95 } 96 97 s.setCapacity(capacity) 98 99 s.gcStartPos = make([]byte, 1) 100 s.gcStartPos[0] = kpIndex 101 s.gcArray = make([]*gcItem, gcArraySize) 102 103 data, _ := s.db.Get(keyEntryCnt) 104 s.entryCnt = BytesToU64(data) 105 data, _ = s.db.Get(keyAccessCnt) 106 s.accessCnt = BytesToU64(data) 107 data, _ = s.db.Get(keyDataIdx) 108 s.dataIdx = BytesToU64(data) 109 s.gcPos, _ = s.db.Get(keyGCPos) 110 if s.gcPos == nil { 111 s.gcPos = s.gcStartPos 112 } 113 return 114 } 115 116 type dpaDBIndex struct { 117 Idx uint64 118 Access uint64 119 } 120 121 func BytesToU64(data []byte) uint64 { 122 if len(data) < 8 { 123 return 0 124 } 125 return binary.LittleEndian.Uint64(data) 126 } 127 128 func U64ToBytes(val uint64) []byte { 129 data := make([]byte, 8) 130 binary.LittleEndian.PutUint64(data, val) 131 return data 132 } 133 134 func getIndexGCValue(index *dpaDBIndex) uint64 { 135 return index.Access 136 } 137 138 func (s *DbStore) updateIndexAccess(index *dpaDBIndex) { 139 index.Access = s.accessCnt 140 } 141 142 func getIndexKey(hash Key) []byte { 143 HashSize := len(hash) 144 key := make([]byte, HashSize+1) 145 key[0] = 0 146 copy(key[1:], hash[:]) 147 return key 148 } 149 150 func getDataKey(idx uint64) []byte { 151 key := make([]byte, 9) 152 key[0] = 1 153 binary.BigEndian.PutUint64(key[1:9], idx) 154 155 return key 156 } 157 158 func encodeIndex(index *dpaDBIndex) []byte { 159 data, _ := rlp.EncodeToBytes(index) 160 return data 161 } 162 163 func encodeData(chunk *Chunk) []byte { 164 return chunk.SData 165 } 166 167 func decodeIndex(data []byte, index *dpaDBIndex) { 168 dec := rlp.NewStream(bytes.NewReader(data), 0) 169 dec.Decode(index) 170 } 171 172 func decodeData(data []byte, chunk *Chunk) { 173 chunk.SData = data 174 chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8])) 175 } 176 177 func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int { 178 pivotValue := list[pivotIndex].value 179 dd := list[pivotIndex] 180 list[pivotIndex] = list[right] 181 list[right] = dd 182 storeIndex := left 183 for i := left; i < right; i++ { 184 if list[i].value < pivotValue { 185 dd = list[storeIndex] 186 list[storeIndex] = list[i] 187 list[i] = dd 188 storeIndex++ 189 } 190 } 191 dd = list[storeIndex] 192 list[storeIndex] = list[right] 193 list[right] = dd 194 return storeIndex 195 } 196 197 func gcListSelect(list []*gcItem, left int, right int, n int) int { 198 if left == right { 199 return left 200 } 201 pivotIndex := (left + right) / 2 202 pivotIndex = gcListPartition(list, left, right, pivotIndex) 203 if n == pivotIndex { 204 return n 205 } else { 206 if n < pivotIndex { 207 return gcListSelect(list, left, pivotIndex-1, n) 208 } else { 209 return gcListSelect(list, pivotIndex+1, right, n) 210 } 211 } 212 } 213 214 func (s *DbStore) collectGarbage(ratio float32) { 215 it := s.db.NewIterator() 216 it.Seek(s.gcPos) 217 if it.Valid() { 218 s.gcPos = it.Key() 219 } else { 220 s.gcPos = nil 221 } 222 gcnt := 0 223 224 for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) { 225 226 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 227 it.Seek(s.gcStartPos) 228 if it.Valid() { 229 s.gcPos = it.Key() 230 } else { 231 s.gcPos = nil 232 } 233 } 234 235 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 236 break 237 } 238 239 gci := new(gcItem) 240 gci.idxKey = s.gcPos 241 var index dpaDBIndex 242 decodeIndex(it.Value(), &index) 243 gci.idx = index.Idx 244 // the smaller, the more likely to be gc'd 245 gci.value = getIndexGCValue(&index) 246 s.gcArray[gcnt] = gci 247 gcnt++ 248 it.Next() 249 if it.Valid() { 250 s.gcPos = it.Key() 251 } else { 252 s.gcPos = nil 253 } 254 } 255 it.Release() 256 257 cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio)) 258 cutval := s.gcArray[cutidx].value 259 260 // fmt.Print(gcnt, " ", s.entryCnt, " ") 261 262 // actual gc 263 for i := 0; i < gcnt; i++ { 264 if s.gcArray[i].value <= cutval { 265 gcCounter.Inc(1) 266 s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey) 267 } 268 } 269 270 // fmt.Println(s.entryCnt) 271 272 s.db.Put(keyGCPos, s.gcPos) 273 } 274 275 // Export writes all chunks from the store to a tar archive, returning the 276 // number of chunks written. 277 func (s *DbStore) Export(out io.Writer) (int64, error) { 278 tw := tar.NewWriter(out) 279 defer tw.Close() 280 281 it := s.db.NewIterator() 282 defer it.Release() 283 var count int64 284 for ok := it.Seek([]byte{kpIndex}); ok; ok = it.Next() { 285 key := it.Key() 286 if (key == nil) || (key[0] != kpIndex) { 287 break 288 } 289 290 var index dpaDBIndex 291 decodeIndex(it.Value(), &index) 292 293 data, err := s.db.Get(getDataKey(index.Idx)) 294 if err != nil { 295 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 296 continue 297 } 298 299 hdr := &tar.Header{ 300 Name: hex.EncodeToString(key[1:]), 301 Mode: 0644, 302 Size: int64(len(data)), 303 } 304 if err := tw.WriteHeader(hdr); err != nil { 305 return count, err 306 } 307 if _, err := tw.Write(data); err != nil { 308 return count, err 309 } 310 count++ 311 } 312 313 return count, nil 314 } 315 316 // Import reads chunks into the store from a tar archive, returning the number 317 // of chunks read. 318 func (s *DbStore) Import(in io.Reader) (int64, error) { 319 tr := tar.NewReader(in) 320 321 var count int64 322 for { 323 hdr, err := tr.Next() 324 if err == io.EOF { 325 break 326 } else if err != nil { 327 return count, err 328 } 329 330 if len(hdr.Name) != 64 { 331 log.Warn("ignoring non-chunk file", "name", hdr.Name) 332 continue 333 } 334 335 key, err := hex.DecodeString(hdr.Name) 336 if err != nil { 337 log.Warn("ignoring invalid chunk file", "name", hdr.Name, "err", err) 338 continue 339 } 340 341 data, err := ioutil.ReadAll(tr) 342 if err != nil { 343 return count, err 344 } 345 346 s.Put(&Chunk{Key: key, SData: data}) 347 count++ 348 } 349 350 return count, nil 351 } 352 353 func (s *DbStore) Cleanup() { 354 //Iterates over the database and checks that there are no faulty chunks 355 it := s.db.NewIterator() 356 startPosition := []byte{kpIndex} 357 it.Seek(startPosition) 358 var key []byte 359 var errorsFound, total int 360 for it.Valid() { 361 key = it.Key() 362 if (key == nil) || (key[0] != kpIndex) { 363 break 364 } 365 total++ 366 var index dpaDBIndex 367 decodeIndex(it.Value(), &index) 368 369 data, err := s.db.Get(getDataKey(index.Idx)) 370 if err != nil { 371 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 372 s.delete(index.Idx, getIndexKey(key[1:])) 373 errorsFound++ 374 } else { 375 hasher := s.hashfunc() 376 hasher.Write(data) 377 hash := hasher.Sum(nil) 378 if !bytes.Equal(hash, key[1:]) { 379 log.Warn(fmt.Sprintf("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:])) 380 s.delete(index.Idx, getIndexKey(key[1:])) 381 errorsFound++ 382 } 383 } 384 it.Next() 385 } 386 it.Release() 387 log.Warn(fmt.Sprintf("Found %v errors out of %v entries", errorsFound, total)) 388 } 389 390 func (s *DbStore) delete(idx uint64, idxKey []byte) { 391 batch := new(leveldb.Batch) 392 batch.Delete(idxKey) 393 batch.Delete(getDataKey(idx)) 394 dbStoreDeleteCounter.Inc(1) 395 s.entryCnt-- 396 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 397 s.db.Write(batch) 398 } 399 400 func (s *DbStore) Counter() uint64 { 401 s.lock.Lock() 402 defer s.lock.Unlock() 403 return s.dataIdx 404 } 405 406 func (s *DbStore) Put(chunk *Chunk) { 407 s.lock.Lock() 408 defer s.lock.Unlock() 409 410 ikey := getIndexKey(chunk.Key) 411 var index dpaDBIndex 412 413 if s.tryAccessIdx(ikey, &index) { 414 if chunk.dbStored != nil { 415 close(chunk.dbStored) 416 } 417 log.Trace(fmt.Sprintf("Storing to DB: chunk already exists, only update access")) 418 return // already exists, only update access 419 } 420 421 data := encodeData(chunk) 422 //data := ethutil.Encode([]interface{}{entry}) 423 424 if s.entryCnt >= s.capacity { 425 s.collectGarbage(gcArrayFreeRatio) 426 } 427 428 batch := new(leveldb.Batch) 429 430 batch.Put(getDataKey(s.dataIdx), data) 431 432 index.Idx = s.dataIdx 433 s.updateIndexAccess(&index) 434 435 idata := encodeIndex(&index) 436 batch.Put(ikey, idata) 437 438 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 439 s.entryCnt++ 440 batch.Put(keyDataIdx, U64ToBytes(s.dataIdx)) 441 s.dataIdx++ 442 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 443 s.accessCnt++ 444 445 s.db.Write(batch) 446 if chunk.dbStored != nil { 447 close(chunk.dbStored) 448 } 449 log.Trace(fmt.Sprintf("DbStore.Put: %v. db storage counter: %v ", chunk.Key.Log(), s.dataIdx)) 450 } 451 452 // try to find index; if found, update access cnt and return true 453 func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool { 454 idata, err := s.db.Get(ikey) 455 if err != nil { 456 return false 457 } 458 decodeIndex(idata, index) 459 460 batch := new(leveldb.Batch) 461 462 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 463 s.accessCnt++ 464 s.updateIndexAccess(index) 465 idata = encodeIndex(index) 466 batch.Put(ikey, idata) 467 468 s.db.Write(batch) 469 470 return true 471 } 472 473 func (s *DbStore) Get(key Key) (chunk *Chunk, err error) { 474 s.lock.Lock() 475 defer s.lock.Unlock() 476 477 var index dpaDBIndex 478 479 if s.tryAccessIdx(getIndexKey(key), &index) { 480 var data []byte 481 data, err = s.db.Get(getDataKey(index.Idx)) 482 if err != nil { 483 log.Trace(fmt.Sprintf("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err)) 484 s.delete(index.Idx, getIndexKey(key)) 485 return 486 } 487 488 hasher := s.hashfunc() 489 hasher.Write(data) 490 hash := hasher.Sum(nil) 491 if !bytes.Equal(hash, key) { 492 s.delete(index.Idx, getIndexKey(key)) 493 log.Warn("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'") 494 } 495 496 chunk = &Chunk{ 497 Key: key, 498 } 499 decodeData(data, chunk) 500 } else { 501 err = notFound 502 } 503 504 return 505 506 } 507 508 func (s *DbStore) updateAccessCnt(key Key) { 509 510 s.lock.Lock() 511 defer s.lock.Unlock() 512 513 var index dpaDBIndex 514 s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt 515 516 } 517 518 func (s *DbStore) setCapacity(c uint64) { 519 520 s.lock.Lock() 521 defer s.lock.Unlock() 522 523 s.capacity = c 524 525 if s.entryCnt > c { 526 ratio := float32(1.01) - float32(c)/float32(s.entryCnt) 527 if ratio < gcArrayFreeRatio { 528 ratio = gcArrayFreeRatio 529 } 530 if ratio > 1 { 531 ratio = 1 532 } 533 for s.entryCnt > c { 534 s.collectGarbage(ratio) 535 } 536 } 537 } 538 539 func (s *DbStore) Close() { 540 s.db.Close() 541 } 542 543 // describes a section of the DbStore representing the unsynced 544 // domain relevant to a peer 545 // Start - Stop designate a continuous area Keys in an address space 546 // typically the addresses closer to us than to the peer but not closer 547 // another closer peer in between 548 // From - To designates a time interval typically from the last disconnect 549 // till the latest connection (real time traffic is relayed) 550 type DbSyncState struct { 551 Start, Stop Key 552 First, Last uint64 553 } 554 555 // implements the syncer iterator interface 556 // iterates by storage index (~ time of storage = first entry to db) 557 type dbSyncIterator struct { 558 it iterator.Iterator 559 DbSyncState 560 } 561 562 // initialises a sync iterator from a syncToken (passed in with the handshake) 563 func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) { 564 if state.First > state.Last { 565 return nil, fmt.Errorf("no entries found") 566 } 567 si = &dbSyncIterator{ 568 it: self.db.NewIterator(), 569 DbSyncState: state, 570 } 571 si.it.Seek(getIndexKey(state.Start)) 572 return si, nil 573 } 574 575 // walk the area from Start to Stop and returns items within time interval 576 // First to Last 577 func (self *dbSyncIterator) Next() (key Key) { 578 for self.it.Valid() { 579 dbkey := self.it.Key() 580 if dbkey[0] != 0 { 581 break 582 } 583 key = Key(make([]byte, len(dbkey)-1)) 584 copy(key[:], dbkey[1:]) 585 if bytes.Compare(key[:], self.Start) <= 0 { 586 self.it.Next() 587 continue 588 } 589 if bytes.Compare(key[:], self.Stop) > 0 { 590 break 591 } 592 var index dpaDBIndex 593 decodeIndex(self.it.Value(), &index) 594 self.it.Next() 595 if (index.Idx >= self.First) && (index.Idx < self.Last) { 596 return 597 } 598 } 599 self.it.Release() 600 return nil 601 }