github.com/lzl124631x/go-ethereum@v1.8.12-0.20180615081455-574378edb50c/swarm/storage/dbstore.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // disk storage layer for the package bzz 18 // DbStore implements the ChunkStore interface and is used by the DPA as 19 // persistent storage of chunks 20 // it implements purging based on access count allowing for external control of 21 // max capacity 22 23 package storage 24 25 import ( 26 "archive/tar" 27 "bytes" 28 "encoding/binary" 29 "encoding/hex" 30 "fmt" 31 "io" 32 "io/ioutil" 33 "sync" 34 35 "github.com/ethereum/go-ethereum/log" 36 "github.com/ethereum/go-ethereum/metrics" 37 "github.com/ethereum/go-ethereum/rlp" 38 "github.com/syndtr/goleveldb/leveldb" 39 "github.com/syndtr/goleveldb/leveldb/iterator" 40 ) 41 42 //metrics variables 43 var ( 44 gcCounter = metrics.NewRegisteredCounter("storage.db.dbstore.gc.count", nil) 45 dbStoreDeleteCounter = metrics.NewRegisteredCounter("storage.db.dbstore.rm.count", nil) 46 ) 47 48 const ( 49 defaultDbCapacity = 5000000 50 defaultRadius = 0 // not yet used 51 52 gcArraySize = 10000 53 gcArrayFreeRatio = 0.1 54 55 // key prefixes for leveldb storage 56 kpIndex = 0 57 ) 58 59 var ( 60 keyAccessCnt = []byte{2} 61 keyEntryCnt = []byte{3} 62 keyDataIdx = []byte{4} 63 keyGCPos = []byte{5} 64 ) 65 66 type gcItem struct { 67 idx uint64 68 value uint64 69 idxKey []byte 70 } 71 72 type DbStore struct { 73 db *LDBDatabase 74 75 // this should be stored in db, accessed transactionally 76 entryCnt, accessCnt, dataIdx, capacity uint64 77 78 gcPos, gcStartPos []byte 79 gcArray []*gcItem 80 81 hashfunc SwarmHasher 82 83 lock sync.Mutex 84 } 85 86 func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) { 87 s = new(DbStore) 88 89 s.hashfunc = hash 90 91 s.db, err = NewLDBDatabase(path) 92 if err != nil { 93 return 94 } 95 96 s.setCapacity(capacity) 97 98 s.gcStartPos = make([]byte, 1) 99 s.gcStartPos[0] = kpIndex 100 s.gcArray = make([]*gcItem, gcArraySize) 101 102 data, _ := s.db.Get(keyEntryCnt) 103 s.entryCnt = BytesToU64(data) 104 data, _ = s.db.Get(keyAccessCnt) 105 s.accessCnt = BytesToU64(data) 106 data, _ = s.db.Get(keyDataIdx) 107 s.dataIdx = BytesToU64(data) 108 s.gcPos, _ = s.db.Get(keyGCPos) 109 if s.gcPos == nil { 110 s.gcPos = s.gcStartPos 111 } 112 return 113 } 114 115 type dpaDBIndex struct { 116 Idx uint64 117 Access uint64 118 } 119 120 func BytesToU64(data []byte) uint64 { 121 if len(data) < 8 { 122 return 0 123 } 124 return binary.LittleEndian.Uint64(data) 125 } 126 127 func U64ToBytes(val uint64) []byte { 128 data := make([]byte, 8) 129 binary.LittleEndian.PutUint64(data, val) 130 return data 131 } 132 133 func getIndexGCValue(index *dpaDBIndex) uint64 { 134 return index.Access 135 } 136 137 func (s *DbStore) updateIndexAccess(index *dpaDBIndex) { 138 index.Access = s.accessCnt 139 } 140 141 func getIndexKey(hash Key) []byte { 142 HashSize := len(hash) 143 key := make([]byte, HashSize+1) 144 key[0] = 0 145 copy(key[1:], hash[:]) 146 return key 147 } 148 149 func getDataKey(idx uint64) []byte { 150 key := make([]byte, 9) 151 key[0] = 1 152 binary.BigEndian.PutUint64(key[1:9], idx) 153 154 return key 155 } 156 157 func encodeIndex(index *dpaDBIndex) []byte { 158 data, _ := rlp.EncodeToBytes(index) 159 return data 160 } 161 162 func encodeData(chunk *Chunk) []byte { 163 return chunk.SData 164 } 165 166 func decodeIndex(data []byte, index *dpaDBIndex) { 167 dec := rlp.NewStream(bytes.NewReader(data), 0) 168 dec.Decode(index) 169 } 170 171 func decodeData(data []byte, chunk *Chunk) { 172 chunk.SData = data 173 chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8])) 174 } 175 176 func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int { 177 pivotValue := list[pivotIndex].value 178 dd := list[pivotIndex] 179 list[pivotIndex] = list[right] 180 list[right] = dd 181 storeIndex := left 182 for i := left; i < right; i++ { 183 if list[i].value < pivotValue { 184 dd = list[storeIndex] 185 list[storeIndex] = list[i] 186 list[i] = dd 187 storeIndex++ 188 } 189 } 190 dd = list[storeIndex] 191 list[storeIndex] = list[right] 192 list[right] = dd 193 return storeIndex 194 } 195 196 func gcListSelect(list []*gcItem, left int, right int, n int) int { 197 if left == right { 198 return left 199 } 200 pivotIndex := (left + right) / 2 201 pivotIndex = gcListPartition(list, left, right, pivotIndex) 202 if n == pivotIndex { 203 return n 204 } else { 205 if n < pivotIndex { 206 return gcListSelect(list, left, pivotIndex-1, n) 207 } else { 208 return gcListSelect(list, pivotIndex+1, right, n) 209 } 210 } 211 } 212 213 func (s *DbStore) collectGarbage(ratio float32) { 214 it := s.db.NewIterator() 215 it.Seek(s.gcPos) 216 if it.Valid() { 217 s.gcPos = it.Key() 218 } else { 219 s.gcPos = nil 220 } 221 gcnt := 0 222 223 for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) { 224 225 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 226 it.Seek(s.gcStartPos) 227 if it.Valid() { 228 s.gcPos = it.Key() 229 } else { 230 s.gcPos = nil 231 } 232 } 233 234 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 235 break 236 } 237 238 gci := new(gcItem) 239 gci.idxKey = s.gcPos 240 var index dpaDBIndex 241 decodeIndex(it.Value(), &index) 242 gci.idx = index.Idx 243 // the smaller, the more likely to be gc'd 244 gci.value = getIndexGCValue(&index) 245 s.gcArray[gcnt] = gci 246 gcnt++ 247 it.Next() 248 if it.Valid() { 249 s.gcPos = it.Key() 250 } else { 251 s.gcPos = nil 252 } 253 } 254 it.Release() 255 256 cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio)) 257 cutval := s.gcArray[cutidx].value 258 259 // fmt.Print(gcnt, " ", s.entryCnt, " ") 260 261 // actual gc 262 for i := 0; i < gcnt; i++ { 263 if s.gcArray[i].value <= cutval { 264 gcCounter.Inc(1) 265 s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey) 266 } 267 } 268 269 // fmt.Println(s.entryCnt) 270 271 s.db.Put(keyGCPos, s.gcPos) 272 } 273 274 // Export writes all chunks from the store to a tar archive, returning the 275 // number of chunks written. 276 func (s *DbStore) Export(out io.Writer) (int64, error) { 277 tw := tar.NewWriter(out) 278 defer tw.Close() 279 280 it := s.db.NewIterator() 281 defer it.Release() 282 var count int64 283 for ok := it.Seek([]byte{kpIndex}); ok; ok = it.Next() { 284 key := it.Key() 285 if (key == nil) || (key[0] != kpIndex) { 286 break 287 } 288 289 var index dpaDBIndex 290 decodeIndex(it.Value(), &index) 291 292 data, err := s.db.Get(getDataKey(index.Idx)) 293 if err != nil { 294 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 295 continue 296 } 297 298 hdr := &tar.Header{ 299 Name: hex.EncodeToString(key[1:]), 300 Mode: 0644, 301 Size: int64(len(data)), 302 } 303 if err := tw.WriteHeader(hdr); err != nil { 304 return count, err 305 } 306 if _, err := tw.Write(data); err != nil { 307 return count, err 308 } 309 count++ 310 } 311 312 return count, nil 313 } 314 315 // Import reads chunks into the store from a tar archive, returning the number 316 // of chunks read. 317 func (s *DbStore) Import(in io.Reader) (int64, error) { 318 tr := tar.NewReader(in) 319 320 var count int64 321 for { 322 hdr, err := tr.Next() 323 if err == io.EOF { 324 break 325 } else if err != nil { 326 return count, err 327 } 328 329 if len(hdr.Name) != 64 { 330 log.Warn("ignoring non-chunk file", "name", hdr.Name) 331 continue 332 } 333 334 key, err := hex.DecodeString(hdr.Name) 335 if err != nil { 336 log.Warn("ignoring invalid chunk file", "name", hdr.Name, "err", err) 337 continue 338 } 339 340 data, err := ioutil.ReadAll(tr) 341 if err != nil { 342 return count, err 343 } 344 345 s.Put(&Chunk{Key: key, SData: data}) 346 count++ 347 } 348 349 return count, nil 350 } 351 352 func (s *DbStore) Cleanup() { 353 //Iterates over the database and checks that there are no faulty chunks 354 it := s.db.NewIterator() 355 startPosition := []byte{kpIndex} 356 it.Seek(startPosition) 357 var key []byte 358 var errorsFound, total int 359 for it.Valid() { 360 key = it.Key() 361 if (key == nil) || (key[0] != kpIndex) { 362 break 363 } 364 total++ 365 var index dpaDBIndex 366 decodeIndex(it.Value(), &index) 367 368 data, err := s.db.Get(getDataKey(index.Idx)) 369 if err != nil { 370 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 371 s.delete(index.Idx, getIndexKey(key[1:])) 372 errorsFound++ 373 } else { 374 hasher := s.hashfunc() 375 hasher.Write(data) 376 hash := hasher.Sum(nil) 377 if !bytes.Equal(hash, key[1:]) { 378 log.Warn(fmt.Sprintf("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:])) 379 s.delete(index.Idx, getIndexKey(key[1:])) 380 errorsFound++ 381 } 382 } 383 it.Next() 384 } 385 it.Release() 386 log.Warn(fmt.Sprintf("Found %v errors out of %v entries", errorsFound, total)) 387 } 388 389 func (s *DbStore) delete(idx uint64, idxKey []byte) { 390 batch := new(leveldb.Batch) 391 batch.Delete(idxKey) 392 batch.Delete(getDataKey(idx)) 393 dbStoreDeleteCounter.Inc(1) 394 s.entryCnt-- 395 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 396 s.db.Write(batch) 397 } 398 399 func (s *DbStore) Counter() uint64 { 400 s.lock.Lock() 401 defer s.lock.Unlock() 402 return s.dataIdx 403 } 404 405 func (s *DbStore) Put(chunk *Chunk) { 406 s.lock.Lock() 407 defer s.lock.Unlock() 408 409 ikey := getIndexKey(chunk.Key) 410 var index dpaDBIndex 411 412 if s.tryAccessIdx(ikey, &index) { 413 if chunk.dbStored != nil { 414 close(chunk.dbStored) 415 } 416 log.Trace(fmt.Sprintf("Storing to DB: chunk already exists, only update access")) 417 return // already exists, only update access 418 } 419 420 data := encodeData(chunk) 421 //data := ethutil.Encode([]interface{}{entry}) 422 423 if s.entryCnt >= s.capacity { 424 s.collectGarbage(gcArrayFreeRatio) 425 } 426 427 batch := new(leveldb.Batch) 428 429 batch.Put(getDataKey(s.dataIdx), data) 430 431 index.Idx = s.dataIdx 432 s.updateIndexAccess(&index) 433 434 idata := encodeIndex(&index) 435 batch.Put(ikey, idata) 436 437 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 438 s.entryCnt++ 439 batch.Put(keyDataIdx, U64ToBytes(s.dataIdx)) 440 s.dataIdx++ 441 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 442 s.accessCnt++ 443 444 s.db.Write(batch) 445 if chunk.dbStored != nil { 446 close(chunk.dbStored) 447 } 448 log.Trace(fmt.Sprintf("DbStore.Put: %v. db storage counter: %v ", chunk.Key.Log(), s.dataIdx)) 449 } 450 451 // try to find index; if found, update access cnt and return true 452 func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool { 453 idata, err := s.db.Get(ikey) 454 if err != nil { 455 return false 456 } 457 decodeIndex(idata, index) 458 459 batch := new(leveldb.Batch) 460 461 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 462 s.accessCnt++ 463 s.updateIndexAccess(index) 464 idata = encodeIndex(index) 465 batch.Put(ikey, idata) 466 467 s.db.Write(batch) 468 469 return true 470 } 471 472 func (s *DbStore) Get(key Key) (chunk *Chunk, err error) { 473 s.lock.Lock() 474 defer s.lock.Unlock() 475 476 var index dpaDBIndex 477 478 if s.tryAccessIdx(getIndexKey(key), &index) { 479 var data []byte 480 data, err = s.db.Get(getDataKey(index.Idx)) 481 if err != nil { 482 log.Trace(fmt.Sprintf("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err)) 483 s.delete(index.Idx, getIndexKey(key)) 484 return 485 } 486 487 hasher := s.hashfunc() 488 hasher.Write(data) 489 hash := hasher.Sum(nil) 490 if !bytes.Equal(hash, key) { 491 s.delete(index.Idx, getIndexKey(key)) 492 log.Warn("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'") 493 } 494 495 chunk = &Chunk{ 496 Key: key, 497 } 498 decodeData(data, chunk) 499 } else { 500 err = notFound 501 } 502 503 return 504 505 } 506 507 func (s *DbStore) updateAccessCnt(key Key) { 508 509 s.lock.Lock() 510 defer s.lock.Unlock() 511 512 var index dpaDBIndex 513 s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt 514 515 } 516 517 func (s *DbStore) setCapacity(c uint64) { 518 519 s.lock.Lock() 520 defer s.lock.Unlock() 521 522 s.capacity = c 523 524 if s.entryCnt > c { 525 ratio := float32(1.01) - float32(c)/float32(s.entryCnt) 526 if ratio < gcArrayFreeRatio { 527 ratio = gcArrayFreeRatio 528 } 529 if ratio > 1 { 530 ratio = 1 531 } 532 for s.entryCnt > c { 533 s.collectGarbage(ratio) 534 } 535 } 536 } 537 538 func (s *DbStore) Close() { 539 s.db.Close() 540 } 541 542 // describes a section of the DbStore representing the unsynced 543 // domain relevant to a peer 544 // Start - Stop designate a continuous area Keys in an address space 545 // typically the addresses closer to us than to the peer but not closer 546 // another closer peer in between 547 // From - To designates a time interval typically from the last disconnect 548 // till the latest connection (real time traffic is relayed) 549 type DbSyncState struct { 550 Start, Stop Key 551 First, Last uint64 552 } 553 554 // implements the syncer iterator interface 555 // iterates by storage index (~ time of storage = first entry to db) 556 type dbSyncIterator struct { 557 it iterator.Iterator 558 DbSyncState 559 } 560 561 // initialises a sync iterator from a syncToken (passed in with the handshake) 562 func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) { 563 if state.First > state.Last { 564 return nil, fmt.Errorf("no entries found") 565 } 566 si = &dbSyncIterator{ 567 it: self.db.NewIterator(), 568 DbSyncState: state, 569 } 570 si.it.Seek(getIndexKey(state.Start)) 571 return si, nil 572 } 573 574 // walk the area from Start to Stop and returns items within time interval 575 // First to Last 576 func (self *dbSyncIterator) Next() (key Key) { 577 for self.it.Valid() { 578 dbkey := self.it.Key() 579 if dbkey[0] != 0 { 580 break 581 } 582 key = Key(make([]byte, len(dbkey)-1)) 583 copy(key[:], dbkey[1:]) 584 if bytes.Compare(key[:], self.Start) <= 0 { 585 self.it.Next() 586 continue 587 } 588 if bytes.Compare(key[:], self.Stop) > 0 { 589 break 590 } 591 var index dpaDBIndex 592 decodeIndex(self.it.Value(), &index) 593 self.it.Next() 594 if (index.Idx >= self.First) && (index.Idx < self.Last) { 595 return 596 } 597 } 598 self.it.Release() 599 return nil 600 }