github.com/n1ghtfa1l/go-vnt@v0.6.4-alpha.6/swarm/storage/dbstore.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // disk storage layer for the package bzz 18 // DbStore implements the ChunkStore interface and is used by the DPA as 19 // persistent storage of chunks 20 // it implements purging based on access count allowing for external control of 21 // max capacity 22 23 package storage 24 25 import ( 26 "archive/tar" 27 "bytes" 28 "encoding/binary" 29 "encoding/hex" 30 "fmt" 31 "io" 32 "io/ioutil" 33 "sync" 34 35 "github.com/syndtr/goleveldb/leveldb" 36 "github.com/syndtr/goleveldb/leveldb/iterator" 37 "github.com/vntchain/go-vnt/log" 38 "github.com/vntchain/go-vnt/metrics" 39 "github.com/vntchain/go-vnt/rlp" 40 ) 41 42 //metrics variables 43 var ( 44 gcCounter = metrics.NewRegisteredCounter("storage.db.dbstore.gc.count", nil) 45 dbStoreDeleteCounter = metrics.NewRegisteredCounter("storage.db.dbstore.rm.count", nil) 46 ) 47 48 const ( 49 defaultDbCapacity = 5000000 50 defaultRadius = 0 // not yet used 51 52 gcArraySize = 10000 53 gcArrayFreeRatio = 0.1 54 55 // key prefixes for leveldb storage 56 kpIndex = 0 57 ) 58 59 var ( 60 keyAccessCnt = []byte{2} 61 keyEntryCnt = []byte{3} 62 keyDataIdx = []byte{4} 63 keyGCPos = []byte{5} 64 ) 65 66 type gcItem struct { 67 idx uint64 68 value uint64 69 idxKey []byte 70 } 71 72 type DbStore struct { 73 db *LDBDatabase 74 75 // this should be stored in db, accessed transactionally 76 entryCnt, accessCnt, dataIdx, capacity uint64 77 78 gcPos, gcStartPos []byte 79 gcArray []*gcItem 80 81 hashfunc SwarmHasher 82 83 lock sync.Mutex 84 } 85 86 func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) { 87 s = new(DbStore) 88 89 s.hashfunc = hash 90 91 s.db, err = NewLDBDatabase(path) 92 if err != nil { 93 return 94 } 95 96 s.setCapacity(capacity) 97 98 s.gcStartPos = make([]byte, 1) 99 s.gcStartPos[0] = kpIndex 100 s.gcArray = make([]*gcItem, gcArraySize) 101 102 data, _ := s.db.Get(keyEntryCnt) 103 s.entryCnt = BytesToU64(data) 104 data, _ = s.db.Get(keyAccessCnt) 105 s.accessCnt = BytesToU64(data) 106 data, _ = s.db.Get(keyDataIdx) 107 s.dataIdx = BytesToU64(data) 108 s.gcPos, _ = s.db.Get(keyGCPos) 109 if s.gcPos == nil { 110 s.gcPos = s.gcStartPos 111 } 112 return 113 } 114 115 type dpaDBIndex struct { 116 Idx uint64 117 Access uint64 118 } 119 120 func BytesToU64(data []byte) uint64 { 121 if len(data) < 8 { 122 return 0 123 } 124 return binary.LittleEndian.Uint64(data) 125 } 126 127 func U64ToBytes(val uint64) []byte { 128 data := make([]byte, 8) 129 binary.LittleEndian.PutUint64(data, val) 130 return data 131 } 132 133 func getIndexGCValue(index *dpaDBIndex) uint64 { 134 return index.Access 135 } 136 137 func (s *DbStore) updateIndexAccess(index *dpaDBIndex) { 138 index.Access = s.accessCnt 139 } 140 141 func getIndexKey(hash Key) []byte { 142 HashSize := len(hash) 143 key := make([]byte, HashSize+1) 144 key[0] = 0 145 copy(key[1:], hash[:]) 146 return key 147 } 148 149 func getDataKey(idx uint64) []byte { 150 key := make([]byte, 9) 151 key[0] = 1 152 binary.BigEndian.PutUint64(key[1:9], idx) 153 154 return key 155 } 156 157 func encodeIndex(index *dpaDBIndex) []byte { 158 data, _ := rlp.EncodeToBytes(index) 159 return data 160 } 161 162 func encodeData(chunk *Chunk) []byte { 163 return chunk.SData 164 } 165 166 func decodeIndex(data []byte, index *dpaDBIndex) { 167 dec := rlp.NewStream(bytes.NewReader(data), 0) 168 dec.Decode(index) 169 } 170 171 func decodeData(data []byte, chunk *Chunk) { 172 chunk.SData = data 173 chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8])) 174 } 175 176 func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int { 177 pivotValue := list[pivotIndex].value 178 dd := list[pivotIndex] 179 list[pivotIndex] = list[right] 180 list[right] = dd 181 storeIndex := left 182 for i := left; i < right; i++ { 183 if list[i].value < pivotValue { 184 dd = list[storeIndex] 185 list[storeIndex] = list[i] 186 list[i] = dd 187 storeIndex++ 188 } 189 } 190 dd = list[storeIndex] 191 list[storeIndex] = list[right] 192 list[right] = dd 193 return storeIndex 194 } 195 196 func gcListSelect(list []*gcItem, left int, right int, n int) int { 197 if left == right { 198 return left 199 } 200 pivotIndex := (left + right) / 2 201 pivotIndex = gcListPartition(list, left, right, pivotIndex) 202 if n == pivotIndex { 203 return n 204 } else { 205 if n < pivotIndex { 206 return gcListSelect(list, left, pivotIndex-1, n) 207 } else { 208 return gcListSelect(list, pivotIndex+1, right, n) 209 } 210 } 211 } 212 213 func (s *DbStore) collectGarbage(ratio float32) { 214 it := s.db.NewIterator() 215 it.Seek(s.gcPos) 216 if it.Valid() { 217 s.gcPos = it.Key() 218 } else { 219 s.gcPos = nil 220 } 221 gcnt := 0 222 223 for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) { 224 225 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 226 it.Seek(s.gcStartPos) 227 if it.Valid() { 228 s.gcPos = it.Key() 229 } else { 230 s.gcPos = nil 231 } 232 } 233 234 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 235 break 236 } 237 238 gci := new(gcItem) 239 gci.idxKey = s.gcPos 240 var index dpaDBIndex 241 decodeIndex(it.Value(), &index) 242 gci.idx = index.Idx 243 // the smaller, the more likely to be gc'd 244 gci.value = getIndexGCValue(&index) 245 s.gcArray[gcnt] = gci 246 gcnt++ 247 it.Next() 248 if it.Valid() { 249 s.gcPos = it.Key() 250 } else { 251 s.gcPos = nil 252 } 253 } 254 it.Release() 255 256 cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio)) 257 cutval := s.gcArray[cutidx].value 258 259 // fmt.Print(gcnt, " ", s.entryCnt, " ") 260 261 // actual gc 262 for i := 0; i < gcnt; i++ { 263 if s.gcArray[i].value <= cutval { 264 gcCounter.Inc(1) 265 s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey) 266 } 267 } 268 269 // fmt.Println(s.entryCnt) 270 271 s.db.Put(keyGCPos, s.gcPos) 272 } 273 274 // Export writes all chunks from the store to a tar archive, returning the 275 // number of chunks written. 276 func (s *DbStore) Export(out io.Writer) (int64, error) { 277 tw := tar.NewWriter(out) 278 defer tw.Close() 279 280 it := s.db.NewIterator() 281 defer it.Release() 282 var count int64 283 for ok := it.Seek([]byte{kpIndex}); ok; ok = it.Next() { 284 key := it.Key() 285 if (key == nil) || (key[0] != kpIndex) { 286 break 287 } 288 289 var index dpaDBIndex 290 decodeIndex(it.Value(), &index) 291 292 data, err := s.db.Get(getDataKey(index.Idx)) 293 if err != nil { 294 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 295 continue 296 } 297 298 hdr := &tar.Header{ 299 Name: hex.EncodeToString(key[1:]), 300 Mode: 0644, 301 Size: int64(len(data)), 302 } 303 if err := tw.WriteHeader(hdr); err != nil { 304 return count, err 305 } 306 if _, err := tw.Write(data); err != nil { 307 return count, err 308 } 309 count++ 310 } 311 312 return count, nil 313 } 314 315 // Import reads chunks into the store from a tar archive, returning the number 316 // of chunks read. 317 func (s *DbStore) Import(in io.Reader) (int64, error) { 318 tr := tar.NewReader(in) 319 320 var count int64 321 for { 322 hdr, err := tr.Next() 323 if err == io.EOF { 324 break 325 } else if err != nil { 326 return count, err 327 } 328 329 if len(hdr.Name) != 64 { 330 log.Warn("ignoring non-chunk file", "name", hdr.Name) 331 continue 332 } 333 334 key, err := hex.DecodeString(hdr.Name) 335 if err != nil { 336 log.Warn("ignoring invalid chunk file", "name", hdr.Name, "err", err) 337 continue 338 } 339 340 data, err := ioutil.ReadAll(tr) 341 if err != nil { 342 return count, err 343 } 344 345 s.Put(&Chunk{Key: key, SData: data}) 346 count++ 347 } 348 349 return count, nil 350 } 351 352 func (s *DbStore) Cleanup() { 353 //Iterates over the database and checks that there are no faulty chunks 354 it := s.db.NewIterator() 355 startPosition := []byte{kpIndex} 356 it.Seek(startPosition) 357 var key []byte 358 var errorsFound, total int 359 for it.Valid() { 360 key = it.Key() 361 if (key == nil) || (key[0] != kpIndex) { 362 break 363 } 364 total++ 365 var index dpaDBIndex 366 decodeIndex(it.Value(), &index) 367 368 data, err := s.db.Get(getDataKey(index.Idx)) 369 if err != nil { 370 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 371 s.delete(index.Idx, getIndexKey(key[1:])) 372 errorsFound++ 373 } else { 374 hasher := s.hashfunc() 375 hasher.Write(data) 376 hash := hasher.Sum(nil) 377 if !bytes.Equal(hash, key[1:]) { 378 log.Warn(fmt.Sprintf("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:])) 379 s.delete(index.Idx, getIndexKey(key[1:])) 380 errorsFound++ 381 } 382 } 383 it.Next() 384 } 385 it.Release() 386 log.Warn(fmt.Sprintf("Found %v errors out of %v entries", errorsFound, total)) 387 } 388 389 func (s *DbStore) delete(idx uint64, idxKey []byte) { 390 batch := new(leveldb.Batch) 391 batch.Delete(idxKey) 392 batch.Delete(getDataKey(idx)) 393 dbStoreDeleteCounter.Inc(1) 394 s.entryCnt-- 395 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 396 s.db.Write(batch) 397 } 398 399 func (s *DbStore) Counter() uint64 { 400 s.lock.Lock() 401 defer s.lock.Unlock() 402 return s.dataIdx 403 } 404 405 func (s *DbStore) Put(chunk *Chunk) { 406 s.lock.Lock() 407 defer s.lock.Unlock() 408 409 ikey := getIndexKey(chunk.Key) 410 var index dpaDBIndex 411 412 if s.tryAccessIdx(ikey, &index) { 413 if chunk.dbStored != nil { 414 close(chunk.dbStored) 415 } 416 log.Trace(fmt.Sprintf("Storing to DB: chunk already exists, only update access")) 417 return // already exists, only update access 418 } 419 420 data := encodeData(chunk) 421 if s.entryCnt >= s.capacity { 422 s.collectGarbage(gcArrayFreeRatio) 423 } 424 425 batch := new(leveldb.Batch) 426 427 batch.Put(getDataKey(s.dataIdx), data) 428 429 index.Idx = s.dataIdx 430 s.updateIndexAccess(&index) 431 432 idata := encodeIndex(&index) 433 batch.Put(ikey, idata) 434 435 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 436 s.entryCnt++ 437 batch.Put(keyDataIdx, U64ToBytes(s.dataIdx)) 438 s.dataIdx++ 439 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 440 s.accessCnt++ 441 442 s.db.Write(batch) 443 if chunk.dbStored != nil { 444 close(chunk.dbStored) 445 } 446 log.Trace(fmt.Sprintf("DbStore.Put: %v. db storage counter: %v ", chunk.Key.Log(), s.dataIdx)) 447 } 448 449 // try to find index; if found, update access cnt and return true 450 func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool { 451 idata, err := s.db.Get(ikey) 452 if err != nil { 453 return false 454 } 455 decodeIndex(idata, index) 456 457 batch := new(leveldb.Batch) 458 459 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 460 s.accessCnt++ 461 s.updateIndexAccess(index) 462 idata = encodeIndex(index) 463 batch.Put(ikey, idata) 464 465 s.db.Write(batch) 466 467 return true 468 } 469 470 func (s *DbStore) Get(key Key) (chunk *Chunk, err error) { 471 s.lock.Lock() 472 defer s.lock.Unlock() 473 474 var index dpaDBIndex 475 476 if s.tryAccessIdx(getIndexKey(key), &index) { 477 var data []byte 478 data, err = s.db.Get(getDataKey(index.Idx)) 479 if err != nil { 480 log.Trace(fmt.Sprintf("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err)) 481 s.delete(index.Idx, getIndexKey(key)) 482 return 483 } 484 485 hasher := s.hashfunc() 486 hasher.Write(data) 487 hash := hasher.Sum(nil) 488 if !bytes.Equal(hash, key) { 489 s.delete(index.Idx, getIndexKey(key)) 490 log.Warn("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'") 491 } 492 493 chunk = &Chunk{ 494 Key: key, 495 } 496 decodeData(data, chunk) 497 } else { 498 err = notFound 499 } 500 501 return 502 503 } 504 505 func (s *DbStore) updateAccessCnt(key Key) { 506 507 s.lock.Lock() 508 defer s.lock.Unlock() 509 510 var index dpaDBIndex 511 s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt 512 513 } 514 515 func (s *DbStore) setCapacity(c uint64) { 516 517 s.lock.Lock() 518 defer s.lock.Unlock() 519 520 s.capacity = c 521 522 if s.entryCnt > c { 523 ratio := float32(1.01) - float32(c)/float32(s.entryCnt) 524 if ratio < gcArrayFreeRatio { 525 ratio = gcArrayFreeRatio 526 } 527 if ratio > 1 { 528 ratio = 1 529 } 530 for s.entryCnt > c { 531 s.collectGarbage(ratio) 532 } 533 } 534 } 535 536 func (s *DbStore) Close() { 537 s.db.Close() 538 } 539 540 // describes a section of the DbStore representing the unsynced 541 // domain relevant to a peer 542 // Start - Stop designate a continuous area Keys in an address space 543 // typically the addresses closer to us than to the peer but not closer 544 // another closer peer in between 545 // From - To designates a time interval typically from the last disconnect 546 // till the latest connection (real time traffic is relayed) 547 type DbSyncState struct { 548 Start, Stop Key 549 First, Last uint64 550 } 551 552 // implements the syncer iterator interface 553 // iterates by storage index (~ time of storage = first entry to db) 554 type dbSyncIterator struct { 555 it iterator.Iterator 556 DbSyncState 557 } 558 559 // initialises a sync iterator from a syncToken (passed in with the handshake) 560 func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) { 561 if state.First > state.Last { 562 return nil, fmt.Errorf("no entries found") 563 } 564 si = &dbSyncIterator{ 565 it: self.db.NewIterator(), 566 DbSyncState: state, 567 } 568 si.it.Seek(getIndexKey(state.Start)) 569 return si, nil 570 } 571 572 // walk the area from Start to Stop and returns items within time interval 573 // First to Last 574 func (self *dbSyncIterator) Next() (key Key) { 575 for self.it.Valid() { 576 dbkey := self.it.Key() 577 if dbkey[0] != 0 { 578 break 579 } 580 key = Key(make([]byte, len(dbkey)-1)) 581 copy(key[:], dbkey[1:]) 582 if bytes.Compare(key[:], self.Start) <= 0 { 583 self.it.Next() 584 continue 585 } 586 if bytes.Compare(key[:], self.Stop) > 0 { 587 break 588 } 589 var index dpaDBIndex 590 decodeIndex(self.it.Value(), &index) 591 self.it.Next() 592 if (index.Idx >= self.First) && (index.Idx < self.Last) { 593 return 594 } 595 } 596 self.it.Release() 597 return nil 598 }