github.com/m3shine/gochain@v2.2.26+incompatible/swarm/storage/dbstore.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // disk storage layer for the package bzz 18 // DbStore implements the ChunkStore interface and is used by the DPA as 19 // persistent storage of chunks 20 // it implements purging based on access count allowing for external control of 21 // max capacity 22 23 package storage 24 25 import ( 26 "archive/tar" 27 "bytes" 28 "encoding/binary" 29 "encoding/hex" 30 "fmt" 31 "io" 32 "io/ioutil" 33 "sync" 34 35 "github.com/gochain-io/gochain/log" 36 "github.com/gochain-io/gochain/rlp" 37 "github.com/syndtr/goleveldb/leveldb" 38 "github.com/syndtr/goleveldb/leveldb/iterator" 39 ) 40 41 const ( 42 defaultDbCapacity = 5000000 43 defaultRadius = 0 // not yet used 44 45 gcArraySize = 10000 46 gcArrayFreeRatio = 0.1 47 48 // key prefixes for leveldb storage 49 kpIndex = 0 50 kpData = 1 51 ) 52 53 var ( 54 keyAccessCnt = []byte{2} 55 keyEntryCnt = []byte{3} 56 keyDataIdx = []byte{4} 57 keyGCPos = []byte{5} 58 ) 59 60 type gcItem struct { 61 idx uint64 62 value uint64 63 idxKey []byte 64 } 65 66 type DbStore struct { 67 db *LDBDatabase 68 69 // this should be stored in db, accessed transactionally 70 entryCnt, accessCnt, dataIdx, capacity uint64 71 72 gcPos, gcStartPos []byte 73 gcArray []*gcItem 74 75 hashfunc SwarmHasher 76 77 lock sync.Mutex 78 } 79 80 func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) { 81 s = new(DbStore) 82 83 s.hashfunc = hash 84 85 s.db, err = NewLDBDatabase(path) 86 if err != nil { 87 return 88 } 89 90 s.setCapacity(capacity) 91 92 s.gcStartPos = make([]byte, 1) 93 s.gcStartPos[0] = kpIndex 94 s.gcArray = make([]*gcItem, gcArraySize) 95 96 data, _ := s.db.Get(keyEntryCnt) 97 s.entryCnt = BytesToU64(data) 98 data, _ = s.db.Get(keyAccessCnt) 99 s.accessCnt = BytesToU64(data) 100 data, _ = s.db.Get(keyDataIdx) 101 s.dataIdx = BytesToU64(data) 102 s.gcPos, _ = s.db.Get(keyGCPos) 103 if s.gcPos == nil { 104 s.gcPos = s.gcStartPos 105 } 106 return 107 } 108 109 type dpaDBIndex struct { 110 Idx uint64 111 Access uint64 112 } 113 114 func BytesToU64(data []byte) uint64 { 115 if len(data) < 8 { 116 return 0 117 } 118 return binary.LittleEndian.Uint64(data) 119 } 120 121 func U64ToBytes(val uint64) []byte { 122 data := make([]byte, 8) 123 binary.LittleEndian.PutUint64(data, val) 124 return data 125 } 126 127 func getIndexGCValue(index *dpaDBIndex) uint64 { 128 return index.Access 129 } 130 131 func (s *DbStore) updateIndexAccess(index *dpaDBIndex) { 132 index.Access = s.accessCnt 133 } 134 135 func getIndexKey(hash Key) []byte { 136 HashSize := len(hash) 137 key := make([]byte, HashSize+1) 138 key[0] = 0 139 copy(key[1:], hash[:]) 140 return key 141 } 142 143 func getDataKey(idx uint64) []byte { 144 key := make([]byte, 9) 145 key[0] = 1 146 binary.BigEndian.PutUint64(key[1:9], idx) 147 148 return key 149 } 150 151 func encodeIndex(index *dpaDBIndex) []byte { 152 data, _ := rlp.EncodeToBytes(index) 153 return data 154 } 155 156 func encodeData(chunk *Chunk) []byte { 157 return chunk.SData 158 } 159 160 func decodeIndex(data []byte, index *dpaDBIndex) { 161 _ = rlp.Decode(bytes.NewReader(data), index) 162 } 163 164 func decodeData(data []byte, chunk *Chunk) { 165 chunk.SData = data 166 chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8])) 167 } 168 169 func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int { 170 pivotValue := list[pivotIndex].value 171 dd := list[pivotIndex] 172 list[pivotIndex] = list[right] 173 list[right] = dd 174 storeIndex := left 175 for i := left; i < right; i++ { 176 if list[i].value < pivotValue { 177 dd = list[storeIndex] 178 list[storeIndex] = list[i] 179 list[i] = dd 180 storeIndex++ 181 } 182 } 183 dd = list[storeIndex] 184 list[storeIndex] = list[right] 185 list[right] = dd 186 return storeIndex 187 } 188 189 func gcListSelect(list []*gcItem, left int, right int, n int) int { 190 if left == right { 191 return left 192 } 193 pivotIndex := (left + right) / 2 194 pivotIndex = gcListPartition(list, left, right, pivotIndex) 195 if n == pivotIndex { 196 return n 197 } else { 198 if n < pivotIndex { 199 return gcListSelect(list, left, pivotIndex-1, n) 200 } else { 201 return gcListSelect(list, pivotIndex+1, right, n) 202 } 203 } 204 } 205 206 func (s *DbStore) collectGarbage(ratio float32) { 207 it := s.db.NewIterator() 208 it.Seek(s.gcPos) 209 if it.Valid() { 210 s.gcPos = it.Key() 211 } else { 212 s.gcPos = nil 213 } 214 gcnt := 0 215 216 for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) { 217 218 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 219 it.Seek(s.gcStartPos) 220 if it.Valid() { 221 s.gcPos = it.Key() 222 } else { 223 s.gcPos = nil 224 } 225 } 226 227 if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) { 228 break 229 } 230 231 gci := new(gcItem) 232 gci.idxKey = s.gcPos 233 var index dpaDBIndex 234 decodeIndex(it.Value(), &index) 235 gci.idx = index.Idx 236 // the smaller, the more likely to be gc'd 237 gci.value = getIndexGCValue(&index) 238 s.gcArray[gcnt] = gci 239 gcnt++ 240 it.Next() 241 if it.Valid() { 242 s.gcPos = it.Key() 243 } else { 244 s.gcPos = nil 245 } 246 } 247 it.Release() 248 249 cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio)) 250 cutval := s.gcArray[cutidx].value 251 252 // fmt.Print(gcnt, " ", s.entryCnt, " ") 253 254 // actual gc 255 for i := 0; i < gcnt; i++ { 256 if s.gcArray[i].value <= cutval { 257 s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey) 258 } 259 } 260 261 // fmt.Println(s.entryCnt) 262 263 s.db.Put(keyGCPos, s.gcPos) 264 } 265 266 // Export writes all chunks from the store to a tar archive, returning the 267 // number of chunks written. 268 func (s *DbStore) Export(out io.Writer) (int64, error) { 269 tw := tar.NewWriter(out) 270 defer tw.Close() 271 272 it := s.db.NewIterator() 273 defer it.Release() 274 var count int64 275 for ok := it.Seek([]byte{kpIndex}); ok; ok = it.Next() { 276 key := it.Key() 277 if (key == nil) || (key[0] != kpIndex) { 278 break 279 } 280 281 var index dpaDBIndex 282 decodeIndex(it.Value(), &index) 283 284 data, err := s.db.Get(getDataKey(index.Idx)) 285 if err != nil { 286 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 287 continue 288 } 289 290 hdr := &tar.Header{ 291 Name: hex.EncodeToString(key[1:]), 292 Mode: 0644, 293 Size: int64(len(data)), 294 } 295 if err := tw.WriteHeader(hdr); err != nil { 296 return count, err 297 } 298 if _, err := tw.Write(data); err != nil { 299 return count, err 300 } 301 count++ 302 } 303 304 return count, nil 305 } 306 307 // Import reads chunks into the store from a tar archive, returning the number 308 // of chunks read. 309 func (s *DbStore) Import(in io.Reader) (int64, error) { 310 tr := tar.NewReader(in) 311 312 var count int64 313 for { 314 hdr, err := tr.Next() 315 if err == io.EOF { 316 break 317 } else if err != nil { 318 return count, err 319 } 320 321 if len(hdr.Name) != 64 { 322 log.Warn("ignoring non-chunk file", "name", hdr.Name) 323 continue 324 } 325 326 key, err := hex.DecodeString(hdr.Name) 327 if err != nil { 328 log.Warn("ignoring invalid chunk file", "name", hdr.Name, "err", err) 329 continue 330 } 331 332 data, err := ioutil.ReadAll(tr) 333 if err != nil { 334 return count, err 335 } 336 337 s.Put(&Chunk{Key: key, SData: data}) 338 count++ 339 } 340 341 return count, nil 342 } 343 344 func (s *DbStore) Cleanup() { 345 //Iterates over the database and checks that there are no faulty chunks 346 it := s.db.NewIterator() 347 startPosition := []byte{kpIndex} 348 it.Seek(startPosition) 349 var key []byte 350 var errorsFound, total int 351 for it.Valid() { 352 key = it.Key() 353 if (key == nil) || (key[0] != kpIndex) { 354 break 355 } 356 total++ 357 var index dpaDBIndex 358 decodeIndex(it.Value(), &index) 359 360 data, err := s.db.Get(getDataKey(index.Idx)) 361 if err != nil { 362 log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err)) 363 s.delete(index.Idx, getIndexKey(key[1:])) 364 errorsFound++ 365 } else { 366 hasher := s.hashfunc() 367 hasher.Write(data) 368 hash := hasher.Sum(nil) 369 if !bytes.Equal(hash, key[1:]) { 370 log.Warn(fmt.Sprintf("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:])) 371 s.delete(index.Idx, getIndexKey(key[1:])) 372 errorsFound++ 373 } 374 } 375 it.Next() 376 } 377 it.Release() 378 log.Warn(fmt.Sprintf("Found %v errors out of %v entries", errorsFound, total)) 379 } 380 381 func (s *DbStore) delete(idx uint64, idxKey []byte) { 382 batch := new(leveldb.Batch) 383 batch.Delete(idxKey) 384 batch.Delete(getDataKey(idx)) 385 s.entryCnt-- 386 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 387 s.db.Write(batch) 388 } 389 390 func (s *DbStore) Counter() uint64 { 391 s.lock.Lock() 392 defer s.lock.Unlock() 393 return s.dataIdx 394 } 395 396 func (s *DbStore) Put(chunk *Chunk) { 397 s.lock.Lock() 398 defer s.lock.Unlock() 399 400 ikey := getIndexKey(chunk.Key) 401 var index dpaDBIndex 402 403 if s.tryAccessIdx(ikey, &index) { 404 if chunk.dbStored != nil { 405 close(chunk.dbStored) 406 } 407 log.Trace(fmt.Sprintf("Storing to DB: chunk already exists, only update access")) 408 return // already exists, only update access 409 } 410 411 data := encodeData(chunk) 412 //data := ethutil.Encode([]interface{}{entry}) 413 414 if s.entryCnt >= s.capacity { 415 s.collectGarbage(gcArrayFreeRatio) 416 } 417 418 batch := new(leveldb.Batch) 419 420 batch.Put(getDataKey(s.dataIdx), data) 421 422 index.Idx = s.dataIdx 423 s.updateIndexAccess(&index) 424 425 idata := encodeIndex(&index) 426 batch.Put(ikey, idata) 427 428 batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt)) 429 s.entryCnt++ 430 batch.Put(keyDataIdx, U64ToBytes(s.dataIdx)) 431 s.dataIdx++ 432 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 433 s.accessCnt++ 434 435 s.db.Write(batch) 436 if chunk.dbStored != nil { 437 close(chunk.dbStored) 438 } 439 log.Trace(fmt.Sprintf("DbStore.Put: %v. db storage counter: %v ", chunk.Key.Log(), s.dataIdx)) 440 } 441 442 // try to find index; if found, update access cnt and return true 443 func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool { 444 idata, err := s.db.Get(ikey) 445 if err != nil { 446 return false 447 } 448 decodeIndex(idata, index) 449 450 batch := new(leveldb.Batch) 451 452 batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt)) 453 s.accessCnt++ 454 s.updateIndexAccess(index) 455 idata = encodeIndex(index) 456 batch.Put(ikey, idata) 457 458 s.db.Write(batch) 459 460 return true 461 } 462 463 func (s *DbStore) Get(key Key) (chunk *Chunk, err error) { 464 s.lock.Lock() 465 defer s.lock.Unlock() 466 467 var index dpaDBIndex 468 469 if s.tryAccessIdx(getIndexKey(key), &index) { 470 var data []byte 471 data, err = s.db.Get(getDataKey(index.Idx)) 472 if err != nil { 473 log.Trace(fmt.Sprintf("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err)) 474 s.delete(index.Idx, getIndexKey(key)) 475 return 476 } 477 478 hasher := s.hashfunc() 479 hasher.Write(data) 480 hash := hasher.Sum(nil) 481 if !bytes.Equal(hash, key) { 482 s.delete(index.Idx, getIndexKey(key)) 483 log.Warn("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'") 484 } 485 486 chunk = &Chunk{ 487 Key: key, 488 } 489 decodeData(data, chunk) 490 } else { 491 err = notFound 492 } 493 494 return 495 496 } 497 498 func (s *DbStore) updateAccessCnt(key Key) { 499 500 s.lock.Lock() 501 defer s.lock.Unlock() 502 503 var index dpaDBIndex 504 s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt 505 506 } 507 508 func (s *DbStore) setCapacity(c uint64) { 509 510 s.lock.Lock() 511 defer s.lock.Unlock() 512 513 s.capacity = c 514 515 if s.entryCnt > c { 516 ratio := float32(1.01) - float32(c)/float32(s.entryCnt) 517 if ratio < gcArrayFreeRatio { 518 ratio = gcArrayFreeRatio 519 } 520 if ratio > 1 { 521 ratio = 1 522 } 523 for s.entryCnt > c { 524 s.collectGarbage(ratio) 525 } 526 } 527 } 528 529 func (s *DbStore) Close() { 530 s.db.Close() 531 } 532 533 // describes a section of the DbStore representing the unsynced 534 // domain relevant to a peer 535 // Start - Stop designate a continuous area Keys in an address space 536 // typically the addresses closer to us than to the peer but not closer 537 // another closer peer in between 538 // From - To designates a time interval typically from the last disconnect 539 // till the latest connection (real time traffic is relayed) 540 type DbSyncState struct { 541 Start, Stop Key 542 First, Last uint64 543 } 544 545 // implements the syncer iterator interface 546 // iterates by storage index (~ time of storage = first entry to db) 547 type dbSyncIterator struct { 548 it iterator.Iterator 549 DbSyncState 550 } 551 552 // initialises a sync iterator from a syncToken (passed in with the handshake) 553 func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) { 554 if state.First > state.Last { 555 return nil, fmt.Errorf("no entries found") 556 } 557 si = &dbSyncIterator{ 558 it: self.db.NewIterator(), 559 DbSyncState: state, 560 } 561 si.it.Seek(getIndexKey(state.Start)) 562 return si, nil 563 } 564 565 // walk the area from Start to Stop and returns items within time interval 566 // First to Last 567 func (self *dbSyncIterator) Next() (key Key) { 568 for self.it.Valid() { 569 dbkey := self.it.Key() 570 if dbkey[0] != 0 { 571 break 572 } 573 key = Key(make([]byte, len(dbkey)-1)) 574 copy(key[:], dbkey[1:]) 575 if bytes.Compare(key[:], self.Start) <= 0 { 576 self.it.Next() 577 continue 578 } 579 if bytes.Compare(key[:], self.Stop) > 0 { 580 break 581 } 582 var index dpaDBIndex 583 decodeIndex(self.it.Value(), &index) 584 self.it.Next() 585 if (index.Idx >= self.First) && (index.Idx < self.Last) { 586 return 587 } 588 } 589 self.it.Release() 590 return nil 591 }