github.com/janotchain/janota@v0.0.0-20220824112012-93ea4c5dee78/swarm/storage/dbstore.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Disk storage layer for the package bzz.
// DbStore implements the ChunkStore interface and is used by the DPA as
// persistent storage of chunks.
// It implements purging based on access count, allowing external control of
// maximum capacity.

package storage

import (
	"archive/tar"
	"bytes"
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"io"
	"io/ioutil"
	"sync"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/iterator"
)

const (
	defaultDbCapacity = 5000000
	defaultRadius     = 0 // not yet used

	gcArraySize      = 10000
	gcArrayFreeRatio = 0.1

	// key prefixes for leveldb storage
	kpIndex = 0
	kpData  = 1
)

var (
	keyAccessCnt = []byte{2}
	keyEntryCnt  = []byte{3}
	keyDataIdx   = []byte{4}
	keyGCPos     = []byte{5}
)

type gcItem struct {
	idx    uint64
	value  uint64
	idxKey []byte
}

type DbStore struct {
	db *LDBDatabase

	// this should be stored in db, accessed transactionally
	entryCnt, accessCnt, dataIdx, capacity uint64

	gcPos, gcStartPos []byte
	gcArray           []*gcItem

	hashfunc SwarmHasher

	lock sync.Mutex
}

func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) {
	s = new(DbStore)

	s.hashfunc = hash

	s.db, err = NewLDBDatabase(path)
	if err != nil {
		return
	}

	s.setCapacity(capacity)

	s.gcStartPos = make([]byte, 1)
	s.gcStartPos[0] = kpIndex
	s.gcArray = make([]*gcItem, gcArraySize)

	data, _ := s.db.Get(keyEntryCnt)
	s.entryCnt = BytesToU64(data)
	data, _ = s.db.Get(keyAccessCnt)
	s.accessCnt = BytesToU64(data)
	data, _ = s.db.Get(keyDataIdx)
	s.dataIdx = BytesToU64(data)
	s.gcPos, _ = s.db.Get(keyGCPos)
	if s.gcPos == nil {
		s.gcPos = s.gcStartPos
	}
	return
}

type dpaDBIndex struct {
	Idx    uint64
	Access uint64
}

func BytesToU64(data []byte) uint64 {
	if len(data) < 8 {
		return 0
	}
	return binary.LittleEndian.Uint64(data)
}

func U64ToBytes(val uint64) []byte {
	data := make([]byte, 8)
	binary.LittleEndian.PutUint64(data, val)
	return data
}

func getIndexGCValue(index *dpaDBIndex) uint64 {
	return index.Access
}

func (s *DbStore) updateIndexAccess(index *dpaDBIndex) {
	index.Access = s.accessCnt
}

func getIndexKey(hash Key) []byte {
	HashSize := len(hash)
	key := make([]byte, HashSize+1)
	key[0] = kpIndex
	copy(key[1:], hash[:])
	return key
}
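
// Illustrative sketch, not part of the original file: the store keeps two key
// spaces side by side in one leveldb. An index entry maps kpIndex||hash to an
// rlp-encoded dpaDBIndex, and a data entry maps kpData||bigendian(Idx) to the
// raw chunk payload, so a chunk lookup takes two reads:
//
//	ikey := getIndexKey(key)                   // 0x00 || hash
//	idata, _ := s.db.Get(ikey)                 // -> rlp(dpaDBIndex{Idx, Access})
//	var index dpaDBIndex
//	decodeIndex(idata, &index)
//	data, _ := s.db.Get(getDataKey(index.Idx)) // 0x01 || uint64 -> chunk bytes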

func getDataKey(idx uint64) []byte {
	key := make([]byte, 9)
	key[0] = kpData
	binary.BigEndian.PutUint64(key[1:9], idx)

	return key
}

func encodeIndex(index *dpaDBIndex) []byte {
	data, _ := rlp.EncodeToBytes(index)
	return data
}

func encodeData(chunk *Chunk) []byte {
	return chunk.SData
}

func decodeIndex(data []byte, index *dpaDBIndex) {
	dec := rlp.NewStream(bytes.NewReader(data), 0)
	dec.Decode(index)
}

func decodeData(data []byte, chunk *Chunk) {
	chunk.SData = data
	chunk.Size = int64(binary.LittleEndian.Uint64(data[0:8]))
}

// gcListPartition partitions list[left:right+1] around the value at
// pivotIndex and returns the pivot's final position (Lomuto-style partition).
func gcListPartition(list []*gcItem, left int, right int, pivotIndex int) int {
	pivotValue := list[pivotIndex].value
	dd := list[pivotIndex]
	list[pivotIndex] = list[right]
	list[right] = dd
	storeIndex := left
	for i := left; i < right; i++ {
		if list[i].value < pivotValue {
			dd = list[storeIndex]
			list[storeIndex] = list[i]
			list[i] = dd
			storeIndex++
		}
	}
	dd = list[storeIndex]
	list[storeIndex] = list[right]
	list[right] = dd
	return storeIndex
}

// gcListSelect finds the position of the n-th smallest item (by value) in
// list[left:right+1] using quickselect.
func gcListSelect(list []*gcItem, left int, right int, n int) int {
	if left == right {
		return left
	}
	pivotIndex := (left + right) / 2
	pivotIndex = gcListPartition(list, left, right, pivotIndex)
	if n == pivotIndex {
		return n
	}
	if n < pivotIndex {
		return gcListSelect(list, left, pivotIndex-1, n)
	}
	return gcListSelect(list, pivotIndex+1, right, n)
}

func (s *DbStore) collectGarbage(ratio float32) {
	it := s.db.NewIterator()
	it.Seek(s.gcPos)
	if it.Valid() {
		s.gcPos = it.Key()
	} else {
		s.gcPos = nil
	}
	gcnt := 0

	for (gcnt < gcArraySize) && (uint64(gcnt) < s.entryCnt) {

		if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) {
			it.Seek(s.gcStartPos)
			if it.Valid() {
				s.gcPos = it.Key()
			} else {
				s.gcPos = nil
			}
		}

		if (s.gcPos == nil) || (s.gcPos[0] != kpIndex) {
			break
		}

		gci := new(gcItem)
		gci.idxKey = s.gcPos
		var index dpaDBIndex
		decodeIndex(it.Value(), &index)
		gci.idx = index.Idx
		// the smaller, the more likely to be gc'd
		gci.value = getIndexGCValue(&index)
		s.gcArray[gcnt] = gci
		gcnt++
		it.Next()
		if it.Valid() {
			s.gcPos = it.Key()
		} else {
			s.gcPos = nil
		}
	}
	it.Release()

	// guard against an empty sample: gcListSelect on an empty slice would
	// dereference a nil gcArray entry
	if gcnt == 0 {
		return
	}

	cutidx := gcListSelect(s.gcArray, 0, gcnt-1, int(float32(gcnt)*ratio))
	cutval := s.gcArray[cutidx].value

	// actual gc
	for i := 0; i < gcnt; i++ {
		if s.gcArray[i].value <= cutval {
			s.delete(s.gcArray[i].idx, s.gcArray[i].idxKey)
		}
	}

	s.db.Put(keyGCPos, s.gcPos)
}
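
// Illustrative sketch, not part of the original file: collectGarbage samples
// up to gcArraySize index entries starting from the persisted cursor gcPos,
// scores each by its access count, uses gcListSelect (quickselect) to find
// the cut value below which roughly ratio*gcnt of the sample falls, and
// deletes every sampled entry at or below that cut. With the defaults, a full
// store sheds about 10% of a 10000-entry sample per pass, triggered from Put:
//
//	if s.entryCnt >= s.capacity {
//		s.collectGarbage(gcArrayFreeRatio) // gcArrayFreeRatio = 0.1
//	}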

// Export writes all chunks from the store to a tar archive, returning the
// number of chunks written.
func (s *DbStore) Export(out io.Writer) (int64, error) {
	tw := tar.NewWriter(out)
	defer tw.Close()

	it := s.db.NewIterator()
	defer it.Release()
	var count int64
	for ok := it.Seek([]byte{kpIndex}); ok; ok = it.Next() {
		key := it.Key()
		if (key == nil) || (key[0] != kpIndex) {
			break
		}

		var index dpaDBIndex
		decodeIndex(it.Value(), &index)

		data, err := s.db.Get(getDataKey(index.Idx))
		if err != nil {
			log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err))
			continue
		}

		hdr := &tar.Header{
			Name: hex.EncodeToString(key[1:]),
			Mode: 0644,
			Size: int64(len(data)),
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return count, err
		}
		if _, err := tw.Write(data); err != nil {
			return count, err
		}
		count++
	}

	return count, nil
}

// Import reads chunks into the store from a tar archive, returning the number
// of chunks read.
func (s *DbStore) Import(in io.Reader) (int64, error) {
	tr := tar.NewReader(in)

	var count int64
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			return count, err
		}

		if len(hdr.Name) != 64 {
			log.Warn("ignoring non-chunk file", "name", hdr.Name)
			continue
		}

		key, err := hex.DecodeString(hdr.Name)
		if err != nil {
			log.Warn("ignoring invalid chunk file", "name", hdr.Name, "err", err)
			continue
		}

		data, err := ioutil.ReadAll(tr)
		if err != nil {
			return count, err
		}

		s.Put(&Chunk{Key: key, SData: data})
		count++
	}

	return count, nil
}
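
// Usage sketch, not part of the original file (the archive path is
// hypothetical): Export and Import round-trip the store through a tar stream,
// where each entry is named by the hex chunk hash and holds the raw chunk
// data, so a store can be backed up to a file and restored elsewhere:
//
//	out, _ := os.Create("chunks.tar")
//	exported, err := store.Export(out)
//	...
//	in, _ := os.Open("chunks.tar")
//	imported, err := store.Import(in)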

// Cleanup iterates over the database and deletes chunks whose data is missing
// or does not hash to its key.
func (s *DbStore) Cleanup() {
	it := s.db.NewIterator()
	startPosition := []byte{kpIndex}
	it.Seek(startPosition)
	var key []byte
	var errorsFound, total int
	for it.Valid() {
		key = it.Key()
		if (key == nil) || (key[0] != kpIndex) {
			break
		}
		total++
		var index dpaDBIndex
		decodeIndex(it.Value(), &index)

		data, err := s.db.Get(getDataKey(index.Idx))
		if err != nil {
			log.Warn(fmt.Sprintf("Chunk %x found but could not be accessed: %v", key[:], err))
			s.delete(index.Idx, getIndexKey(key[1:]))
			errorsFound++
		} else {
			hasher := s.hashfunc()
			hasher.Write(data)
			hash := hasher.Sum(nil)
			if !bytes.Equal(hash, key[1:]) {
				log.Warn(fmt.Sprintf("Found invalid chunk. Hash mismatch. hash=%x, key=%x", hash, key[:]))
				s.delete(index.Idx, getIndexKey(key[1:]))
				errorsFound++
			}
		}
		it.Next()
	}
	it.Release()
	log.Warn(fmt.Sprintf("Found %v errors out of %v entries", errorsFound, total))
}

func (s *DbStore) delete(idx uint64, idxKey []byte) {
	batch := new(leveldb.Batch)
	batch.Delete(idxKey)
	batch.Delete(getDataKey(idx))
	s.entryCnt--
	batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt))
	s.db.Write(batch)
}

func (s *DbStore) Counter() uint64 {
	s.lock.Lock()
	defer s.lock.Unlock()
	return s.dataIdx
}

func (s *DbStore) Put(chunk *Chunk) {
	s.lock.Lock()
	defer s.lock.Unlock()

	ikey := getIndexKey(chunk.Key)
	var index dpaDBIndex

	if s.tryAccessIdx(ikey, &index) {
		if chunk.dbStored != nil {
			close(chunk.dbStored)
		}
		log.Trace(fmt.Sprintf("Storing to DB: chunk already exists, only update access"))
		return // already exists, only update access
	}

	data := encodeData(chunk)

	if s.entryCnt >= s.capacity {
		s.collectGarbage(gcArrayFreeRatio)
	}

	batch := new(leveldb.Batch)

	batch.Put(getDataKey(s.dataIdx), data)

	index.Idx = s.dataIdx
	s.updateIndexAccess(&index)

	idata := encodeIndex(&index)
	batch.Put(ikey, idata)

	batch.Put(keyEntryCnt, U64ToBytes(s.entryCnt))
	s.entryCnt++
	batch.Put(keyDataIdx, U64ToBytes(s.dataIdx))
	s.dataIdx++
	batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt))
	s.accessCnt++

	s.db.Write(batch)
	if chunk.dbStored != nil {
		close(chunk.dbStored)
	}
	log.Trace(fmt.Sprintf("DbStore.Put: %v. db storage counter: %v", chunk.Key.Log(), s.dataIdx))
}

// tryAccessIdx looks up the index entry for ikey; if found, it bumps the
// access counter, rewrites the entry and returns true.
func (s *DbStore) tryAccessIdx(ikey []byte, index *dpaDBIndex) bool {
	idata, err := s.db.Get(ikey)
	if err != nil {
		return false
	}
	decodeIndex(idata, index)

	batch := new(leveldb.Batch)

	batch.Put(keyAccessCnt, U64ToBytes(s.accessCnt))
	s.accessCnt++
	s.updateIndexAccess(index)
	idata = encodeIndex(index)
	batch.Put(ikey, idata)

	s.db.Write(batch)

	return true
}
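
// Usage sketch, not part of the original file: Put stores a chunk under the
// hash in chunk.Key and Get retrieves it again; both bump the access counter
// that collectGarbage later uses as its eviction score. Note that SData is
// expected to carry the chunk size in its first 8 bytes (little-endian), as
// decodeData assumes:
//
//	chunk := &Chunk{Key: key, SData: data} // data = 8-byte LE size || payload
//	store.Put(chunk)
//	got, err := store.Get(key)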

func (s *DbStore) Get(key Key) (chunk *Chunk, err error) {
	s.lock.Lock()
	defer s.lock.Unlock()

	var index dpaDBIndex

	if s.tryAccessIdx(getIndexKey(key), &index) {
		var data []byte
		data, err = s.db.Get(getDataKey(index.Idx))
		if err != nil {
			log.Trace(fmt.Sprintf("DBStore: Chunk %v found but could not be accessed: %v", key.Log(), err))
			s.delete(index.Idx, getIndexKey(key))
			return
		}

		hasher := s.hashfunc()
		hasher.Write(data)
		hash := hasher.Sum(nil)
		if !bytes.Equal(hash, key) {
			s.delete(index.Idx, getIndexKey(key))
			log.Warn("Invalid Chunk in Database. Please repair with command: 'swarm cleandb'")
		}

		chunk = &Chunk{
			Key: key,
		}
		decodeData(data, chunk)
	} else {
		err = notFound
	}

	return
}

func (s *DbStore) updateAccessCnt(key Key) {
	s.lock.Lock()
	defer s.lock.Unlock()

	var index dpaDBIndex
	s.tryAccessIdx(getIndexKey(key), &index) // result_chn == nil, only update access cnt
}

func (s *DbStore) setCapacity(c uint64) {
	s.lock.Lock()
	defer s.lock.Unlock()

	s.capacity = c

	if s.entryCnt > c {
		ratio := float32(1.01) - float32(c)/float32(s.entryCnt)
		if ratio < gcArrayFreeRatio {
			ratio = gcArrayFreeRatio
		}
		if ratio > 1 {
			ratio = 1
		}
		for s.entryCnt > c {
			s.collectGarbage(ratio)
		}
	}
}

func (s *DbStore) Close() {
	s.db.Close()
}

// DbSyncState describes a section of the DbStore representing the unsynced
// domain relevant to a peer.
// Start - Stop designate a continuous area of Keys in an address space,
// typically the addresses closer to us than to the peer, but not closer to
// another peer in between.
// First - Last designates a time interval, typically from the last disconnect
// until the latest connection (real-time traffic is relayed).
type DbSyncState struct {
	Start, Stop Key
	First, Last uint64
}

// dbSyncIterator implements the syncer iterator interface.
// It iterates by storage index (~ time of storage = first entry to db).
type dbSyncIterator struct {
	it iterator.Iterator
	DbSyncState
}

// NewSyncIterator initialises a sync iterator from a syncToken (passed in
// with the handshake).
func (self *DbStore) NewSyncIterator(state DbSyncState) (si *dbSyncIterator, err error) {
	if state.First > state.Last {
		return nil, fmt.Errorf("no entries found")
	}
	si = &dbSyncIterator{
		it:          self.db.NewIterator(),
		DbSyncState: state,
	}
	si.it.Seek(getIndexKey(state.Start))
	return si, nil
}

// Next walks the area from Start to Stop and returns the next key within the
// time interval First to Last, or nil when the iterator is exhausted.
func (self *dbSyncIterator) Next() (key Key) {
	for self.it.Valid() {
		dbkey := self.it.Key()
		if dbkey[0] != kpIndex {
			break
		}
		key = Key(make([]byte, len(dbkey)-1))
		copy(key[:], dbkey[1:])
		if bytes.Compare(key[:], self.Start) <= 0 {
			self.it.Next()
			continue
		}
		if bytes.Compare(key[:], self.Stop) > 0 {
			break
		}
		var index dpaDBIndex
		decodeIndex(self.it.Value(), &index)
		self.it.Next()
		if (index.Idx >= self.First) && (index.Idx < self.Last) {
			return
		}
	}
	self.it.Release()
	return nil
}
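
// Usage sketch, not part of the original file: a sync iterator walks the
// index keyspace over (Start, Stop] and yields only keys whose storage index
// falls within [First, Last), i.e. chunks first stored during that interval:
//
//	si, err := store.NewSyncIterator(DbSyncState{
//		Start: start, Stop: stop,
//		First: 0, Last: store.Counter(),
//	})
//	for key := si.Next(); key != nil; key = si.Next() {
//		// offer key to the peer
//	}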