github.com/gobitfly/go-ethereum@v1.8.12/swarm/storage/pyramid.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"io"
	"io/ioutil"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/swarm/log"
)

/*
The main idea of the pyramid chunker is to process the input data without knowing the entire size a priori.
To achieve this, the chunker tree is built from the ground up until the data is exhausted.
This opens up new avenues, such as easy appends and other modifications to the tree, thereby avoiding
duplication of data chunks.


Below is an example of a two-level chunk tree. The leaf chunks are called data chunks and all the above
chunks are called tree chunks. The tree chunk above the data chunks is level 0 and so on, until it reaches
the root tree chunk.


                                            T10                                        <- Tree chunk lvl1
                                             |
                  ___________________________|_____________________________
                 /                   |                    |                \
                /                    |                     \                \
            __T00__              ___T01__             ___T02__          ___T03__       <- Tree chunks lvl 0
           / /     \            / /      \           / /      \        / /      \
          / /       \          / /        \         / /        \      / /        \
         D1 D2 ... D128       D1 D2 ... D128       D1 D2 ... D128     D1 D2 ... D128   <- Data Chunks


The split function continuously reads the data, creates data chunks and sends them to storage.
When a certain number of data chunks are created (defaultBranches), a signal is sent to create a tree
entry. When the level-0 tree entries reach a certain threshold (defaultBranches), another signal
is sent to a tree entry one level up... and so on... until the data is exhausted AND only one
tree entry is present at a certain level. The key of that tree entry is given out as the rootKey of the file.

*/

var (
	errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk")
	errLoadingTreeChunk     = errors.New("LoadTree Error: Could not load chunk")
)

const (
	ChunkProcessors = 8
	splitTimeout    = time.Minute * 5
)

const (
	DataChunk = 0
	TreeChunk = 1
)

type PyramidSplitterParams struct {
	SplitterParams
	getter Getter
}

func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams {
	hashSize := putter.RefSize()
	return &PyramidSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: chunkSize,
				hashSize:  hashSize,
			},
			reader: reader,
			putter: putter,
			addr:   addr,
		},
		getter: getter,
	}
}
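
// For orientation, a minimal usage sketch (illustrative only; how the
// putter/getter pair is obtained, e.g. from a local chunk store, is outside
// this file):
//
//	addr, wait, err := PyramidSplit(bytes.NewReader(data), putter, getter)
//	if err != nil {
//		// handle error
//	}
//	wait()   // blocks until storage of all chunks is complete
//	_ = addr // root key of the stored content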

/*
When splitting, data is given as an io.Reader, and the key is a hashSize-long byte slice (Address); the root hash of the entire content will fill it once processing finishes.
New chunks to store are stored using the putter which the caller provides.
*/
func PyramidSplit(reader io.Reader, putter Putter, getter Getter) (Address, func(), error) {
	return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, DefaultChunkSize)).Split()
}

func PyramidAppend(addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(), error) {
	return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, DefaultChunkSize)).Append()
}

// Entry to create a tree node
type TreeEntry struct {
	level         int
	branchCount   int64
	subtreeSize   uint64
	chunk         []byte
	key           []byte
	index         int  // used in append to indicate the index of the existing tree entry
	updatePending bool // indicates if the entry is loaded from an existing tree
}

func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {
	return &TreeEntry{
		level:         0,
		branchCount:   0,
		subtreeSize:   0,
		chunk:         make([]byte, pyramid.chunkSize+8),
		key:           make([]byte, pyramid.hashSize),
		index:         0,
		updatePending: false,
	}
}

// Used by the hash processor to create a data/tree chunk and send it to storage
type chunkJob struct {
	key      Address
	chunk    []byte
	parentWg *sync.WaitGroup
}

type PyramidChunker struct {
	chunkSize   int64
	hashSize    int64
	branches    int64
	reader      io.Reader
	putter      Putter
	getter      Getter
	key         Address
	workerCount int64
	workerLock  sync.RWMutex
	jobC        chan *chunkJob
	wg          *sync.WaitGroup
	errC        chan error
	quitC       chan bool
	rootKey     []byte
	chunkLevel  [][]*TreeEntry
}

func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) {
	pc = &PyramidChunker{}
	pc.reader = params.reader
	pc.hashSize = params.hashSize
	pc.branches = params.chunkSize / pc.hashSize
	pc.chunkSize = pc.hashSize * pc.branches
	pc.putter = params.putter
	pc.getter = params.getter
	pc.key = params.addr
	pc.workerCount = 0
	pc.jobC = make(chan *chunkJob, 2*ChunkProcessors)
	pc.wg = &sync.WaitGroup{}
	pc.errC = make(chan error)
	pc.quitC = make(chan bool)
	pc.rootKey = make([]byte, pc.hashSize)
	pc.chunkLevel = make([][]*TreeEntry, pc.branches)
	return
}
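
// For concreteness (an illustrative calculation, not part of the original
// source): with DefaultChunkSize = 4096 bytes and a 32-byte hash,
// branches = 4096/32 = 128, matching the 128 data chunks per level-0 tree
// chunk in the diagram above. Chunk buffers are allocated as chunkSize+8
// bytes because every stored chunk is prefixed with an 8-byte little-endian
// size field.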

func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader {
	return &LazyChunkReader{
		key:       addr,
		depth:     depth,
		chunkSize: pc.chunkSize,
		branches:  pc.branches,
		hashSize:  pc.hashSize,
		getter:    getter,
	}
}

func (pc *PyramidChunker) incrementWorkerCount() {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	pc.workerCount += 1
}

func (pc *PyramidChunker) getWorkerCount() int64 {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	return pc.workerCount
}

func (pc *PyramidChunker) decrementWorkerCount() {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	pc.workerCount -= 1
}

func (pc *PyramidChunker) Split() (k Address, wait func(), err error) {
	log.Debug("pyramid.chunker: Split()")

	pc.wg.Add(1)
	pc.prepareChunks(false)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {

		// waiting for all chunks to finish
		pc.wg.Wait()

		// We close errC here because it is passed down to the parallel routines underneath.
		// If an error happens in one of them, that particular routine raises the error;
		// once they all complete successfully, control comes back and we can safely close it here.
		close(pc.errC)
	}()

	defer close(pc.quitC)
	defer pc.putter.Close()

	select {
	case err := <-pc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
	}
	return pc.rootKey, pc.putter.Wait, nil

}

func (pc *PyramidChunker) Append() (k Address, wait func(), err error) {
	log.Debug("pyramid.chunker: Append()")
	// Load the right-most unfinished tree chunks in every level
	pc.loadTree()

	pc.wg.Add(1)
	pc.prepareChunks(true)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {

		// waiting for all chunks to finish
		pc.wg.Wait()

		close(pc.errC)
	}()

	defer close(pc.quitC)
	defer pc.putter.Close()

	select {
	case err := <-pc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
	}

	return pc.rootKey, pc.putter.Wait, nil

}

func (pc *PyramidChunker) processor(id int64) {
	defer pc.decrementWorkerCount()
	for {
		select {

		case job, ok := <-pc.jobC:
			if !ok {
				return
			}
			pc.processChunk(id, job)
		case <-pc.quitC:
			return
		}
	}
}

func (pc *PyramidChunker) processChunk(id int64, job *chunkJob) {
	log.Debug("pyramid.chunker: processChunk()", "id", id)

	ref, err := pc.putter.Put(job.chunk)
	if err != nil {
		pc.errC <- err
	}

	// report the hash of this chunk one level up (keys correspond to the proper subslice of the parent chunk)
	copy(job.key, ref)

	// signal the parent that this chunk has been processed
	job.parentWg.Done()
}
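
// Note on processChunk (a descriptive gloss, not in the original source):
// for data chunks, job.key is a subslice of the parent tree chunk's buffer
// (see enqueueDataChunk), so the copy above writes the child's reference
// directly into the parent chunk under construction; for tree chunks it
// fills the entry's own key buffer (see enqueueTreeChunk). parentWg.Done()
// is what the tree-building code waits on before it hashes that parent.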

func (pc *PyramidChunker) loadTree() error {
	log.Debug("pyramid.chunker: loadTree()")
	// Get the root chunk to get the total size
	chunkData, err := pc.getter.Get(Reference(pc.key))
	if err != nil {
		return errLoadingTreeRootChunk
	}
	chunkSize := chunkData.Size()
	log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize)

	// if the data size is less than a chunk... add a parent with update pending
	if chunkSize <= pc.chunkSize {
		newEntry := &TreeEntry{
			level:         0,
			branchCount:   1,
			subtreeSize:   uint64(chunkSize),
			chunk:         make([]byte, pc.chunkSize+8),
			key:           make([]byte, pc.hashSize),
			index:         0,
			updatePending: true,
		}
		copy(newEntry.chunk[8:], pc.key)
		pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry)
		return nil
	}

	var treeSize int64
	var depth int
	treeSize = pc.chunkSize
	for ; treeSize < chunkSize; treeSize *= pc.branches {
		depth++
	}
	log.Trace("pyramid.chunker", "depth", depth)

	// Add the root chunk entry
	branchCount := int64(len(chunkData)-8) / pc.hashSize
	newEntry := &TreeEntry{
		level:         depth - 1,
		branchCount:   branchCount,
		subtreeSize:   uint64(chunkSize),
		chunk:         chunkData,
		key:           pc.key,
		index:         0,
		updatePending: true,
	}
	pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry)

	// Add the rest of the tree
	for lvl := depth - 1; lvl >= 1; lvl-- {

		// TODO(jmozah): instead of loading finished branches and then trimming them at the end,
		// avoid loading them in the first place
		for _, ent := range pc.chunkLevel[lvl] {
			branchCount = int64(len(ent.chunk)-8) / pc.hashSize
			for i := int64(0); i < branchCount; i++ {
				key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)]
				newChunkData, err := pc.getter.Get(Reference(key))
				if err != nil {
					return errLoadingTreeChunk
				}
				newChunkSize := newChunkData.Size()
				newBranchCount := int64(len(newChunkData)-8) / pc.hashSize
				newEntry := &TreeEntry{
					level:         lvl - 1,
					branchCount:   newBranchCount,
					subtreeSize:   uint64(newChunkSize),
					chunk:         newChunkData,
					key:           key,
					index:         0,
					updatePending: true,
				}
				pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry)

			}

			// We need to keep only the right-most unfinished branch, so trim all finished branches
			if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches {
				pc.chunkLevel[lvl-1] = nil
			}
		}
	}

	return nil
}
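
// A worked example of the depth calculation above (illustrative numbers,
// assuming 4096-byte chunks and 128 branches): for a 10 MiB file the root
// chunk's size field reads ~10 MiB, so treeSize grows 4096 -> 512 KiB ->
// 64 MiB, stopping after two multiplications; depth = 2 and the root entry
// sits at chunkLevel[1].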

func (pc *PyramidChunker) prepareChunks(isAppend bool) {
	log.Debug("pyramid.chunker: prepareChunks", "isAppend", isAppend)
	defer pc.wg.Done()

	chunkWG := &sync.WaitGroup{}

	pc.incrementWorkerCount()

	go pc.processor(pc.workerCount)

	parent := NewTreeEntry(pc)
	var unfinishedChunkData ChunkData
	var unfinishedChunkSize int64

	if isAppend && len(pc.chunkLevel[0]) != 0 {
		lastIndex := len(pc.chunkLevel[0]) - 1
		ent := pc.chunkLevel[0][lastIndex]

		if ent.branchCount < pc.branches {
			parent = &TreeEntry{
				level:         0,
				branchCount:   ent.branchCount,
				subtreeSize:   ent.subtreeSize,
				chunk:         ent.chunk,
				key:           ent.key,
				index:         lastIndex,
				updatePending: true,
			}

			lastBranch := parent.branchCount - 1
			lastKey := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize]

			var err error
			unfinishedChunkData, err = pc.getter.Get(lastKey)
			if err != nil {
				pc.errC <- err
			}
			unfinishedChunkSize = unfinishedChunkData.Size()
			if unfinishedChunkSize < pc.chunkSize {
				parent.subtreeSize = parent.subtreeSize - uint64(unfinishedChunkSize)
				parent.branchCount = parent.branchCount - 1
			} else {
				unfinishedChunkData = nil
			}
		}
	}

	for index := 0; ; index++ {
		var err error
		chunkData := make([]byte, pc.chunkSize+8)

		var readBytes int

		if unfinishedChunkData != nil {
			copy(chunkData, unfinishedChunkData)
			readBytes += int(unfinishedChunkSize)
			unfinishedChunkData = nil
			log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes)
		}

		var res []byte
		res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes))))

		// hack for ioutil.ReadAll:
		// a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we
		// want to propagate the io.EOF error
		if len(res) == 0 && err == nil {
			err = io.EOF
		}
		copy(chunkData[8+readBytes:], res)

		readBytes += len(res)
		log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes)

		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {

				pc.cleanChunkLevels()

				// Check if we are appending or the chunk is the only one.
				if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) {
					// Data is exactly one chunk: pick the last chunk key as the root
					chunkWG.Wait()
					lastChunksKey := parent.chunk[8 : 8+pc.hashSize]
					copy(pc.rootKey, lastChunksKey)
					break
				}
			} else {
				close(pc.quitC)
				break
			}
		}

		// Data ended on a chunk boundary: just signal to start building the tree
		if readBytes == 0 {
			pc.buildTree(isAppend, parent, chunkWG, true, nil)
			break
		} else {
			pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG)

			// update tree-related parent data structures
			parent.subtreeSize += uint64(readBytes)
			parent.branchCount++

			// Data got exhausted... signal to send any parent-tree-related chunks
			if int64(readBytes) < pc.chunkSize {

				pc.cleanChunkLevels()

				// only one data chunk, so don't add any parent chunk
				if parent.branchCount <= 1 {
					chunkWG.Wait()

					if isAppend || pc.depth() == 0 {
						// No need to build the tree if the depth is 0
						// or we are appending.
						// Just use the last key.
						copy(pc.rootKey, pkey)
					} else {
						// We need to build the tree and provide the lonely
						// chunk key to replace the last tree chunk key.
						pc.buildTree(isAppend, parent, chunkWG, true, pkey)
					}
					break
				}

				pc.buildTree(isAppend, parent, chunkWG, true, nil)
				break
			}

			if parent.branchCount == pc.branches {
				pc.buildTree(isAppend, parent, chunkWG, false, nil)
				parent = NewTreeEntry(pc)
			}

		}

		workers := pc.getWorkerCount()
		if int64(len(pc.jobC)) > workers && workers < ChunkProcessors {
			pc.incrementWorkerCount()
			go pc.processor(pc.workerCount)
		}

	}

}
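
// How buildTree (below) compresses levels, in an illustrative run with
// branches = 128: each full parent (128 data chunks) becomes one entry in
// chunkLevel[0]; once chunkLevel[0] itself holds 128 entries, they are
// combined into a single chunkLevel[1] entry, and so on upward. On the
// final call (last == true) compression continues until some level holds
// exactly one entry, whose key becomes rootKey.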

func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) {
	chunkWG.Wait()
	pc.enqueueTreeChunk(ent, chunkWG, last)

	compress := false
	endLvl := pc.branches
	for lvl := int64(0); lvl < pc.branches; lvl++ {
		lvlCount := int64(len(pc.chunkLevel[lvl]))
		if lvlCount >= pc.branches {
			endLvl = lvl + 1
			compress = true
			break
		}
	}

	if !compress && !last {
		return
	}

	// Wait for all the keys to be processed before compressing the tree
	chunkWG.Wait()

	for lvl := int64(ent.level); lvl < endLvl; lvl++ {

		lvlCount := int64(len(pc.chunkLevel[lvl]))
		if lvlCount == 1 && last {
			copy(pc.rootKey, pc.chunkLevel[lvl][0].key)
			return
		}

		for startCount := int64(0); startCount < lvlCount; startCount += pc.branches {

			endCount := startCount + pc.branches
			if endCount > lvlCount {
				endCount = lvlCount
			}

			var nextLvlCount int64
			var tempEntry *TreeEntry
			if len(pc.chunkLevel[lvl+1]) > 0 {
				nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1)
				tempEntry = pc.chunkLevel[lvl+1][nextLvlCount]
			}
			if isAppend && tempEntry != nil && tempEntry.updatePending {
				updateEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   0,
					subtreeSize:   0,
					chunk:         make([]byte, pc.chunkSize+8),
					key:           make([]byte, pc.hashSize),
					index:         int(nextLvlCount),
					updatePending: true,
				}
				for index := int64(0); index < lvlCount; index++ {
					updateEntry.branchCount++
					updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize
					copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize])
				}

				pc.enqueueTreeChunk(updateEntry, chunkWG, last)

			} else {

				noOfBranches := endCount - startCount
				newEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   noOfBranches,
					subtreeSize:   0,
					chunk:         make([]byte, (noOfBranches*pc.hashSize)+8),
					key:           make([]byte, pc.hashSize),
					index:         int(nextLvlCount),
					updatePending: false,
				}

				index := int64(0)
				for i := startCount; i < endCount; i++ {
					entry := pc.chunkLevel[lvl][i]
					newEntry.subtreeSize += entry.subtreeSize
					copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize])
					index++
				}
				// The lonely chunk key is the key of the last chunk that is the only one on the last branch.
				// In this case, ignore its tree chunk key and replace it with the lonely chunk key.
				if lonelyChunkKey != nil {
					// Overwrite the last tree chunk key with the lonely data chunk key.
					copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize])
				}

				pc.enqueueTreeChunk(newEntry, chunkWG, last)

			}

		}

		if !isAppend {
			chunkWG.Wait()
			if compress {
				pc.chunkLevel[lvl] = nil
			}
		}
	}

}
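
// Chunk wire layout, as assembled by enqueueTreeChunk/enqueueDataChunk below
// (spelled out here for reference):
//
//	tree chunk:  [8-byte LE subtree size][branchCount * hashSize child keys]
//	data chunk:  [8-byte LE data size][up to chunkSize payload bytes]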

func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) {
	if ent != nil && ent.branchCount > 0 {

		// wait for the data chunks to get over before processing the tree chunk
		if last {
			chunkWG.Wait()
		}

		binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
		ent.key = make([]byte, pc.hashSize)
		chunkWG.Add(1)
		select {
		case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}:
		case <-pc.quitC:
		}

		// Update or append based on whether it is a new entry or is being reused
		if ent.updatePending {
			chunkWG.Wait()
			pc.chunkLevel[ent.level][ent.index] = ent
		} else {
			pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent)
		}

	}
}

func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address {
	binary.LittleEndian.PutUint64(chunkData[:8], size)
	pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize]

	chunkWG.Add(1)
	select {
	case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}:
	case <-pc.quitC:
	}

	return pkey

}

// depth returns the number of chunk levels.
// It is used to detect if there is only one data chunk
// left for the last branch.
func (pc *PyramidChunker) depth() (d int) {
	for _, l := range pc.chunkLevel {
		if l == nil {
			return
		}
		d++
	}
	return
}

// cleanChunkLevels removes gaps (nil levels) between chunk levels
// that are not nil.
func (pc *PyramidChunker) cleanChunkLevels() {
	for i, l := range pc.chunkLevel {
		if l == nil {
			pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...)
		}
	}
}
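
// An illustrative trace of cleanChunkLevels (not in the original source):
// given chunkLevel = [A, nil, B, nil], the nil at index 1 is cut out and a
// nil appended in its place at the end, yielding [A, B, nil, nil]; the
// slice keeps its length while non-nil levels become contiguous from
// index 0, which is what depth() above relies on.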