github.com/divan/go-ethereum@v1.8.14-0.20180820134928-1de9ada4016d/swarm/storage/pyramid.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package storage 18 19 import ( 20 "context" 21 "encoding/binary" 22 "errors" 23 "io" 24 "io/ioutil" 25 "sync" 26 "time" 27 28 "github.com/ethereum/go-ethereum/swarm/chunk" 29 "github.com/ethereum/go-ethereum/swarm/log" 30 ) 31 32 /* 33 The main idea of a pyramid chunker is to process the input data without knowing the entire size apriori. 34 For this to be achieved, the chunker tree is built from the ground up until the data is exhausted. 35 This opens up new aveneus such as easy append and other sort of modifications to the tree thereby avoiding 36 duplication of data chunks. 37 38 39 Below is an example of a two level chunks tree. The leaf chunks are called data chunks and all the above 40 chunks are called tree chunks. The tree chunk above data chunks is level 0 and so on until it reaches 41 the root tree chunk. 42 43 44 45 T10 <- Tree chunk lvl1 46 | 47 __________________________|_____________________________ 48 / | | \ 49 / | \ \ 50 __T00__ ___T01__ ___T02__ ___T03__ <- Tree chunks lvl 0 51 / / \ / / \ / / \ / / \ 52 / / \ / / \ / / \ / / \ 53 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 <- Data Chunks 54 55 56 The split function continuously read the data and creates data chunks and send them to storage. 57 When certain no of data chunks are created (defaultBranches), a signal is sent to create a tree 58 entry. When the level 0 tree entries reaches certain threshold (defaultBranches), another signal 59 is sent to a tree entry one level up.. and so on... until only the data is exhausted AND only one 60 tree entry is present in certain level. The key of tree entry is given out as the rootKey of the file. 61 62 */ 63 64 var ( 65 errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk") 66 errLoadingTreeChunk = errors.New("LoadTree Error: Could not load chunk") 67 ) 68 69 const ( 70 ChunkProcessors = 8 71 splitTimeout = time.Minute * 5 72 ) 73 74 const ( 75 DataChunk = 0 76 TreeChunk = 1 77 ) 78 79 type PyramidSplitterParams struct { 80 SplitterParams 81 getter Getter 82 } 83 84 func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams { 85 hashSize := putter.RefSize() 86 return &PyramidSplitterParams{ 87 SplitterParams: SplitterParams{ 88 ChunkerParams: ChunkerParams{ 89 chunkSize: chunkSize, 90 hashSize: hashSize, 91 }, 92 reader: reader, 93 putter: putter, 94 addr: addr, 95 }, 96 getter: getter, 97 } 98 } 99 100 /* 101 When splitting, data is given as a SectionReader, and the key is a hashSize long byte slice (Key), the root hash of the entire content will fill this once processing finishes. 102 New chunks to store are store using the putter which the caller provides. 103 */ 104 func PyramidSplit(ctx context.Context, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) { 105 return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, chunk.DefaultSize)).Split(ctx) 106 } 107 108 func PyramidAppend(ctx context.Context, addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) { 109 return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, chunk.DefaultSize)).Append(ctx) 110 } 111 112 // Entry to create a tree node 113 type TreeEntry struct { 114 level int 115 branchCount int64 116 subtreeSize uint64 117 chunk []byte 118 key []byte 119 index int // used in append to indicate the index of existing tree entry 120 updatePending bool // indicates if the entry is loaded from existing tree 121 } 122 123 func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry { 124 return &TreeEntry{ 125 level: 0, 126 branchCount: 0, 127 subtreeSize: 0, 128 chunk: make([]byte, pyramid.chunkSize+8), 129 key: make([]byte, pyramid.hashSize), 130 index: 0, 131 updatePending: false, 132 } 133 } 134 135 // Used by the hash processor to create a data/tree chunk and send to storage 136 type chunkJob struct { 137 key Address 138 chunk []byte 139 parentWg *sync.WaitGroup 140 } 141 142 type PyramidChunker struct { 143 chunkSize int64 144 hashSize int64 145 branches int64 146 reader io.Reader 147 putter Putter 148 getter Getter 149 key Address 150 workerCount int64 151 workerLock sync.RWMutex 152 jobC chan *chunkJob 153 wg *sync.WaitGroup 154 errC chan error 155 quitC chan bool 156 rootKey []byte 157 chunkLevel [][]*TreeEntry 158 } 159 160 func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) { 161 pc = &PyramidChunker{} 162 pc.reader = params.reader 163 pc.hashSize = params.hashSize 164 pc.branches = params.chunkSize / pc.hashSize 165 pc.chunkSize = pc.hashSize * pc.branches 166 pc.putter = params.putter 167 pc.getter = params.getter 168 pc.key = params.addr 169 pc.workerCount = 0 170 pc.jobC = make(chan *chunkJob, 2*ChunkProcessors) 171 pc.wg = &sync.WaitGroup{} 172 pc.errC = make(chan error) 173 pc.quitC = make(chan bool) 174 pc.rootKey = make([]byte, pc.hashSize) 175 pc.chunkLevel = make([][]*TreeEntry, pc.branches) 176 return 177 } 178 179 func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader { 180 return &LazyChunkReader{ 181 key: addr, 182 depth: depth, 183 chunkSize: pc.chunkSize, 184 branches: pc.branches, 185 hashSize: pc.hashSize, 186 getter: getter, 187 } 188 } 189 190 func (pc *PyramidChunker) incrementWorkerCount() { 191 pc.workerLock.Lock() 192 defer pc.workerLock.Unlock() 193 pc.workerCount += 1 194 } 195 196 func (pc *PyramidChunker) getWorkerCount() int64 { 197 pc.workerLock.Lock() 198 defer pc.workerLock.Unlock() 199 return pc.workerCount 200 } 201 202 func (pc *PyramidChunker) decrementWorkerCount() { 203 pc.workerLock.Lock() 204 defer pc.workerLock.Unlock() 205 pc.workerCount -= 1 206 } 207 208 func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 209 log.Debug("pyramid.chunker: Split()") 210 211 pc.wg.Add(1) 212 pc.prepareChunks(false) 213 214 // closes internal error channel if all subprocesses in the workgroup finished 215 go func() { 216 217 // waiting for all chunks to finish 218 pc.wg.Wait() 219 220 //We close errC here because this is passed down to 8 parallel routines underneath. 221 // if a error happens in one of them.. that particular routine raises error... 222 // once they all complete successfully, the control comes back and we can safely close this here. 223 close(pc.errC) 224 }() 225 226 defer close(pc.quitC) 227 defer pc.putter.Close() 228 229 select { 230 case err := <-pc.errC: 231 if err != nil { 232 return nil, nil, err 233 } 234 case <-time.NewTimer(splitTimeout).C: 235 } 236 return pc.rootKey, pc.putter.Wait, nil 237 238 } 239 240 func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 241 log.Debug("pyramid.chunker: Append()") 242 // Load the right most unfinished tree chunks in every level 243 pc.loadTree() 244 245 pc.wg.Add(1) 246 pc.prepareChunks(true) 247 248 // closes internal error channel if all subprocesses in the workgroup finished 249 go func() { 250 251 // waiting for all chunks to finish 252 pc.wg.Wait() 253 254 close(pc.errC) 255 }() 256 257 defer close(pc.quitC) 258 defer pc.putter.Close() 259 260 select { 261 case err := <-pc.errC: 262 if err != nil { 263 return nil, nil, err 264 } 265 case <-time.NewTimer(splitTimeout).C: 266 } 267 268 return pc.rootKey, pc.putter.Wait, nil 269 270 } 271 272 func (pc *PyramidChunker) processor(id int64) { 273 defer pc.decrementWorkerCount() 274 for { 275 select { 276 277 case job, ok := <-pc.jobC: 278 if !ok { 279 return 280 } 281 pc.processChunk(id, job) 282 case <-pc.quitC: 283 return 284 } 285 } 286 } 287 288 func (pc *PyramidChunker) processChunk(id int64, job *chunkJob) { 289 log.Debug("pyramid.chunker: processChunk()", "id", id) 290 291 ref, err := pc.putter.Put(context.TODO(), job.chunk) 292 if err != nil { 293 pc.errC <- err 294 } 295 296 // report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk) 297 copy(job.key, ref) 298 299 // send off new chunk to storage 300 job.parentWg.Done() 301 } 302 303 func (pc *PyramidChunker) loadTree() error { 304 log.Debug("pyramid.chunker: loadTree()") 305 // Get the root chunk to get the total size 306 chunkData, err := pc.getter.Get(context.TODO(), Reference(pc.key)) 307 if err != nil { 308 return errLoadingTreeRootChunk 309 } 310 chunkSize := chunkData.Size() 311 log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize) 312 313 //if data size is less than a chunk... add a parent with update as pending 314 if chunkSize <= pc.chunkSize { 315 newEntry := &TreeEntry{ 316 level: 0, 317 branchCount: 1, 318 subtreeSize: uint64(chunkSize), 319 chunk: make([]byte, pc.chunkSize+8), 320 key: make([]byte, pc.hashSize), 321 index: 0, 322 updatePending: true, 323 } 324 copy(newEntry.chunk[8:], pc.key) 325 pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry) 326 return nil 327 } 328 329 var treeSize int64 330 var depth int 331 treeSize = pc.chunkSize 332 for ; treeSize < chunkSize; treeSize *= pc.branches { 333 depth++ 334 } 335 log.Trace("pyramid.chunker", "depth", depth) 336 337 // Add the root chunk entry 338 branchCount := int64(len(chunkData)-8) / pc.hashSize 339 newEntry := &TreeEntry{ 340 level: depth - 1, 341 branchCount: branchCount, 342 subtreeSize: uint64(chunkSize), 343 chunk: chunkData, 344 key: pc.key, 345 index: 0, 346 updatePending: true, 347 } 348 pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry) 349 350 // Add the rest of the tree 351 for lvl := depth - 1; lvl >= 1; lvl-- { 352 353 //TODO(jmozah): instead of loading finished branches and then trim in the end, 354 //avoid loading them in the first place 355 for _, ent := range pc.chunkLevel[lvl] { 356 branchCount = int64(len(ent.chunk)-8) / pc.hashSize 357 for i := int64(0); i < branchCount; i++ { 358 key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)] 359 newChunkData, err := pc.getter.Get(context.TODO(), Reference(key)) 360 if err != nil { 361 return errLoadingTreeChunk 362 } 363 newChunkSize := newChunkData.Size() 364 bewBranchCount := int64(len(newChunkData)-8) / pc.hashSize 365 newEntry := &TreeEntry{ 366 level: lvl - 1, 367 branchCount: bewBranchCount, 368 subtreeSize: uint64(newChunkSize), 369 chunk: newChunkData, 370 key: key, 371 index: 0, 372 updatePending: true, 373 } 374 pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry) 375 376 } 377 378 // We need to get only the right most unfinished branch.. so trim all finished branches 379 if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches { 380 pc.chunkLevel[lvl-1] = nil 381 } 382 } 383 } 384 385 return nil 386 } 387 388 func (pc *PyramidChunker) prepareChunks(isAppend bool) { 389 log.Debug("pyramid.chunker: prepareChunks", "isAppend", isAppend) 390 defer pc.wg.Done() 391 392 chunkWG := &sync.WaitGroup{} 393 394 pc.incrementWorkerCount() 395 396 go pc.processor(pc.workerCount) 397 398 parent := NewTreeEntry(pc) 399 var unfinishedChunkData ChunkData 400 var unfinishedChunkSize int64 401 402 if isAppend && len(pc.chunkLevel[0]) != 0 { 403 lastIndex := len(pc.chunkLevel[0]) - 1 404 ent := pc.chunkLevel[0][lastIndex] 405 406 if ent.branchCount < pc.branches { 407 parent = &TreeEntry{ 408 level: 0, 409 branchCount: ent.branchCount, 410 subtreeSize: ent.subtreeSize, 411 chunk: ent.chunk, 412 key: ent.key, 413 index: lastIndex, 414 updatePending: true, 415 } 416 417 lastBranch := parent.branchCount - 1 418 lastKey := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize] 419 420 var err error 421 unfinishedChunkData, err = pc.getter.Get(context.TODO(), lastKey) 422 if err != nil { 423 pc.errC <- err 424 } 425 unfinishedChunkSize = unfinishedChunkData.Size() 426 if unfinishedChunkSize < pc.chunkSize { 427 parent.subtreeSize = parent.subtreeSize - uint64(unfinishedChunkSize) 428 parent.branchCount = parent.branchCount - 1 429 } else { 430 unfinishedChunkData = nil 431 } 432 } 433 } 434 435 for index := 0; ; index++ { 436 var err error 437 chunkData := make([]byte, pc.chunkSize+8) 438 439 var readBytes int 440 441 if unfinishedChunkData != nil { 442 copy(chunkData, unfinishedChunkData) 443 readBytes += int(unfinishedChunkSize) 444 unfinishedChunkData = nil 445 log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes) 446 } 447 448 var res []byte 449 res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes)))) 450 451 // hack for ioutil.ReadAll: 452 // a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we 453 // want to propagate the io.EOF error 454 if len(res) == 0 && err == nil { 455 err = io.EOF 456 } 457 copy(chunkData[8+readBytes:], res) 458 459 readBytes += len(res) 460 log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes) 461 462 if err != nil { 463 if err == io.EOF || err == io.ErrUnexpectedEOF { 464 465 pc.cleanChunkLevels() 466 467 // Check if we are appending or the chunk is the only one. 468 if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) { 469 // Data is exactly one chunk.. pick the last chunk key as root 470 chunkWG.Wait() 471 lastChunksKey := parent.chunk[8 : 8+pc.hashSize] 472 copy(pc.rootKey, lastChunksKey) 473 break 474 } 475 } else { 476 close(pc.quitC) 477 break 478 } 479 } 480 481 // Data ended in chunk boundary.. just signal to start bulding tree 482 if readBytes == 0 { 483 pc.buildTree(isAppend, parent, chunkWG, true, nil) 484 break 485 } else { 486 pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG) 487 488 // update tree related parent data structures 489 parent.subtreeSize += uint64(readBytes) 490 parent.branchCount++ 491 492 // Data got exhausted... signal to send any parent tree related chunks 493 if int64(readBytes) < pc.chunkSize { 494 495 pc.cleanChunkLevels() 496 497 // only one data chunk .. so dont add any parent chunk 498 if parent.branchCount <= 1 { 499 chunkWG.Wait() 500 501 if isAppend || pc.depth() == 0 { 502 // No need to build the tree if the depth is 0 503 // or we are appending. 504 // Just use the last key. 505 copy(pc.rootKey, pkey) 506 } else { 507 // We need to build the tree and and provide the lonely 508 // chunk key to replace the last tree chunk key. 509 pc.buildTree(isAppend, parent, chunkWG, true, pkey) 510 } 511 break 512 } 513 514 pc.buildTree(isAppend, parent, chunkWG, true, nil) 515 break 516 } 517 518 if parent.branchCount == pc.branches { 519 pc.buildTree(isAppend, parent, chunkWG, false, nil) 520 parent = NewTreeEntry(pc) 521 } 522 523 } 524 525 workers := pc.getWorkerCount() 526 if int64(len(pc.jobC)) > workers && workers < ChunkProcessors { 527 pc.incrementWorkerCount() 528 go pc.processor(pc.workerCount) 529 } 530 531 } 532 533 } 534 535 func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) { 536 chunkWG.Wait() 537 pc.enqueueTreeChunk(ent, chunkWG, last) 538 539 compress := false 540 endLvl := pc.branches 541 for lvl := int64(0); lvl < pc.branches; lvl++ { 542 lvlCount := int64(len(pc.chunkLevel[lvl])) 543 if lvlCount >= pc.branches { 544 endLvl = lvl + 1 545 compress = true 546 break 547 } 548 } 549 550 if !compress && !last { 551 return 552 } 553 554 // Wait for all the keys to be processed before compressing the tree 555 chunkWG.Wait() 556 557 for lvl := int64(ent.level); lvl < endLvl; lvl++ { 558 559 lvlCount := int64(len(pc.chunkLevel[lvl])) 560 if lvlCount == 1 && last { 561 copy(pc.rootKey, pc.chunkLevel[lvl][0].key) 562 return 563 } 564 565 for startCount := int64(0); startCount < lvlCount; startCount += pc.branches { 566 567 endCount := startCount + pc.branches 568 if endCount > lvlCount { 569 endCount = lvlCount 570 } 571 572 var nextLvlCount int64 573 var tempEntry *TreeEntry 574 if len(pc.chunkLevel[lvl+1]) > 0 { 575 nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1) 576 tempEntry = pc.chunkLevel[lvl+1][nextLvlCount] 577 } 578 if isAppend && tempEntry != nil && tempEntry.updatePending { 579 updateEntry := &TreeEntry{ 580 level: int(lvl + 1), 581 branchCount: 0, 582 subtreeSize: 0, 583 chunk: make([]byte, pc.chunkSize+8), 584 key: make([]byte, pc.hashSize), 585 index: int(nextLvlCount), 586 updatePending: true, 587 } 588 for index := int64(0); index < lvlCount; index++ { 589 updateEntry.branchCount++ 590 updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize 591 copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize]) 592 } 593 594 pc.enqueueTreeChunk(updateEntry, chunkWG, last) 595 596 } else { 597 598 noOfBranches := endCount - startCount 599 newEntry := &TreeEntry{ 600 level: int(lvl + 1), 601 branchCount: noOfBranches, 602 subtreeSize: 0, 603 chunk: make([]byte, (noOfBranches*pc.hashSize)+8), 604 key: make([]byte, pc.hashSize), 605 index: int(nextLvlCount), 606 updatePending: false, 607 } 608 609 index := int64(0) 610 for i := startCount; i < endCount; i++ { 611 entry := pc.chunkLevel[lvl][i] 612 newEntry.subtreeSize += entry.subtreeSize 613 copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize]) 614 index++ 615 } 616 // Lonely chunk key is the key of the last chunk that is only one on the last branch. 617 // In this case, ignore the its tree chunk key and replace it with the lonely chunk key. 618 if lonelyChunkKey != nil { 619 // Overwrite the last tree chunk key with the lonely data chunk key. 620 copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize]) 621 } 622 623 pc.enqueueTreeChunk(newEntry, chunkWG, last) 624 625 } 626 627 } 628 629 if !isAppend { 630 chunkWG.Wait() 631 if compress { 632 pc.chunkLevel[lvl] = nil 633 } 634 } 635 } 636 637 } 638 639 func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) { 640 if ent != nil && ent.branchCount > 0 { 641 642 // wait for data chunks to get over before processing the tree chunk 643 if last { 644 chunkWG.Wait() 645 } 646 647 binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize) 648 ent.key = make([]byte, pc.hashSize) 649 chunkWG.Add(1) 650 select { 651 case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}: 652 case <-pc.quitC: 653 } 654 655 // Update or append based on weather it is a new entry or being reused 656 if ent.updatePending { 657 chunkWG.Wait() 658 pc.chunkLevel[ent.level][ent.index] = ent 659 } else { 660 pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent) 661 } 662 663 } 664 } 665 666 func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address { 667 binary.LittleEndian.PutUint64(chunkData[:8], size) 668 pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize] 669 670 chunkWG.Add(1) 671 select { 672 case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}: 673 case <-pc.quitC: 674 } 675 676 return pkey 677 678 } 679 680 // depth returns the number of chunk levels. 681 // It is used to detect if there is only one data chunk 682 // left for the last branch. 683 func (pc *PyramidChunker) depth() (d int) { 684 for _, l := range pc.chunkLevel { 685 if l == nil { 686 return 687 } 688 d++ 689 } 690 return 691 } 692 693 // cleanChunkLevels removes gaps (nil levels) between chunk levels 694 // that are not nil. 695 func (pc *PyramidChunker) cleanChunkLevels() { 696 for i, l := range pc.chunkLevel { 697 if l == nil { 698 pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...) 699 } 700 } 701 }