github.com/daragao/go-ethereum@v1.8.14-0.20180809141559-45eaef243198/swarm/storage/pyramid.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package storage 18 19 import ( 20 "context" 21 "encoding/binary" 22 "errors" 23 "io" 24 "io/ioutil" 25 "sync" 26 "time" 27 28 "github.com/ethereum/go-ethereum/swarm/log" 29 ) 30 31 /* 32 The main idea of a pyramid chunker is to process the input data without knowing the entire size apriori. 33 For this to be achieved, the chunker tree is built from the ground up until the data is exhausted. 34 This opens up new aveneus such as easy append and other sort of modifications to the tree thereby avoiding 35 duplication of data chunks. 36 37 38 Below is an example of a two level chunks tree. The leaf chunks are called data chunks and all the above 39 chunks are called tree chunks. The tree chunk above data chunks is level 0 and so on until it reaches 40 the root tree chunk. 41 42 43 44 T10 <- Tree chunk lvl1 45 | 46 __________________________|_____________________________ 47 / | | \ 48 / | \ \ 49 __T00__ ___T01__ ___T02__ ___T03__ <- Tree chunks lvl 0 50 / / \ / / \ / / \ / / \ 51 / / \ / / \ / / \ / / \ 52 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 <- Data Chunks 53 54 55 The split function continuously read the data and creates data chunks and send them to storage. 56 When certain no of data chunks are created (defaultBranches), a signal is sent to create a tree 57 entry. When the level 0 tree entries reaches certain threshold (defaultBranches), another signal 58 is sent to a tree entry one level up.. and so on... until only the data is exhausted AND only one 59 tree entry is present in certain level. The key of tree entry is given out as the rootKey of the file. 60 61 */ 62 63 var ( 64 errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk") 65 errLoadingTreeChunk = errors.New("LoadTree Error: Could not load chunk") 66 ) 67 68 const ( 69 ChunkProcessors = 8 70 splitTimeout = time.Minute * 5 71 ) 72 73 const ( 74 DataChunk = 0 75 TreeChunk = 1 76 ) 77 78 type PyramidSplitterParams struct { 79 SplitterParams 80 getter Getter 81 } 82 83 func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams { 84 hashSize := putter.RefSize() 85 return &PyramidSplitterParams{ 86 SplitterParams: SplitterParams{ 87 ChunkerParams: ChunkerParams{ 88 chunkSize: chunkSize, 89 hashSize: hashSize, 90 }, 91 reader: reader, 92 putter: putter, 93 addr: addr, 94 }, 95 getter: getter, 96 } 97 } 98 99 /* 100 When splitting, data is given as a SectionReader, and the key is a hashSize long byte slice (Key), the root hash of the entire content will fill this once processing finishes. 101 New chunks to store are store using the putter which the caller provides. 102 */ 103 func PyramidSplit(ctx context.Context, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) { 104 return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, DefaultChunkSize)).Split(ctx) 105 } 106 107 func PyramidAppend(ctx context.Context, addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) { 108 return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, DefaultChunkSize)).Append(ctx) 109 } 110 111 // Entry to create a tree node 112 type TreeEntry struct { 113 level int 114 branchCount int64 115 subtreeSize uint64 116 chunk []byte 117 key []byte 118 index int // used in append to indicate the index of existing tree entry 119 updatePending bool // indicates if the entry is loaded from existing tree 120 } 121 122 func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry { 123 return &TreeEntry{ 124 level: 0, 125 branchCount: 0, 126 subtreeSize: 0, 127 chunk: make([]byte, pyramid.chunkSize+8), 128 key: make([]byte, pyramid.hashSize), 129 index: 0, 130 updatePending: false, 131 } 132 } 133 134 // Used by the hash processor to create a data/tree chunk and send to storage 135 type chunkJob struct { 136 key Address 137 chunk []byte 138 parentWg *sync.WaitGroup 139 } 140 141 type PyramidChunker struct { 142 chunkSize int64 143 hashSize int64 144 branches int64 145 reader io.Reader 146 putter Putter 147 getter Getter 148 key Address 149 workerCount int64 150 workerLock sync.RWMutex 151 jobC chan *chunkJob 152 wg *sync.WaitGroup 153 errC chan error 154 quitC chan bool 155 rootKey []byte 156 chunkLevel [][]*TreeEntry 157 } 158 159 func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) { 160 pc = &PyramidChunker{} 161 pc.reader = params.reader 162 pc.hashSize = params.hashSize 163 pc.branches = params.chunkSize / pc.hashSize 164 pc.chunkSize = pc.hashSize * pc.branches 165 pc.putter = params.putter 166 pc.getter = params.getter 167 pc.key = params.addr 168 pc.workerCount = 0 169 pc.jobC = make(chan *chunkJob, 2*ChunkProcessors) 170 pc.wg = &sync.WaitGroup{} 171 pc.errC = make(chan error) 172 pc.quitC = make(chan bool) 173 pc.rootKey = make([]byte, pc.hashSize) 174 pc.chunkLevel = make([][]*TreeEntry, pc.branches) 175 return 176 } 177 178 func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader { 179 return &LazyChunkReader{ 180 key: addr, 181 depth: depth, 182 chunkSize: pc.chunkSize, 183 branches: pc.branches, 184 hashSize: pc.hashSize, 185 getter: getter, 186 } 187 } 188 189 func (pc *PyramidChunker) incrementWorkerCount() { 190 pc.workerLock.Lock() 191 defer pc.workerLock.Unlock() 192 pc.workerCount += 1 193 } 194 195 func (pc *PyramidChunker) getWorkerCount() int64 { 196 pc.workerLock.Lock() 197 defer pc.workerLock.Unlock() 198 return pc.workerCount 199 } 200 201 func (pc *PyramidChunker) decrementWorkerCount() { 202 pc.workerLock.Lock() 203 defer pc.workerLock.Unlock() 204 pc.workerCount -= 1 205 } 206 207 func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 208 log.Debug("pyramid.chunker: Split()") 209 210 pc.wg.Add(1) 211 pc.prepareChunks(false) 212 213 // closes internal error channel if all subprocesses in the workgroup finished 214 go func() { 215 216 // waiting for all chunks to finish 217 pc.wg.Wait() 218 219 //We close errC here because this is passed down to 8 parallel routines underneath. 220 // if a error happens in one of them.. that particular routine raises error... 221 // once they all complete successfully, the control comes back and we can safely close this here. 222 close(pc.errC) 223 }() 224 225 defer close(pc.quitC) 226 defer pc.putter.Close() 227 228 select { 229 case err := <-pc.errC: 230 if err != nil { 231 return nil, nil, err 232 } 233 case <-time.NewTimer(splitTimeout).C: 234 } 235 return pc.rootKey, pc.putter.Wait, nil 236 237 } 238 239 func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 240 log.Debug("pyramid.chunker: Append()") 241 // Load the right most unfinished tree chunks in every level 242 pc.loadTree() 243 244 pc.wg.Add(1) 245 pc.prepareChunks(true) 246 247 // closes internal error channel if all subprocesses in the workgroup finished 248 go func() { 249 250 // waiting for all chunks to finish 251 pc.wg.Wait() 252 253 close(pc.errC) 254 }() 255 256 defer close(pc.quitC) 257 defer pc.putter.Close() 258 259 select { 260 case err := <-pc.errC: 261 if err != nil { 262 return nil, nil, err 263 } 264 case <-time.NewTimer(splitTimeout).C: 265 } 266 267 return pc.rootKey, pc.putter.Wait, nil 268 269 } 270 271 func (pc *PyramidChunker) processor(id int64) { 272 defer pc.decrementWorkerCount() 273 for { 274 select { 275 276 case job, ok := <-pc.jobC: 277 if !ok { 278 return 279 } 280 pc.processChunk(id, job) 281 case <-pc.quitC: 282 return 283 } 284 } 285 } 286 287 func (pc *PyramidChunker) processChunk(id int64, job *chunkJob) { 288 log.Debug("pyramid.chunker: processChunk()", "id", id) 289 290 ref, err := pc.putter.Put(context.TODO(), job.chunk) 291 if err != nil { 292 pc.errC <- err 293 } 294 295 // report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk) 296 copy(job.key, ref) 297 298 // send off new chunk to storage 299 job.parentWg.Done() 300 } 301 302 func (pc *PyramidChunker) loadTree() error { 303 log.Debug("pyramid.chunker: loadTree()") 304 // Get the root chunk to get the total size 305 chunkData, err := pc.getter.Get(context.TODO(), Reference(pc.key)) 306 if err != nil { 307 return errLoadingTreeRootChunk 308 } 309 chunkSize := chunkData.Size() 310 log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize) 311 312 //if data size is less than a chunk... add a parent with update as pending 313 if chunkSize <= pc.chunkSize { 314 newEntry := &TreeEntry{ 315 level: 0, 316 branchCount: 1, 317 subtreeSize: uint64(chunkSize), 318 chunk: make([]byte, pc.chunkSize+8), 319 key: make([]byte, pc.hashSize), 320 index: 0, 321 updatePending: true, 322 } 323 copy(newEntry.chunk[8:], pc.key) 324 pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry) 325 return nil 326 } 327 328 var treeSize int64 329 var depth int 330 treeSize = pc.chunkSize 331 for ; treeSize < chunkSize; treeSize *= pc.branches { 332 depth++ 333 } 334 log.Trace("pyramid.chunker", "depth", depth) 335 336 // Add the root chunk entry 337 branchCount := int64(len(chunkData)-8) / pc.hashSize 338 newEntry := &TreeEntry{ 339 level: depth - 1, 340 branchCount: branchCount, 341 subtreeSize: uint64(chunkSize), 342 chunk: chunkData, 343 key: pc.key, 344 index: 0, 345 updatePending: true, 346 } 347 pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry) 348 349 // Add the rest of the tree 350 for lvl := depth - 1; lvl >= 1; lvl-- { 351 352 //TODO(jmozah): instead of loading finished branches and then trim in the end, 353 //avoid loading them in the first place 354 for _, ent := range pc.chunkLevel[lvl] { 355 branchCount = int64(len(ent.chunk)-8) / pc.hashSize 356 for i := int64(0); i < branchCount; i++ { 357 key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)] 358 newChunkData, err := pc.getter.Get(context.TODO(), Reference(key)) 359 if err != nil { 360 return errLoadingTreeChunk 361 } 362 newChunkSize := newChunkData.Size() 363 bewBranchCount := int64(len(newChunkData)-8) / pc.hashSize 364 newEntry := &TreeEntry{ 365 level: lvl - 1, 366 branchCount: bewBranchCount, 367 subtreeSize: uint64(newChunkSize), 368 chunk: newChunkData, 369 key: key, 370 index: 0, 371 updatePending: true, 372 } 373 pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry) 374 375 } 376 377 // We need to get only the right most unfinished branch.. so trim all finished branches 378 if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches { 379 pc.chunkLevel[lvl-1] = nil 380 } 381 } 382 } 383 384 return nil 385 } 386 387 func (pc *PyramidChunker) prepareChunks(isAppend bool) { 388 log.Debug("pyramid.chunker: prepareChunks", "isAppend", isAppend) 389 defer pc.wg.Done() 390 391 chunkWG := &sync.WaitGroup{} 392 393 pc.incrementWorkerCount() 394 395 go pc.processor(pc.workerCount) 396 397 parent := NewTreeEntry(pc) 398 var unfinishedChunkData ChunkData 399 var unfinishedChunkSize int64 400 401 if isAppend && len(pc.chunkLevel[0]) != 0 { 402 lastIndex := len(pc.chunkLevel[0]) - 1 403 ent := pc.chunkLevel[0][lastIndex] 404 405 if ent.branchCount < pc.branches { 406 parent = &TreeEntry{ 407 level: 0, 408 branchCount: ent.branchCount, 409 subtreeSize: ent.subtreeSize, 410 chunk: ent.chunk, 411 key: ent.key, 412 index: lastIndex, 413 updatePending: true, 414 } 415 416 lastBranch := parent.branchCount - 1 417 lastKey := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize] 418 419 var err error 420 unfinishedChunkData, err = pc.getter.Get(context.TODO(), lastKey) 421 if err != nil { 422 pc.errC <- err 423 } 424 unfinishedChunkSize = unfinishedChunkData.Size() 425 if unfinishedChunkSize < pc.chunkSize { 426 parent.subtreeSize = parent.subtreeSize - uint64(unfinishedChunkSize) 427 parent.branchCount = parent.branchCount - 1 428 } else { 429 unfinishedChunkData = nil 430 } 431 } 432 } 433 434 for index := 0; ; index++ { 435 var err error 436 chunkData := make([]byte, pc.chunkSize+8) 437 438 var readBytes int 439 440 if unfinishedChunkData != nil { 441 copy(chunkData, unfinishedChunkData) 442 readBytes += int(unfinishedChunkSize) 443 unfinishedChunkData = nil 444 log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes) 445 } 446 447 var res []byte 448 res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes)))) 449 450 // hack for ioutil.ReadAll: 451 // a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we 452 // want to propagate the io.EOF error 453 if len(res) == 0 && err == nil { 454 err = io.EOF 455 } 456 copy(chunkData[8+readBytes:], res) 457 458 readBytes += len(res) 459 log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes) 460 461 if err != nil { 462 if err == io.EOF || err == io.ErrUnexpectedEOF { 463 464 pc.cleanChunkLevels() 465 466 // Check if we are appending or the chunk is the only one. 467 if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) { 468 // Data is exactly one chunk.. pick the last chunk key as root 469 chunkWG.Wait() 470 lastChunksKey := parent.chunk[8 : 8+pc.hashSize] 471 copy(pc.rootKey, lastChunksKey) 472 break 473 } 474 } else { 475 close(pc.quitC) 476 break 477 } 478 } 479 480 // Data ended in chunk boundary.. just signal to start bulding tree 481 if readBytes == 0 { 482 pc.buildTree(isAppend, parent, chunkWG, true, nil) 483 break 484 } else { 485 pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG) 486 487 // update tree related parent data structures 488 parent.subtreeSize += uint64(readBytes) 489 parent.branchCount++ 490 491 // Data got exhausted... signal to send any parent tree related chunks 492 if int64(readBytes) < pc.chunkSize { 493 494 pc.cleanChunkLevels() 495 496 // only one data chunk .. so dont add any parent chunk 497 if parent.branchCount <= 1 { 498 chunkWG.Wait() 499 500 if isAppend || pc.depth() == 0 { 501 // No need to build the tree if the depth is 0 502 // or we are appending. 503 // Just use the last key. 504 copy(pc.rootKey, pkey) 505 } else { 506 // We need to build the tree and and provide the lonely 507 // chunk key to replace the last tree chunk key. 508 pc.buildTree(isAppend, parent, chunkWG, true, pkey) 509 } 510 break 511 } 512 513 pc.buildTree(isAppend, parent, chunkWG, true, nil) 514 break 515 } 516 517 if parent.branchCount == pc.branches { 518 pc.buildTree(isAppend, parent, chunkWG, false, nil) 519 parent = NewTreeEntry(pc) 520 } 521 522 } 523 524 workers := pc.getWorkerCount() 525 if int64(len(pc.jobC)) > workers && workers < ChunkProcessors { 526 pc.incrementWorkerCount() 527 go pc.processor(pc.workerCount) 528 } 529 530 } 531 532 } 533 534 func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) { 535 chunkWG.Wait() 536 pc.enqueueTreeChunk(ent, chunkWG, last) 537 538 compress := false 539 endLvl := pc.branches 540 for lvl := int64(0); lvl < pc.branches; lvl++ { 541 lvlCount := int64(len(pc.chunkLevel[lvl])) 542 if lvlCount >= pc.branches { 543 endLvl = lvl + 1 544 compress = true 545 break 546 } 547 } 548 549 if !compress && !last { 550 return 551 } 552 553 // Wait for all the keys to be processed before compressing the tree 554 chunkWG.Wait() 555 556 for lvl := int64(ent.level); lvl < endLvl; lvl++ { 557 558 lvlCount := int64(len(pc.chunkLevel[lvl])) 559 if lvlCount == 1 && last { 560 copy(pc.rootKey, pc.chunkLevel[lvl][0].key) 561 return 562 } 563 564 for startCount := int64(0); startCount < lvlCount; startCount += pc.branches { 565 566 endCount := startCount + pc.branches 567 if endCount > lvlCount { 568 endCount = lvlCount 569 } 570 571 var nextLvlCount int64 572 var tempEntry *TreeEntry 573 if len(pc.chunkLevel[lvl+1]) > 0 { 574 nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1) 575 tempEntry = pc.chunkLevel[lvl+1][nextLvlCount] 576 } 577 if isAppend && tempEntry != nil && tempEntry.updatePending { 578 updateEntry := &TreeEntry{ 579 level: int(lvl + 1), 580 branchCount: 0, 581 subtreeSize: 0, 582 chunk: make([]byte, pc.chunkSize+8), 583 key: make([]byte, pc.hashSize), 584 index: int(nextLvlCount), 585 updatePending: true, 586 } 587 for index := int64(0); index < lvlCount; index++ { 588 updateEntry.branchCount++ 589 updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize 590 copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize]) 591 } 592 593 pc.enqueueTreeChunk(updateEntry, chunkWG, last) 594 595 } else { 596 597 noOfBranches := endCount - startCount 598 newEntry := &TreeEntry{ 599 level: int(lvl + 1), 600 branchCount: noOfBranches, 601 subtreeSize: 0, 602 chunk: make([]byte, (noOfBranches*pc.hashSize)+8), 603 key: make([]byte, pc.hashSize), 604 index: int(nextLvlCount), 605 updatePending: false, 606 } 607 608 index := int64(0) 609 for i := startCount; i < endCount; i++ { 610 entry := pc.chunkLevel[lvl][i] 611 newEntry.subtreeSize += entry.subtreeSize 612 copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize]) 613 index++ 614 } 615 // Lonely chunk key is the key of the last chunk that is only one on the last branch. 616 // In this case, ignore the its tree chunk key and replace it with the lonely chunk key. 617 if lonelyChunkKey != nil { 618 // Overwrite the last tree chunk key with the lonely data chunk key. 619 copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize]) 620 } 621 622 pc.enqueueTreeChunk(newEntry, chunkWG, last) 623 624 } 625 626 } 627 628 if !isAppend { 629 chunkWG.Wait() 630 if compress { 631 pc.chunkLevel[lvl] = nil 632 } 633 } 634 } 635 636 } 637 638 func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) { 639 if ent != nil && ent.branchCount > 0 { 640 641 // wait for data chunks to get over before processing the tree chunk 642 if last { 643 chunkWG.Wait() 644 } 645 646 binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize) 647 ent.key = make([]byte, pc.hashSize) 648 chunkWG.Add(1) 649 select { 650 case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}: 651 case <-pc.quitC: 652 } 653 654 // Update or append based on weather it is a new entry or being reused 655 if ent.updatePending { 656 chunkWG.Wait() 657 pc.chunkLevel[ent.level][ent.index] = ent 658 } else { 659 pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent) 660 } 661 662 } 663 } 664 665 func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address { 666 binary.LittleEndian.PutUint64(chunkData[:8], size) 667 pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize] 668 669 chunkWG.Add(1) 670 select { 671 case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}: 672 case <-pc.quitC: 673 } 674 675 return pkey 676 677 } 678 679 // depth returns the number of chunk levels. 680 // It is used to detect if there is only one data chunk 681 // left for the last branch. 682 func (pc *PyramidChunker) depth() (d int) { 683 for _, l := range pc.chunkLevel { 684 if l == nil { 685 return 686 } 687 d++ 688 } 689 return 690 } 691 692 // cleanChunkLevels removes gaps (nil levels) between chunk levels 693 // that are not nil. 694 func (pc *PyramidChunker) cleanChunkLevels() { 695 for i, l := range pc.chunkLevel { 696 if l == nil { 697 pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...) 698 } 699 } 700 }