// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"io"
	"io/ioutil"
	"sync"
	"time"

	ch "github.com/ethereum/go-ethereum/swarm/chunk"
	"github.com/ethereum/go-ethereum/swarm/log"
)

/*
The main idea of the pyramid chunker is to process the input data without knowing the entire size a priori.
To achieve this, the chunker tree is built from the ground up until the data is exhausted.
This opens up new avenues, such as easy appends and other kinds of modifications to the tree, thereby avoiding
duplication of data chunks.


Below is an example of a two-level chunk tree. The leaf chunks are called data chunks and all the chunks
above them are called tree chunks. The tree chunks directly above the data chunks are level 0, and so on,
up to the root tree chunk.



                                          T10                                       <- Tree chunk lvl1
                                           |
                 __________________________|_____________________________
                /                          |                |             \
               /                           |                \              \
          __T00__                      ___T01__         ___T02__        ___T03__    <- Tree chunks lvl 0
         /   |   \                    /   |    \       /   |    \      /   |    \
        /    |    \                  /    |     \     /    |     \    /    |     \
    D1  D2 ... D128             D1  D2 ... D128   D1  D2 ... D128   D1  D2 ... D128 <- Data Chunks


The split function continuously reads the data, creates data chunks and sends them to storage.
When a certain number of data chunks have been created (defaultBranches), a signal is sent to create a tree
entry. When the number of level 0 tree entries reaches a certain threshold (defaultBranches), another signal
is sent to a tree entry one level up, and so on, until the data is exhausted AND only one
tree entry is present at some level. The key of that tree entry is given out as the rootAddress of the file.

*/

var (
	errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk")
	errLoadingTreeChunk     = errors.New("LoadTree Error: Could not load chunk")
)

const (
	ChunkProcessors = 8
	splitTimeout    = time.Minute * 5
)

type PyramidSplitterParams struct {
	SplitterParams
	getter Getter
}

func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams {
	hashSize := putter.RefSize()
	return &PyramidSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: chunkSize,
				hashSize:  hashSize,
			},
			reader: reader,
			putter: putter,
			addr:   addr,
		},
		getter: getter,
	}
}

/*
When splitting, data is given as an io.Reader, and the key is a hashSize-long byte slice (Address); the root hash of the entire content will fill it once processing finishes.
New chunks to store are stored using the putter which the caller provides.
*/
func PyramidSplit(ctx context.Context, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) {
	return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, ch.DefaultSize)).Split(ctx)
}

func PyramidAppend(ctx context.Context, addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) {
	return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, ch.DefaultSize)).Append(ctx)
}
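
// exampleSplitAndWait is a minimal usage sketch (a hypothetical helper, not
// part of the original API) showing how a caller might drive PyramidSplit.
// The reader, putter and getter are assumed to be supplied by the caller,
// e.g. backed by a local chunk store.
func exampleSplitAndWait(ctx context.Context, reader io.Reader, putter Putter, getter Getter) (Address, error) {
	// Split starts chunking immediately and returns the root address
	// together with a wait function.
	addr, wait, err := PyramidSplit(ctx, reader, putter, getter)
	if err != nil {
		return nil, err
	}
	// wait blocks until all chunks have been flushed to storage, or the
	// context is cancelled.
	if err := wait(ctx); err != nil {
		return nil, err
	}
	return addr, nil
}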

// Entry to create a tree node
type TreeEntry struct {
	level         int
	branchCount   int64
	subtreeSize   uint64
	chunk         []byte
	key           []byte
	index         int  // used in append to indicate the index of existing tree entry
	updatePending bool // indicates if the entry is loaded from existing tree
}

func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {
	return &TreeEntry{
		level:         0,
		branchCount:   0,
		subtreeSize:   0,
		chunk:         make([]byte, pyramid.chunkSize+8),
		key:           make([]byte, pyramid.hashSize),
		index:         0,
		updatePending: false,
	}
}
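
// treeEntryLayoutSketch illustrates (as a hypothetical helper, not used by
// the chunker itself) how a tree entry's chunk buffer is laid out: the
// first 8 bytes hold the subtree size in little-endian order, followed by
// branchCount child keys of hashSize bytes each.
func treeEntryLayoutSketch(ent *TreeEntry, hashSize int64) (subtreeSize uint64, keys [][]byte) {
	subtreeSize = binary.LittleEndian.Uint64(ent.chunk[:8])
	for i := int64(0); i < ent.branchCount; i++ {
		keys = append(keys, ent.chunk[8+i*hashSize:8+(i+1)*hashSize])
	}
	return subtreeSize, keys
}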

// chunkJob is used by the hash processor to create a data/tree chunk and send it to storage
type chunkJob struct {
	key      Address
	chunk    []byte
	parentWg *sync.WaitGroup
}

type PyramidChunker struct {
	chunkSize   int64
	hashSize    int64
	branches    int64
	reader      io.Reader
	putter      Putter
	getter      Getter
	key         Address
	workerCount int64
	workerLock  sync.RWMutex
	jobC        chan *chunkJob
	wg          *sync.WaitGroup
	errC        chan error
	quitC       chan bool
	rootAddress []byte
	chunkLevel  [][]*TreeEntry
}

func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) {
	pc = &PyramidChunker{}
	pc.reader = params.reader
	pc.hashSize = params.hashSize
	pc.branches = params.chunkSize / pc.hashSize
	pc.chunkSize = pc.hashSize * pc.branches
	pc.putter = params.putter
	pc.getter = params.getter
	pc.key = params.addr
	pc.workerCount = 0
	pc.jobC = make(chan *chunkJob, 2*ChunkProcessors)
	pc.wg = &sync.WaitGroup{}
	pc.errC = make(chan error)
	pc.quitC = make(chan bool)
	pc.rootAddress = make([]byte, pc.hashSize)
	pc.chunkLevel = make([][]*TreeEntry, pc.branches)
	return
}

func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader {
	return &LazyChunkReader{
		addr:      addr,
		depth:     depth,
		chunkSize: pc.chunkSize,
		branches:  pc.branches,
		hashSize:  pc.hashSize,
		getter:    getter,
	}
}
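
// exampleJoinReadBack is an illustrative sketch (a hypothetical helper, not
// part of the original file) of reading split content back through Join:
// the returned LazySectionReader fetches and assembles chunks on demand,
// so a plain ReadAt is enough to recover the payload.
func exampleJoinReadBack(pc *PyramidChunker, addr Address, getter Getter, buf []byte) (int, error) {
	reader := pc.Join(addr, getter, 0)
	n, err := reader.ReadAt(buf, 0)
	if err == io.EOF {
		// EOF just means the content is shorter than the buffer.
		err = nil
	}
	return n, err
}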

func (pc *PyramidChunker) incrementWorkerCount() {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	pc.workerCount += 1
}

func (pc *PyramidChunker) getWorkerCount() int64 {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	return pc.workerCount
}

func (pc *PyramidChunker) decrementWorkerCount() {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	pc.workerCount -= 1
}

func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	pc.wg.Add(1)
	pc.prepareChunks(ctx, false)

	// closes internal error channel once all subprocesses in the workgroup have finished
	go func() {

		// waiting for all chunks to finish
		pc.wg.Wait()

		// We close errC here because it is passed down to up to ChunkProcessors parallel routines underneath.
		// If an error happens in one of them, that particular routine raises the error,
		// and once they all complete successfully, control comes back here and we can safely close the channel.
		close(pc.errC)
	}()

	defer close(pc.quitC)
	defer pc.putter.Close()

	select {
	case err := <-pc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-ctx.Done():
		// best effort: drain the putter before returning the context error; the result is intentionally ignored
		_ = pc.putter.Wait(ctx)
		return nil, nil, ctx.Err()
	}
	return pc.rootAddress, pc.putter.Wait, nil

}

func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	// Load the right-most unfinished tree chunks on every level
	pc.loadTree(ctx)

	pc.wg.Add(1)
	pc.prepareChunks(ctx, true)

	// closes internal error channel once all subprocesses in the workgroup have finished
	go func() {

		// waiting for all chunks to finish
		pc.wg.Wait()

		close(pc.errC)
	}()

	defer close(pc.quitC)
	defer pc.putter.Close()

	select {
	case err := <-pc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
	}

	return pc.rootAddress, pc.putter.Wait, nil

}

func (pc *PyramidChunker) processor(ctx context.Context, id int64) {
	defer pc.decrementWorkerCount()
	for {
		select {

		case job, ok := <-pc.jobC:
			if !ok {
				return
			}
			pc.processChunk(ctx, id, job)
		case <-pc.quitC:
			return
		}
	}
}

func (pc *PyramidChunker) processChunk(ctx context.Context, id int64, job *chunkJob) {
	ref, err := pc.putter.Put(ctx, job.chunk)
	if err != nil {
		select {
		case pc.errC <- err:
		case <-pc.quitC:
		}
	}

	// report hash of this chunk one level up (keys correspond to the proper subslice of the parent chunk)
	copy(job.key, ref)

	// the chunk was already sent off to storage by the putter above; signal the parent that it is done
	job.parentWg.Done()
}

func (pc *PyramidChunker) loadTree(ctx context.Context) error {
	// Get the root chunk to get the total size
	chunkData, err := pc.getter.Get(ctx, Reference(pc.key))
	if err != nil {
		return errLoadingTreeRootChunk
	}
	chunkSize := int64(chunkData.Size())
	log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize)

	// If the data size is less than a chunk, add a parent with update pending
	if chunkSize <= pc.chunkSize {
		newEntry := &TreeEntry{
			level:         0,
			branchCount:   1,
			subtreeSize:   uint64(chunkSize),
			chunk:         make([]byte, pc.chunkSize+8),
			key:           make([]byte, pc.hashSize),
			index:         0,
			updatePending: true,
		}
		copy(newEntry.chunk[8:], pc.key)
		pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry)
		return nil
	}

	var treeSize int64
	var depth int
	treeSize = pc.chunkSize
	for ; treeSize < chunkSize; treeSize *= pc.branches {
		depth++
	}
	log.Trace("pyramid.chunker", "depth", depth)
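
	// Worked example (illustrative figures only): with the default 4096-byte
	// chunks and 32-byte hashes, branches = 128. For a total size of
	// 1,000,000 bytes the loop above yields depth = 2, because
	// 4096 < 1,000,000 and 4096*128 = 524,288 < 1,000,000, while
	// 4096*128*128 = 67,108,864 is not.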

	// Add the root chunk entry
	branchCount := int64(len(chunkData)-8) / pc.hashSize
	newEntry := &TreeEntry{
		level:         depth - 1,
		branchCount:   branchCount,
		subtreeSize:   uint64(chunkSize),
		chunk:         chunkData,
		key:           pc.key,
		index:         0,
		updatePending: true,
	}
	pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry)

	// Add the rest of the tree
	for lvl := depth - 1; lvl >= 1; lvl-- {

		//TODO(jmozah): instead of loading finished branches and then trimming at the end,
		//avoid loading them in the first place
		for _, ent := range pc.chunkLevel[lvl] {
			branchCount = int64(len(ent.chunk)-8) / pc.hashSize
			for i := int64(0); i < branchCount; i++ {
				key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)]
				newChunkData, err := pc.getter.Get(ctx, Reference(key))
				if err != nil {
					return errLoadingTreeChunk
				}
				newChunkSize := newChunkData.Size()
				newBranchCount := int64(len(newChunkData)-8) / pc.hashSize
				newEntry := &TreeEntry{
					level:         lvl - 1,
					branchCount:   newBranchCount,
					subtreeSize:   newChunkSize,
					chunk:         newChunkData,
					key:           key,
					index:         0,
					updatePending: true,
				}
				pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry)

			}

			// We need to keep only the right-most unfinished branch, so trim all finished branches
			if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches {
				pc.chunkLevel[lvl-1] = nil
			}
		}
	}

	return nil
}

func (pc *PyramidChunker) prepareChunks(ctx context.Context, isAppend bool) {
	defer pc.wg.Done()

	chunkWG := &sync.WaitGroup{}

	pc.incrementWorkerCount()

	go pc.processor(ctx, pc.workerCount)

	parent := NewTreeEntry(pc)
	var unfinishedChunkData ChunkData
	var unfinishedChunkSize uint64

	if isAppend && len(pc.chunkLevel[0]) != 0 {
		lastIndex := len(pc.chunkLevel[0]) - 1
		ent := pc.chunkLevel[0][lastIndex]

		if ent.branchCount < pc.branches {
			parent = &TreeEntry{
				level:         0,
				branchCount:   ent.branchCount,
				subtreeSize:   ent.subtreeSize,
				chunk:         ent.chunk,
				key:           ent.key,
				index:         lastIndex,
				updatePending: true,
			}

			lastBranch := parent.branchCount - 1
			lastAddress := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize]

			var err error
			unfinishedChunkData, err = pc.getter.Get(ctx, lastAddress)
			if err != nil {
				pc.errC <- err
			}
			unfinishedChunkSize = unfinishedChunkData.Size()
			if unfinishedChunkSize < uint64(pc.chunkSize) {
				parent.subtreeSize = parent.subtreeSize - unfinishedChunkSize
				parent.branchCount = parent.branchCount - 1
			} else {
				unfinishedChunkData = nil
			}
		}
	}

	for index := 0; ; index++ {
		var err error
		chunkData := make([]byte, pc.chunkSize+8)

		var readBytes int

		if unfinishedChunkData != nil {
			copy(chunkData, unfinishedChunkData)
			readBytes += int(unfinishedChunkSize)
			unfinishedChunkData = nil
			log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes)
		}

		var res []byte
		res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes))))

		// hack for ioutil.ReadAll:
		// a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we
		// want to propagate the io.EOF error
		if len(res) == 0 && err == nil {
			err = io.EOF
		}
		copy(chunkData[8+readBytes:], res)

		readBytes += len(res)
		log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes)

		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {

				pc.cleanChunkLevels()

				// Check if we are appending or the chunk is the only one.
				if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) {
					// Data is exactly one chunk: pick the last chunk key as root
					chunkWG.Wait()
					lastChunksAddress := parent.chunk[8 : 8+pc.hashSize]
					copy(pc.rootAddress, lastChunksAddress)
					break
				}
			} else {
				close(pc.quitC)
				break
			}
		}

		// Data ended on a chunk boundary, just signal to start building the tree
		if readBytes == 0 {
			pc.buildTree(isAppend, parent, chunkWG, true, nil)
			break
		} else {
			pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG)

			// update tree-related parent data structures
			parent.subtreeSize += uint64(readBytes)
			parent.branchCount++

			// Data got exhausted... signal to send any parent tree-related chunks
			if int64(readBytes) < pc.chunkSize {

				pc.cleanChunkLevels()

				// Only one data chunk, so don't add any parent chunk
				if parent.branchCount <= 1 {
					chunkWG.Wait()

					if isAppend || pc.depth() == 0 {
						// No need to build the tree if the depth is 0
						// or we are appending.
						// Just use the last key.
						copy(pc.rootAddress, pkey)
					} else {
						// We need to build the tree and provide the lonely
						// chunk key to replace the last tree chunk key.
						pc.buildTree(isAppend, parent, chunkWG, true, pkey)
					}
					break
				}

				pc.buildTree(isAppend, parent, chunkWG, true, nil)
				break
			}

			if parent.branchCount == pc.branches {
				pc.buildTree(isAppend, parent, chunkWG, false, nil)
				parent = NewTreeEntry(pc)
			}

		}

		workers := pc.getWorkerCount()
		if int64(len(pc.jobC)) > workers && workers < ChunkProcessors {
			pc.incrementWorkerCount()
			go pc.processor(ctx, pc.workerCount)
		}

	}

}
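
// buildDataChunkSketch mirrors, as an illustrative stand-alone helper (not
// used by the chunker), what enqueueDataChunk below does to a buffer before
// it is hashed and stored: an 8-byte little-endian length prefix followed
// by the payload.
func buildDataChunkSketch(payload []byte) []byte {
	chunk := make([]byte, 8+len(payload))
	binary.LittleEndian.PutUint64(chunk[:8], uint64(len(payload)))
	copy(chunk[8:], payload)
	return chunk
}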

func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) {
	chunkWG.Wait()
	pc.enqueueTreeChunk(ent, chunkWG, last)

	compress := false
	endLvl := pc.branches
	for lvl := int64(0); lvl < pc.branches; lvl++ {
		lvlCount := int64(len(pc.chunkLevel[lvl]))
		if lvlCount >= pc.branches {
			endLvl = lvl + 1
			compress = true
			break
		}
	}

	if !compress && !last {
		return
	}

	// Wait for all the keys to be processed before compressing the tree
	chunkWG.Wait()

	for lvl := int64(ent.level); lvl < endLvl; lvl++ {

		lvlCount := int64(len(pc.chunkLevel[lvl]))
		if lvlCount == 1 && last {
			copy(pc.rootAddress, pc.chunkLevel[lvl][0].key)
			return
		}

		for startCount := int64(0); startCount < lvlCount; startCount += pc.branches {

			endCount := startCount + pc.branches
			if endCount > lvlCount {
				endCount = lvlCount
			}

			var nextLvlCount int64
			var tempEntry *TreeEntry
			if len(pc.chunkLevel[lvl+1]) > 0 {
				nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1)
				tempEntry = pc.chunkLevel[lvl+1][nextLvlCount]
			}
			if isAppend && tempEntry != nil && tempEntry.updatePending {
				updateEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   0,
					subtreeSize:   0,
					chunk:         make([]byte, pc.chunkSize+8),
					key:           make([]byte, pc.hashSize),
					index:         int(nextLvlCount),
					updatePending: true,
				}
				for index := int64(0); index < lvlCount; index++ {
					updateEntry.branchCount++
					updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize
					copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize])
				}

				pc.enqueueTreeChunk(updateEntry, chunkWG, last)

			} else {

				noOfBranches := endCount - startCount
				newEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   noOfBranches,
					subtreeSize:   0,
					chunk:         make([]byte, (noOfBranches*pc.hashSize)+8),
					key:           make([]byte, pc.hashSize),
					index:         int(nextLvlCount),
					updatePending: false,
				}

				index := int64(0)
				for i := startCount; i < endCount; i++ {
					entry := pc.chunkLevel[lvl][i]
					newEntry.subtreeSize += entry.subtreeSize
					copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize])
					index++
				}
				// The lonely chunk key is the key of the last chunk, the only one on the last branch.
				// In this case, ignore its tree chunk key and replace it with the lonely chunk key.
				if lonelyChunkKey != nil {
					// Overwrite the last tree chunk key with the lonely data chunk key.
					copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize])
				}

				pc.enqueueTreeChunk(newEntry, chunkWG, last)

			}

		}

		if !isAppend {
			chunkWG.Wait()
			if compress {
				pc.chunkLevel[lvl] = nil
			}
		}
	}

}
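
// Worked example for buildTree (illustrative figures only): with
// branches = 128, a stream of exactly 130 full data chunks first fills one
// level 0 tree entry with 128 keys; the remaining 2 data chunks form a
// second level 0 entry when the data runs out. The final call with
// last == true packs the two level 0 entries into a single level 1 entry,
// and since level 1 then holds exactly one entry, its key becomes the
// rootAddress.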

func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) {
	if ent != nil && ent.branchCount > 0 {

		// wait for the data chunks to be done before processing the tree chunk
		if last {
			chunkWG.Wait()
		}

		binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
		ent.key = make([]byte, pc.hashSize)
		chunkWG.Add(1)
		select {
		case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}:
		case <-pc.quitC:
		}

		// Update or append based on whether it is a new entry or being reused
		if ent.updatePending {
			chunkWG.Wait()
			pc.chunkLevel[ent.level][ent.index] = ent
		} else {
			pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent)
		}

	}
}

func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address {
	binary.LittleEndian.PutUint64(chunkData[:8], size)
	pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize]

	chunkWG.Add(1)
	select {
	case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}:
	case <-pc.quitC:
	}

	return pkey

}

// depth returns the number of chunk levels.
// It is used to detect if there is only one data chunk
// left for the last branch.
func (pc *PyramidChunker) depth() (d int) {
	for _, l := range pc.chunkLevel {
		if l == nil {
			return
		}
		d++
	}
	return
}

// cleanChunkLevels removes gaps (nil levels) between chunk levels
// that are not nil.
func (pc *PyramidChunker) cleanChunkLevels() {
	for i, l := range pc.chunkLevel {
		if l == nil {
			pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...)
		}
	}
}
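
// Illustration for cleanChunkLevels (hypothetical state, for documentation
// only): a level slice such as
//
//	[lvl0, nil, lvl2, nil, nil, ...]
//
// is compacted to
//
//	[lvl0, lvl2, nil, nil, nil, ...]
//
// so that depth() sees one contiguous run of non-nil levels.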