github.com/FUSIONFoundation/efsn@v3.6.2-0.20200916075423-dbb5dd5d2cc7+incompatible/swarm/storage/pyramid.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package storage 18 19 import ( 20 "context" 21 "encoding/binary" 22 "errors" 23 "io" 24 "io/ioutil" 25 "sync" 26 "time" 27 28 ch "github.com/FusionFoundation/efsn/swarm/chunk" 29 "github.com/FusionFoundation/efsn/swarm/log" 30 ) 31 32 /* 33 The main idea of a pyramid chunker is to process the input data without knowing the entire size apriori. 34 For this to be achieved, the chunker tree is built from the ground up until the data is exhausted. 35 This opens up new aveneus such as easy append and other sort of modifications to the tree thereby avoiding 36 duplication of data chunks. 37 38 39 Below is an example of a two level chunks tree. The leaf chunks are called data chunks and all the above 40 chunks are called tree chunks. The tree chunk above data chunks is level 0 and so on until it reaches 41 the root tree chunk. 42 43 44 45 T10 <- Tree chunk lvl1 46 | 47 __________________________|_____________________________ 48 / | | \ 49 / | \ \ 50 __T00__ ___T01__ ___T02__ ___T03__ <- Tree chunks lvl 0 51 / / \ / / \ / / \ / / \ 52 / / \ / / \ / / \ / / \ 53 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 <- Data Chunks 54 55 56 The split function continuously read the data and creates data chunks and send them to storage. 57 When certain no of data chunks are created (defaultBranches), a signal is sent to create a tree 58 entry. When the level 0 tree entries reaches certain threshold (defaultBranches), another signal 59 is sent to a tree entry one level up.. and so on... until only the data is exhausted AND only one 60 tree entry is present in certain level. The key of tree entry is given out as the rootAddress of the file. 61 62 */ 63 64 var ( 65 errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk") 66 errLoadingTreeChunk = errors.New("LoadTree Error: Could not load chunk") 67 ) 68 69 const ( 70 ChunkProcessors = 8 71 splitTimeout = time.Minute * 5 72 ) 73 74 const ( 75 DataChunk = 0 76 TreeChunk = 1 77 ) 78 79 type PyramidSplitterParams struct { 80 SplitterParams 81 getter Getter 82 } 83 84 func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams { 85 hashSize := putter.RefSize() 86 return &PyramidSplitterParams{ 87 SplitterParams: SplitterParams{ 88 ChunkerParams: ChunkerParams{ 89 chunkSize: chunkSize, 90 hashSize: hashSize, 91 }, 92 reader: reader, 93 putter: putter, 94 addr: addr, 95 }, 96 getter: getter, 97 } 98 } 99 100 /* 101 When splitting, data is given as a SectionReader, and the key is a hashSize long byte slice (Address), the root hash of the entire content will fill this once processing finishes. 102 New chunks to store are store using the putter which the caller provides. 103 */ 104 func PyramidSplit(ctx context.Context, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) { 105 return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, ch.DefaultSize)).Split(ctx) 106 } 107 108 func PyramidAppend(ctx context.Context, addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) { 109 return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, ch.DefaultSize)).Append(ctx) 110 } 111 112 // Entry to create a tree node 113 type TreeEntry struct { 114 level int 115 branchCount int64 116 subtreeSize uint64 117 chunk []byte 118 key []byte 119 index int // used in append to indicate the index of existing tree entry 120 updatePending bool // indicates if the entry is loaded from existing tree 121 } 122 123 func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry { 124 return &TreeEntry{ 125 level: 0, 126 branchCount: 0, 127 subtreeSize: 0, 128 chunk: make([]byte, pyramid.chunkSize+8), 129 key: make([]byte, pyramid.hashSize), 130 index: 0, 131 updatePending: false, 132 } 133 } 134 135 // Used by the hash processor to create a data/tree chunk and send to storage 136 type chunkJob struct { 137 key Address 138 chunk []byte 139 parentWg *sync.WaitGroup 140 } 141 142 type PyramidChunker struct { 143 chunkSize int64 144 hashSize int64 145 branches int64 146 reader io.Reader 147 putter Putter 148 getter Getter 149 key Address 150 workerCount int64 151 workerLock sync.RWMutex 152 jobC chan *chunkJob 153 wg *sync.WaitGroup 154 errC chan error 155 quitC chan bool 156 rootAddress []byte 157 chunkLevel [][]*TreeEntry 158 } 159 160 func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) { 161 pc = &PyramidChunker{} 162 pc.reader = params.reader 163 pc.hashSize = params.hashSize 164 pc.branches = params.chunkSize / pc.hashSize 165 pc.chunkSize = pc.hashSize * pc.branches 166 pc.putter = params.putter 167 pc.getter = params.getter 168 pc.key = params.addr 169 pc.workerCount = 0 170 pc.jobC = make(chan *chunkJob, 2*ChunkProcessors) 171 pc.wg = &sync.WaitGroup{} 172 pc.errC = make(chan error) 173 pc.quitC = make(chan bool) 174 pc.rootAddress = make([]byte, pc.hashSize) 175 pc.chunkLevel = make([][]*TreeEntry, pc.branches) 176 return 177 } 178 179 func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader { 180 return &LazyChunkReader{ 181 addr: addr, 182 depth: depth, 183 chunkSize: pc.chunkSize, 184 branches: pc.branches, 185 hashSize: pc.hashSize, 186 getter: getter, 187 } 188 } 189 190 func (pc *PyramidChunker) incrementWorkerCount() { 191 pc.workerLock.Lock() 192 defer pc.workerLock.Unlock() 193 pc.workerCount += 1 194 } 195 196 func (pc *PyramidChunker) getWorkerCount() int64 { 197 pc.workerLock.Lock() 198 defer pc.workerLock.Unlock() 199 return pc.workerCount 200 } 201 202 func (pc *PyramidChunker) decrementWorkerCount() { 203 pc.workerLock.Lock() 204 defer pc.workerLock.Unlock() 205 pc.workerCount -= 1 206 } 207 208 func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 209 log.Debug("pyramid.chunker: Split()") 210 211 pc.wg.Add(1) 212 pc.prepareChunks(ctx, false) 213 214 // closes internal error channel if all subprocesses in the workgroup finished 215 go func() { 216 217 // waiting for all chunks to finish 218 pc.wg.Wait() 219 220 //We close errC here because this is passed down to 8 parallel routines underneath. 221 // if a error happens in one of them.. that particular routine raises error... 222 // once they all complete successfully, the control comes back and we can safely close this here. 223 close(pc.errC) 224 }() 225 226 defer close(pc.quitC) 227 defer pc.putter.Close() 228 229 select { 230 case err := <-pc.errC: 231 if err != nil { 232 return nil, nil, err 233 } 234 case <-ctx.Done(): 235 _ = pc.putter.Wait(ctx) //??? 236 return nil, nil, ctx.Err() 237 } 238 return pc.rootAddress, pc.putter.Wait, nil 239 240 } 241 242 func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 243 log.Debug("pyramid.chunker: Append()") 244 // Load the right most unfinished tree chunks in every level 245 pc.loadTree(ctx) 246 247 pc.wg.Add(1) 248 pc.prepareChunks(ctx, true) 249 250 // closes internal error channel if all subprocesses in the workgroup finished 251 go func() { 252 253 // waiting for all chunks to finish 254 pc.wg.Wait() 255 256 close(pc.errC) 257 }() 258 259 defer close(pc.quitC) 260 defer pc.putter.Close() 261 262 select { 263 case err := <-pc.errC: 264 if err != nil { 265 return nil, nil, err 266 } 267 case <-time.NewTimer(splitTimeout).C: 268 } 269 270 return pc.rootAddress, pc.putter.Wait, nil 271 272 } 273 274 func (pc *PyramidChunker) processor(ctx context.Context, id int64) { 275 defer pc.decrementWorkerCount() 276 for { 277 select { 278 279 case job, ok := <-pc.jobC: 280 if !ok { 281 return 282 } 283 pc.processChunk(ctx, id, job) 284 case <-pc.quitC: 285 return 286 } 287 } 288 } 289 290 func (pc *PyramidChunker) processChunk(ctx context.Context, id int64, job *chunkJob) { 291 log.Debug("pyramid.chunker: processChunk()", "id", id) 292 293 ref, err := pc.putter.Put(ctx, job.chunk) 294 if err != nil { 295 select { 296 case pc.errC <- err: 297 case <-pc.quitC: 298 } 299 } 300 301 // report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk) 302 copy(job.key, ref) 303 304 // send off new chunk to storage 305 job.parentWg.Done() 306 } 307 308 func (pc *PyramidChunker) loadTree(ctx context.Context) error { 309 log.Debug("pyramid.chunker: loadTree()") 310 // Get the root chunk to get the total size 311 chunkData, err := pc.getter.Get(ctx, Reference(pc.key)) 312 if err != nil { 313 return errLoadingTreeRootChunk 314 } 315 chunkSize := int64(chunkData.Size()) 316 log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize) 317 318 //if data size is less than a chunk... add a parent with update as pending 319 if chunkSize <= pc.chunkSize { 320 newEntry := &TreeEntry{ 321 level: 0, 322 branchCount: 1, 323 subtreeSize: uint64(chunkSize), 324 chunk: make([]byte, pc.chunkSize+8), 325 key: make([]byte, pc.hashSize), 326 index: 0, 327 updatePending: true, 328 } 329 copy(newEntry.chunk[8:], pc.key) 330 pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry) 331 return nil 332 } 333 334 var treeSize int64 335 var depth int 336 treeSize = pc.chunkSize 337 for ; treeSize < chunkSize; treeSize *= pc.branches { 338 depth++ 339 } 340 log.Trace("pyramid.chunker", "depth", depth) 341 342 // Add the root chunk entry 343 branchCount := int64(len(chunkData)-8) / pc.hashSize 344 newEntry := &TreeEntry{ 345 level: depth - 1, 346 branchCount: branchCount, 347 subtreeSize: uint64(chunkSize), 348 chunk: chunkData, 349 key: pc.key, 350 index: 0, 351 updatePending: true, 352 } 353 pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry) 354 355 // Add the rest of the tree 356 for lvl := depth - 1; lvl >= 1; lvl-- { 357 358 //TODO(jmozah): instead of loading finished branches and then trim in the end, 359 //avoid loading them in the first place 360 for _, ent := range pc.chunkLevel[lvl] { 361 branchCount = int64(len(ent.chunk)-8) / pc.hashSize 362 for i := int64(0); i < branchCount; i++ { 363 key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)] 364 newChunkData, err := pc.getter.Get(ctx, Reference(key)) 365 if err != nil { 366 return errLoadingTreeChunk 367 } 368 newChunkSize := newChunkData.Size() 369 bewBranchCount := int64(len(newChunkData)-8) / pc.hashSize 370 newEntry := &TreeEntry{ 371 level: lvl - 1, 372 branchCount: bewBranchCount, 373 subtreeSize: newChunkSize, 374 chunk: newChunkData, 375 key: key, 376 index: 0, 377 updatePending: true, 378 } 379 pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry) 380 381 } 382 383 // We need to get only the right most unfinished branch.. so trim all finished branches 384 if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches { 385 pc.chunkLevel[lvl-1] = nil 386 } 387 } 388 } 389 390 return nil 391 } 392 393 func (pc *PyramidChunker) prepareChunks(ctx context.Context, isAppend bool) { 394 log.Debug("pyramid.chunker: prepareChunks", "isAppend", isAppend) 395 defer pc.wg.Done() 396 397 chunkWG := &sync.WaitGroup{} 398 399 pc.incrementWorkerCount() 400 401 go pc.processor(ctx, pc.workerCount) 402 403 parent := NewTreeEntry(pc) 404 var unfinishedChunkData ChunkData 405 var unfinishedChunkSize uint64 406 407 if isAppend && len(pc.chunkLevel[0]) != 0 { 408 lastIndex := len(pc.chunkLevel[0]) - 1 409 ent := pc.chunkLevel[0][lastIndex] 410 411 if ent.branchCount < pc.branches { 412 parent = &TreeEntry{ 413 level: 0, 414 branchCount: ent.branchCount, 415 subtreeSize: ent.subtreeSize, 416 chunk: ent.chunk, 417 key: ent.key, 418 index: lastIndex, 419 updatePending: true, 420 } 421 422 lastBranch := parent.branchCount - 1 423 lastAddress := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize] 424 425 var err error 426 unfinishedChunkData, err = pc.getter.Get(ctx, lastAddress) 427 if err != nil { 428 pc.errC <- err 429 } 430 unfinishedChunkSize = unfinishedChunkData.Size() 431 if unfinishedChunkSize < uint64(pc.chunkSize) { 432 parent.subtreeSize = parent.subtreeSize - unfinishedChunkSize 433 parent.branchCount = parent.branchCount - 1 434 } else { 435 unfinishedChunkData = nil 436 } 437 } 438 } 439 440 for index := 0; ; index++ { 441 var err error 442 chunkData := make([]byte, pc.chunkSize+8) 443 444 var readBytes int 445 446 if unfinishedChunkData != nil { 447 copy(chunkData, unfinishedChunkData) 448 readBytes += int(unfinishedChunkSize) 449 unfinishedChunkData = nil 450 log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes) 451 } 452 453 var res []byte 454 res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes)))) 455 456 // hack for ioutil.ReadAll: 457 // a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we 458 // want to propagate the io.EOF error 459 if len(res) == 0 && err == nil { 460 err = io.EOF 461 } 462 copy(chunkData[8+readBytes:], res) 463 464 readBytes += len(res) 465 log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes) 466 467 if err != nil { 468 if err == io.EOF || err == io.ErrUnexpectedEOF { 469 470 pc.cleanChunkLevels() 471 472 // Check if we are appending or the chunk is the only one. 473 if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) { 474 // Data is exactly one chunk.. pick the last chunk key as root 475 chunkWG.Wait() 476 lastChunksAddress := parent.chunk[8 : 8+pc.hashSize] 477 copy(pc.rootAddress, lastChunksAddress) 478 break 479 } 480 } else { 481 close(pc.quitC) 482 break 483 } 484 } 485 486 // Data ended in chunk boundary.. just signal to start bulding tree 487 if readBytes == 0 { 488 pc.buildTree(isAppend, parent, chunkWG, true, nil) 489 break 490 } else { 491 pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG) 492 493 // update tree related parent data structures 494 parent.subtreeSize += uint64(readBytes) 495 parent.branchCount++ 496 497 // Data got exhausted... signal to send any parent tree related chunks 498 if int64(readBytes) < pc.chunkSize { 499 500 pc.cleanChunkLevels() 501 502 // only one data chunk .. so dont add any parent chunk 503 if parent.branchCount <= 1 { 504 chunkWG.Wait() 505 506 if isAppend || pc.depth() == 0 { 507 // No need to build the tree if the depth is 0 508 // or we are appending. 509 // Just use the last key. 510 copy(pc.rootAddress, pkey) 511 } else { 512 // We need to build the tree and and provide the lonely 513 // chunk key to replace the last tree chunk key. 514 pc.buildTree(isAppend, parent, chunkWG, true, pkey) 515 } 516 break 517 } 518 519 pc.buildTree(isAppend, parent, chunkWG, true, nil) 520 break 521 } 522 523 if parent.branchCount == pc.branches { 524 pc.buildTree(isAppend, parent, chunkWG, false, nil) 525 parent = NewTreeEntry(pc) 526 } 527 528 } 529 530 workers := pc.getWorkerCount() 531 if int64(len(pc.jobC)) > workers && workers < ChunkProcessors { 532 pc.incrementWorkerCount() 533 go pc.processor(ctx, pc.workerCount) 534 } 535 536 } 537 538 } 539 540 func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) { 541 chunkWG.Wait() 542 pc.enqueueTreeChunk(ent, chunkWG, last) 543 544 compress := false 545 endLvl := pc.branches 546 for lvl := int64(0); lvl < pc.branches; lvl++ { 547 lvlCount := int64(len(pc.chunkLevel[lvl])) 548 if lvlCount >= pc.branches { 549 endLvl = lvl + 1 550 compress = true 551 break 552 } 553 } 554 555 if !compress && !last { 556 return 557 } 558 559 // Wait for all the keys to be processed before compressing the tree 560 chunkWG.Wait() 561 562 for lvl := int64(ent.level); lvl < endLvl; lvl++ { 563 564 lvlCount := int64(len(pc.chunkLevel[lvl])) 565 if lvlCount == 1 && last { 566 copy(pc.rootAddress, pc.chunkLevel[lvl][0].key) 567 return 568 } 569 570 for startCount := int64(0); startCount < lvlCount; startCount += pc.branches { 571 572 endCount := startCount + pc.branches 573 if endCount > lvlCount { 574 endCount = lvlCount 575 } 576 577 var nextLvlCount int64 578 var tempEntry *TreeEntry 579 if len(pc.chunkLevel[lvl+1]) > 0 { 580 nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1) 581 tempEntry = pc.chunkLevel[lvl+1][nextLvlCount] 582 } 583 if isAppend && tempEntry != nil && tempEntry.updatePending { 584 updateEntry := &TreeEntry{ 585 level: int(lvl + 1), 586 branchCount: 0, 587 subtreeSize: 0, 588 chunk: make([]byte, pc.chunkSize+8), 589 key: make([]byte, pc.hashSize), 590 index: int(nextLvlCount), 591 updatePending: true, 592 } 593 for index := int64(0); index < lvlCount; index++ { 594 updateEntry.branchCount++ 595 updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize 596 copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize]) 597 } 598 599 pc.enqueueTreeChunk(updateEntry, chunkWG, last) 600 601 } else { 602 603 noOfBranches := endCount - startCount 604 newEntry := &TreeEntry{ 605 level: int(lvl + 1), 606 branchCount: noOfBranches, 607 subtreeSize: 0, 608 chunk: make([]byte, (noOfBranches*pc.hashSize)+8), 609 key: make([]byte, pc.hashSize), 610 index: int(nextLvlCount), 611 updatePending: false, 612 } 613 614 index := int64(0) 615 for i := startCount; i < endCount; i++ { 616 entry := pc.chunkLevel[lvl][i] 617 newEntry.subtreeSize += entry.subtreeSize 618 copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize]) 619 index++ 620 } 621 // Lonely chunk key is the key of the last chunk that is only one on the last branch. 622 // In this case, ignore the its tree chunk key and replace it with the lonely chunk key. 623 if lonelyChunkKey != nil { 624 // Overwrite the last tree chunk key with the lonely data chunk key. 625 copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize]) 626 } 627 628 pc.enqueueTreeChunk(newEntry, chunkWG, last) 629 630 } 631 632 } 633 634 if !isAppend { 635 chunkWG.Wait() 636 if compress { 637 pc.chunkLevel[lvl] = nil 638 } 639 } 640 } 641 642 } 643 644 func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) { 645 if ent != nil && ent.branchCount > 0 { 646 647 // wait for data chunks to get over before processing the tree chunk 648 if last { 649 chunkWG.Wait() 650 } 651 652 binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize) 653 ent.key = make([]byte, pc.hashSize) 654 chunkWG.Add(1) 655 select { 656 case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}: 657 case <-pc.quitC: 658 } 659 660 // Update or append based on weather it is a new entry or being reused 661 if ent.updatePending { 662 chunkWG.Wait() 663 pc.chunkLevel[ent.level][ent.index] = ent 664 } else { 665 pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent) 666 } 667 668 } 669 } 670 671 func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address { 672 binary.LittleEndian.PutUint64(chunkData[:8], size) 673 pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize] 674 675 chunkWG.Add(1) 676 select { 677 case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}: 678 case <-pc.quitC: 679 } 680 681 return pkey 682 683 } 684 685 // depth returns the number of chunk levels. 686 // It is used to detect if there is only one data chunk 687 // left for the last branch. 688 func (pc *PyramidChunker) depth() (d int) { 689 for _, l := range pc.chunkLevel { 690 if l == nil { 691 return 692 } 693 d++ 694 } 695 return 696 } 697 698 // cleanChunkLevels removes gaps (nil levels) between chunk levels 699 // that are not nil. 700 func (pc *PyramidChunker) cleanChunkLevels() { 701 for i, l := range pc.chunkLevel { 702 if l == nil { 703 pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...) 704 } 705 } 706 }