github.com/shyftnetwork/go-empyrean@v1.8.3-0.20191127201940-fbfca9338f04/swarm/storage/pyramid.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"io"
	"io/ioutil"
	"sync"
	"time"

	ch "github.com/ShyftNetwork/go-empyrean/swarm/chunk"
	"github.com/ShyftNetwork/go-empyrean/swarm/log"
)

/*
   The main idea of a pyramid chunker is to process the input data without knowing the entire size a priori.
   To achieve this, the chunker tree is built from the ground up until the data is exhausted.
   This opens up new avenues, such as easy appends and other modifications to the tree, thereby avoiding
   duplication of data chunks.


   Below is an example of a two-level chunk tree. The leaf chunks are called data chunks and all the
   chunks above them are called tree chunks. The tree chunks directly above the data chunks are level 0,
   and so on, up to the root tree chunk.



                                             T10                                      <- Tree chunk lvl1
                                              |
                    __________________________|_____________________________
                   /                |                     |                  \
                  /                 |                     \                   \
              __T00__           ___T01__              ___T02__            ___T03__    <- Tree chunks lvl 0
             / /     \         / /      \            / /      \          / /      \
            / /       \       / /        \          / /        \        / /        \
          D1 D2 ...  D128   D1 D2 ...   D128      D1 D2 ...   D128    D1 D2 ...   D128  <- Data chunks


   The split function continuously reads the data, creates data chunks, and sends them to storage.
   When a certain number of data chunks is created (defaultBranches), a signal is sent to create a tree
   entry. When the level 0 tree entries reach a certain threshold (defaultBranches), another signal
   is sent to create a tree entry one level up, and so on, until the data is exhausted AND only one
   tree entry is present at some level. The key of that tree entry is given out as the rootAddress of the file.
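
   For concreteness, a worked example (assuming the default 4096-byte chunk size and a 32-byte
   hash, so branches = 4096/32 = 128): a 1 MiB input yields 1048576/4096 = 256 data chunks,
   which fill 256/128 = 2 level-0 tree chunks, which in turn are referenced by a single
   level-1 root chunk, giving a tree of depth 2.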

*/

var (
	errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk")
	errLoadingTreeChunk     = errors.New("LoadTree Error: Could not load chunk")
)

const (
	ChunkProcessors = 8
	splitTimeout    = time.Minute * 5
)

type PyramidSplitterParams struct {
	SplitterParams
	getter Getter
}

func NewPyramidSplitterParams(addr Address, reader io.Reader, putter Putter, getter Getter, chunkSize int64) *PyramidSplitterParams {
	hashSize := putter.RefSize()
	return &PyramidSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: chunkSize,
				hashSize:  hashSize,
			},
			reader: reader,
			putter: putter,
			addr:   addr,
		},
		getter: getter,
	}
}

/*
   When splitting, data is given as a SectionReader, and the key is a hashSize-long byte slice (Address);
   the root hash of the entire content will fill this once processing finishes.
   New chunks to store are stored using the putter which the caller provides.
*/
func PyramidSplit(ctx context.Context, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) {
	return NewPyramidSplitter(NewPyramidSplitterParams(nil, reader, putter, getter, ch.DefaultSize)).Split(ctx)
}

func PyramidAppend(ctx context.Context, addr Address, reader io.Reader, putter Putter, getter Getter) (Address, func(context.Context) error, error) {
	return NewPyramidSplitter(NewPyramidSplitterParams(addr, reader, putter, getter, ch.DefaultSize)).Append(ctx)
}

// Entry to create a tree node
type TreeEntry struct {
	level         int
	branchCount   int64
	subtreeSize   uint64
	chunk         []byte
	key           []byte
	index         int  // used in append to indicate the index of an existing tree entry
	updatePending bool // indicates if the entry is loaded from an existing tree
}

func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {
	return &TreeEntry{
		level:         0,
		branchCount:   0,
		subtreeSize:   0,
		chunk:         make([]byte, pyramid.chunkSize+8),
		key:           make([]byte, pyramid.hashSize),
		index:         0,
		updatePending: false,
	}
}

// Used by the hash processor to create a data/tree chunk and send it to storage
type chunkJob struct {
	key      Address
	chunk    []byte
	parentWg *sync.WaitGroup
}

type PyramidChunker struct {
	chunkSize   int64
	hashSize    int64
	branches    int64
	reader      io.Reader
	putter      Putter
	getter      Getter
	key         Address
	workerCount int64
	workerLock  sync.RWMutex
	jobC        chan *chunkJob
	wg          *sync.WaitGroup
	errC        chan error
	quitC       chan bool
	rootAddress []byte
	chunkLevel  [][]*TreeEntry
}

func NewPyramidSplitter(params *PyramidSplitterParams) (pc *PyramidChunker) {
	pc = &PyramidChunker{}
	pc.reader = params.reader
	pc.hashSize = params.hashSize
	pc.branches = params.chunkSize / pc.hashSize
	pc.chunkSize = pc.hashSize * pc.branches
	pc.putter = params.putter
	pc.getter = params.getter
	pc.key = params.addr
	pc.workerCount = 0
	pc.jobC = make(chan *chunkJob, 2*ChunkProcessors)
	pc.wg = &sync.WaitGroup{}
	pc.errC = make(chan error)
	pc.quitC = make(chan bool)
	pc.rootAddress = make([]byte, pc.hashSize)
	pc.chunkLevel = make([][]*TreeEntry, pc.branches)
	return
}

func (pc *PyramidChunker) Join(addr Address, getter Getter, depth int) LazySectionReader {
	return &LazyChunkReader{
		addr:      addr,
		depth:     depth,
		chunkSize: pc.chunkSize,
		branches:  pc.branches,
		hashSize:  pc.hashSize,
		getter:    getter,
	}
}
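
// examplePyramidSplitUsage is a minimal usage sketch, not part of the original
// file. It assumes the caller already has a matching Putter/Getter pair (for
// example, from a hasherStore backed by the local chunk store). It splits the
// reader's content and then waits until all chunks have actually been stored.
func examplePyramidSplitUsage(ctx context.Context, data io.Reader, putter Putter, getter Getter) (Address, error) {
	addr, wait, err := PyramidSplit(ctx, data, putter, getter)
	if err != nil {
		return nil, err
	}
	// The returned wait function blocks until storage has finished.
	if err := wait(ctx); err != nil {
		return nil, err
	}
	return addr, nil
}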
func (pc *PyramidChunker) incrementWorkerCount() {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	pc.workerCount += 1
}

func (pc *PyramidChunker) getWorkerCount() int64 {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	return pc.workerCount
}

func (pc *PyramidChunker) decrementWorkerCount() {
	pc.workerLock.Lock()
	defer pc.workerLock.Unlock()
	pc.workerCount -= 1
}

func (pc *PyramidChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	log.Debug("pyramid.chunker: Split()")

	pc.wg.Add(1)
	pc.prepareChunks(ctx, false)

	// closes the internal error channel once all subprocesses in the waitgroup have finished
	go func() {

		// waiting for all chunks to finish
		pc.wg.Wait()

		// We close errC here because it is passed down to the 8 parallel routines underneath.
		// If an error happens in one of them, that particular routine raises the error.
		// Once they all complete successfully, control comes back here and we can safely close the channel.
		close(pc.errC)
	}()

	defer close(pc.quitC)
	defer pc.putter.Close()

	select {
	case err := <-pc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-ctx.Done():
		_ = pc.putter.Wait(ctx) //???
		return nil, nil, ctx.Err()
	}
	return pc.rootAddress, pc.putter.Wait, nil

}

func (pc *PyramidChunker) Append(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	log.Debug("pyramid.chunker: Append()")
	// Load the right-most unfinished tree chunks on every level
	if err := pc.loadTree(ctx); err != nil {
		return nil, nil, err
	}

	pc.wg.Add(1)
	pc.prepareChunks(ctx, true)

	// closes the internal error channel once all subprocesses in the waitgroup have finished
	go func() {

		// waiting for all chunks to finish
		pc.wg.Wait()

		close(pc.errC)
	}()

	defer close(pc.quitC)
	defer pc.putter.Close()

	select {
	case err := <-pc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
	}

	return pc.rootAddress, pc.putter.Wait, nil

}

func (pc *PyramidChunker) processor(ctx context.Context, id int64) {
	defer pc.decrementWorkerCount()
	for {
		select {

		case job, ok := <-pc.jobC:
			if !ok {
				return
			}
			pc.processChunk(ctx, id, job)
		case <-pc.quitC:
			return
		}
	}
}

func (pc *PyramidChunker) processChunk(ctx context.Context, id int64, job *chunkJob) {
	log.Debug("pyramid.chunker: processChunk()", "id", id)

	ref, err := pc.putter.Put(ctx, job.chunk)
	if err != nil {
		select {
		case pc.errC <- err:
		case <-pc.quitC:
		}
	}

	// report the hash of this chunk one level up (the key corresponds to the proper subslice of the parent chunk)
	copy(job.key, ref)

	// send off new chunk to storage
	job.parentWg.Done()
}
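
// exampleChunkLayout is an illustrative sketch, not part of the original file.
// It shows the chunk layout this chunker relies on throughout: the first 8
// bytes carry the little-endian (subtree) size, followed by either raw data
// (data chunks) or a concatenation of child references (tree chunks).
func exampleChunkLayout(payload []byte) []byte {
	chunk := make([]byte, 8+len(payload))
	binary.LittleEndian.PutUint64(chunk[:8], uint64(len(payload)))
	copy(chunk[8:], payload)
	return chunk
}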
func (pc *PyramidChunker) loadTree(ctx context.Context) error {
	log.Debug("pyramid.chunker: loadTree()")
	// Get the root chunk to get the total size
	chunkData, err := pc.getter.Get(ctx, Reference(pc.key))
	if err != nil {
		return errLoadingTreeRootChunk
	}
	chunkSize := int64(chunkData.Size())
	log.Trace("pyramid.chunker: root chunk", "chunk.Size", chunkSize, "pc.chunkSize", pc.chunkSize)

	// if the data size is less than a chunk... add a parent with update pending
	if chunkSize <= pc.chunkSize {
		newEntry := &TreeEntry{
			level:         0,
			branchCount:   1,
			subtreeSize:   uint64(chunkSize),
			chunk:         make([]byte, pc.chunkSize+8),
			key:           make([]byte, pc.hashSize),
			index:         0,
			updatePending: true,
		}
		copy(newEntry.chunk[8:], pc.key)
		pc.chunkLevel[0] = append(pc.chunkLevel[0], newEntry)
		return nil
	}

	var treeSize int64
	var depth int
	treeSize = pc.chunkSize
	for ; treeSize < chunkSize; treeSize *= pc.branches {
		depth++
	}
	log.Trace("pyramid.chunker", "depth", depth)

	// Add the root chunk entry
	branchCount := int64(len(chunkData)-8) / pc.hashSize
	newEntry := &TreeEntry{
		level:         depth - 1,
		branchCount:   branchCount,
		subtreeSize:   uint64(chunkSize),
		chunk:         chunkData,
		key:           pc.key,
		index:         0,
		updatePending: true,
	}
	pc.chunkLevel[depth-1] = append(pc.chunkLevel[depth-1], newEntry)

	// Add the rest of the tree
	for lvl := depth - 1; lvl >= 1; lvl-- {

		//TODO(jmozah): instead of loading finished branches and then trimming at the end,
		//avoid loading them in the first place
		for _, ent := range pc.chunkLevel[lvl] {
			branchCount = int64(len(ent.chunk)-8) / pc.hashSize
			for i := int64(0); i < branchCount; i++ {
				key := ent.chunk[8+(i*pc.hashSize) : 8+((i+1)*pc.hashSize)]
				newChunkData, err := pc.getter.Get(ctx, Reference(key))
				if err != nil {
					return errLoadingTreeChunk
				}
				newChunkSize := newChunkData.Size()
				newBranchCount := int64(len(newChunkData)-8) / pc.hashSize
				newEntry := &TreeEntry{
					level:         lvl - 1,
					branchCount:   newBranchCount,
					subtreeSize:   newChunkSize,
					chunk:         newChunkData,
					key:           key,
					index:         0,
					updatePending: true,
				}
				pc.chunkLevel[lvl-1] = append(pc.chunkLevel[lvl-1], newEntry)

			}

			// We only need the right-most unfinished branch, so trim all finished branches
			if int64(len(pc.chunkLevel[lvl-1])) >= pc.branches {
				pc.chunkLevel[lvl-1] = nil
			}
		}
	}

	return nil
}
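
// exampleTreeDepth is an illustrative sketch, not part of the original file.
// It mirrors the depth loop in loadTree: depth is the smallest number of tree
// levels such that chunkSize * branches^depth covers the total content size.
// With the defaults (chunkSize = 4096, branches = 128), a 1 MiB file gives depth 2.
func exampleTreeDepth(totalSize, chunkSize, branches int64) int {
	depth := 0
	for treeSize := chunkSize; treeSize < totalSize; treeSize *= branches {
		depth++
	}
	return depth
}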
func (pc *PyramidChunker) prepareChunks(ctx context.Context, isAppend bool) {
	log.Debug("pyramid.chunker: prepareChunks", "isAppend", isAppend)
	defer pc.wg.Done()

	chunkWG := &sync.WaitGroup{}

	pc.incrementWorkerCount()

	go pc.processor(ctx, pc.workerCount)

	parent := NewTreeEntry(pc)
	var unfinishedChunkData ChunkData
	var unfinishedChunkSize uint64

	if isAppend && len(pc.chunkLevel[0]) != 0 {
		lastIndex := len(pc.chunkLevel[0]) - 1
		ent := pc.chunkLevel[0][lastIndex]

		if ent.branchCount < pc.branches {
			parent = &TreeEntry{
				level:         0,
				branchCount:   ent.branchCount,
				subtreeSize:   ent.subtreeSize,
				chunk:         ent.chunk,
				key:           ent.key,
				index:         lastIndex,
				updatePending: true,
			}

			lastBranch := parent.branchCount - 1
			lastAddress := parent.chunk[8+lastBranch*pc.hashSize : 8+(lastBranch+1)*pc.hashSize]

			var err error
			unfinishedChunkData, err = pc.getter.Get(ctx, lastAddress)
			if err != nil {
				pc.errC <- err
			}
			unfinishedChunkSize = unfinishedChunkData.Size()
			if unfinishedChunkSize < uint64(pc.chunkSize) {
				parent.subtreeSize = parent.subtreeSize - unfinishedChunkSize
				parent.branchCount = parent.branchCount - 1
			} else {
				unfinishedChunkData = nil
			}
		}
	}

	for index := 0; ; index++ {
		var err error
		chunkData := make([]byte, pc.chunkSize+8)

		var readBytes int

		if unfinishedChunkData != nil {
			copy(chunkData, unfinishedChunkData)
			readBytes += int(unfinishedChunkSize)
			unfinishedChunkData = nil
			log.Trace("pyramid.chunker: found unfinished chunk", "readBytes", readBytes)
		}

		var res []byte
		res, err = ioutil.ReadAll(io.LimitReader(pc.reader, int64(len(chunkData)-(8+readBytes))))

		// hack for ioutil.ReadAll:
		// a successful call to ioutil.ReadAll returns err == nil, not err == EOF, whereas we
		// want to propagate the io.EOF error
		if len(res) == 0 && err == nil {
			err = io.EOF
		}
		copy(chunkData[8+readBytes:], res)

		readBytes += len(res)
		log.Trace("pyramid.chunker: copied all data", "readBytes", readBytes)

		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {

				pc.cleanChunkLevels()

				// Check if we are appending or the chunk is the only one.
				if parent.branchCount == 1 && (pc.depth() == 0 || isAppend) {
					// Data is exactly one chunk: pick the last chunk key as root
					chunkWG.Wait()
					lastChunksAddress := parent.chunk[8 : 8+pc.hashSize]
					copy(pc.rootAddress, lastChunksAddress)
					break
				}
			} else {
				close(pc.quitC)
				break
			}
		}

		// Data ended on a chunk boundary: just signal to start building the tree
		if readBytes == 0 {
			pc.buildTree(isAppend, parent, chunkWG, true, nil)
			break
		} else {
			pkey := pc.enqueueDataChunk(chunkData, uint64(readBytes), parent, chunkWG)

			// update tree-related parent data structures
			parent.subtreeSize += uint64(readBytes)
			parent.branchCount++

			// Data got exhausted: signal to send any parent tree-related chunks
			if int64(readBytes) < pc.chunkSize {

				pc.cleanChunkLevels()

				// only one data chunk, so don't add any parent chunk
				if parent.branchCount <= 1 {
					chunkWG.Wait()

					if isAppend || pc.depth() == 0 {
						// No need to build the tree if the depth is 0
						// or we are appending.
						// Just use the last key.
						copy(pc.rootAddress, pkey)
					} else {
						// We need to build the tree and provide the lonely
						// chunk key to replace the last tree chunk key.
						pc.buildTree(isAppend, parent, chunkWG, true, pkey)
					}
					break
				}

				pc.buildTree(isAppend, parent, chunkWG, true, nil)
				break
			}

			if parent.branchCount == pc.branches {
				pc.buildTree(isAppend, parent, chunkWG, false, nil)
				parent = NewTreeEntry(pc)
			}

		}

		workers := pc.getWorkerCount()
		if int64(len(pc.jobC)) > workers && workers < ChunkProcessors {
			pc.incrementWorkerCount()
			go pc.processor(ctx, pc.workerCount)
		}

	}

}
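
// exampleLimitedRead is an illustrative sketch, not part of the original file.
// It isolates the read pattern used in prepareChunks: ioutil.ReadAll on an
// io.LimitReader returns err == nil on a clean end of input, so an empty read
// has to be translated to io.EOF by hand to terminate the split loop.
func exampleLimitedRead(r io.Reader, max int64) ([]byte, error) {
	res, err := ioutil.ReadAll(io.LimitReader(r, max))
	if len(res) == 0 && err == nil {
		err = io.EOF
	}
	return res, err
}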
func (pc *PyramidChunker) buildTree(isAppend bool, ent *TreeEntry, chunkWG *sync.WaitGroup, last bool, lonelyChunkKey []byte) {
	chunkWG.Wait()
	pc.enqueueTreeChunk(ent, chunkWG, last)

	compress := false
	endLvl := pc.branches
	for lvl := int64(0); lvl < pc.branches; lvl++ {
		lvlCount := int64(len(pc.chunkLevel[lvl]))
		if lvlCount >= pc.branches {
			endLvl = lvl + 1
			compress = true
			break
		}
	}

	if !compress && !last {
		return
	}

	// Wait for all the keys to be processed before compressing the tree
	chunkWG.Wait()

	for lvl := int64(ent.level); lvl < endLvl; lvl++ {

		lvlCount := int64(len(pc.chunkLevel[lvl]))
		if lvlCount == 1 && last {
			copy(pc.rootAddress, pc.chunkLevel[lvl][0].key)
			return
		}

		for startCount := int64(0); startCount < lvlCount; startCount += pc.branches {

			endCount := startCount + pc.branches
			if endCount > lvlCount {
				endCount = lvlCount
			}

			var nextLvlCount int64
			var tempEntry *TreeEntry
			if len(pc.chunkLevel[lvl+1]) > 0 {
				nextLvlCount = int64(len(pc.chunkLevel[lvl+1]) - 1)
				tempEntry = pc.chunkLevel[lvl+1][nextLvlCount]
			}
			if isAppend && tempEntry != nil && tempEntry.updatePending {
				updateEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   0,
					subtreeSize:   0,
					chunk:         make([]byte, pc.chunkSize+8),
					key:           make([]byte, pc.hashSize),
					index:         int(nextLvlCount),
					updatePending: true,
				}
				for index := int64(0); index < lvlCount; index++ {
					updateEntry.branchCount++
					updateEntry.subtreeSize += pc.chunkLevel[lvl][index].subtreeSize
					copy(updateEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], pc.chunkLevel[lvl][index].key[:pc.hashSize])
				}

				pc.enqueueTreeChunk(updateEntry, chunkWG, last)

			} else {

				noOfBranches := endCount - startCount
				newEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   noOfBranches,
					subtreeSize:   0,
					chunk:         make([]byte, (noOfBranches*pc.hashSize)+8),
					key:           make([]byte, pc.hashSize),
					index:         int(nextLvlCount),
					updatePending: false,
				}

				index := int64(0)
				for i := startCount; i < endCount; i++ {
					entry := pc.chunkLevel[lvl][i]
					newEntry.subtreeSize += entry.subtreeSize
					copy(newEntry.chunk[8+(index*pc.hashSize):8+((index+1)*pc.hashSize)], entry.key[:pc.hashSize])
					index++
				}
				// The lonely chunk key is the key of the last data chunk when it is the
				// only chunk on the last branch. In this case, ignore its tree chunk key
				// and replace it with the lonely chunk key.
				if lonelyChunkKey != nil {
					// Overwrite the last tree chunk key with the lonely data chunk key.
					copy(newEntry.chunk[int64(len(newEntry.chunk))-pc.hashSize:], lonelyChunkKey[:pc.hashSize])
				}

				pc.enqueueTreeChunk(newEntry, chunkWG, last)

			}

		}

		if !isAppend {
			chunkWG.Wait()
			if compress {
				pc.chunkLevel[lvl] = nil
			}
		}
	}

}

func (pc *PyramidChunker) enqueueTreeChunk(ent *TreeEntry, chunkWG *sync.WaitGroup, last bool) {
	if ent != nil && ent.branchCount > 0 {

		// wait for the data chunks to be done before processing the tree chunk
		if last {
			chunkWG.Wait()
		}

		binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
		ent.key = make([]byte, pc.hashSize)
		chunkWG.Add(1)
		select {
		case pc.jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*pc.hashSize+8], chunkWG}:
		case <-pc.quitC:
		}

		// Update or append, based on whether it is a new entry or one being reused
		if ent.updatePending {
			chunkWG.Wait()
			pc.chunkLevel[ent.level][ent.index] = ent
		} else {
			pc.chunkLevel[ent.level] = append(pc.chunkLevel[ent.level], ent)
		}

	}
}

func (pc *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup) Address {
	binary.LittleEndian.PutUint64(chunkData[:8], size)
	pkey := parent.chunk[8+parent.branchCount*pc.hashSize : 8+(parent.branchCount+1)*pc.hashSize]

	chunkWG.Add(1)
	select {
	case pc.jobC <- &chunkJob{pkey, chunkData[:size+8], chunkWG}:
	case <-pc.quitC:
	}

	return pkey

}

// depth returns the number of chunk levels.
// It is used to detect if there is only one data chunk
// left for the last branch.
func (pc *PyramidChunker) depth() (d int) {
	for _, l := range pc.chunkLevel {
		if l == nil {
			return
		}
		d++
	}
	return
}

// cleanChunkLevels removes gaps (nil levels) between chunk levels
// that are not nil.
func (pc *PyramidChunker) cleanChunkLevels() {
	for i, l := range pc.chunkLevel {
		if l == nil {
			pc.chunkLevel = append(pc.chunkLevel[:i], append(pc.chunkLevel[i+1:], nil)...)
		}
	}
}
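
// examplePyramidAppendUsage is a minimal usage sketch, not part of the original
// file. It appends new data to content previously split under addr, assuming
// the same Putter/Getter backing store is supplied, and returns the new root.
func examplePyramidAppendUsage(ctx context.Context, addr Address, more io.Reader, putter Putter, getter Getter) (Address, error) {
	newAddr, wait, err := PyramidAppend(ctx, addr, more, putter, getter)
	if err != nil {
		return nil, err
	}
	if err := wait(ctx); err != nil {
		return nil, err
	}
	return newAddr, nil
}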