github.com/m3shine/gochain@v2.2.26+incompatible/swarm/storage/pyramid.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package storage 18 19 import ( 20 "encoding/binary" 21 "errors" 22 "fmt" 23 "io" 24 "sync" 25 "time" 26 ) 27 28 /* 29 The main idea of a pyramid chunker is to process the input data without knowing the entire size apriori. 30 For this to be achieved, the chunker tree is built from the ground up until the data is exhausted. 31 This opens up new aveneus such as easy append and other sort of modifications to the tree thereby avoiding 32 duplication of data chunks. 33 34 35 Below is an example of a two level chunks tree. The leaf chunks are called data chunks and all the above 36 chunks are called tree chunks. The tree chunk above data chunks is level 0 and so on until it reaches 37 the root tree chunk. 38 39 40 41 T10 <- Tree chunk lvl1 42 | 43 __________________________|_____________________________ 44 / | | \ 45 / | \ \ 46 __T00__ ___T01__ ___T02__ ___T03__ <- Tree chunks lvl 0 47 / / \ / / \ / / \ / / \ 48 / / \ / / \ / / \ / / \ 49 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 <- Data Chunks 50 51 52 The split function continuously read the data and creates data chunks and send them to storage. 53 When certain no of data chunks are created (defaultBranches), a signal is sent to create a tree 54 entry. When the level 0 tree entries reaches certain threshold (defaultBranches), another signal 55 is sent to a tree entry one level up.. and so on... until only the data is exhausted AND only one 56 tree entry is present in certain level. The key of tree entry is given out as the rootKey of the file. 57 58 */ 59 60 var ( 61 errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk") 62 errLoadingTreeChunk = errors.New("LoadTree Error: Could not load chunk") 63 ) 64 65 const ( 66 ChunkProcessors = 8 67 DefaultBranches int64 = 128 68 splitTimeout = time.Minute * 5 69 ) 70 71 var timeoutErr = fmt.Errorf("timed out after %s", splitTimeout) 72 73 const ( 74 DataChunk = 0 75 TreeChunk = 1 76 ) 77 78 type ChunkerParams struct { 79 Branches int64 80 Hash string 81 } 82 83 func NewChunkerParams() *ChunkerParams { 84 return &ChunkerParams{ 85 Branches: DefaultBranches, 86 Hash: SHA3Hash, 87 } 88 } 89 90 // Entry to create a tree node 91 type TreeEntry struct { 92 level int 93 branchCount int64 94 subtreeSize uint64 95 chunk []byte 96 key []byte 97 index int // used in append to indicate the index of existing tree entry 98 updatePending bool // indicates if the entry is loaded from existing tree 99 } 100 101 func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry { 102 return &TreeEntry{ 103 level: 0, 104 branchCount: 0, 105 subtreeSize: 0, 106 chunk: make([]byte, pyramid.chunkSize+8), 107 key: make([]byte, pyramid.hashSize), 108 index: 0, 109 updatePending: false, 110 } 111 } 112 113 // Used by the hash processor to create a data/tree chunk and send to storage 114 type chunkJob struct { 115 key Key 116 chunk []byte 117 size int64 118 done func() 119 chunkType int // used to identify the tree related chunks for debugging 120 chunkLvl int // leaf-1 is level 0 and goes upwards until it reaches root 121 } 122 123 type PyramidChunker struct { 124 hashFunc SwarmHasher 125 chunkSize int64 126 hashSize int64 127 branches int64 128 workerCount int64 129 workerLock sync.RWMutex 130 } 131 132 func NewPyramidChunker(params *ChunkerParams) *PyramidChunker { 133 p := &PyramidChunker{} 134 p.hashFunc = MakeHashFunc(params.Hash) 135 p.branches = params.Branches 136 p.hashSize = int64(p.hashFunc().Size()) 137 p.chunkSize = p.hashSize * p.branches 138 p.workerCount = 0 139 return p 140 } 141 142 func (p *PyramidChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader { 143 return &LazyChunkReader{ 144 key: key, 145 chunkC: chunkC, 146 chunkSize: p.chunkSize, 147 branches: p.branches, 148 hashSize: p.hashSize, 149 } 150 } 151 152 func (p *PyramidChunker) incrementWorkerCount() { 153 p.workerLock.Lock() 154 defer p.workerLock.Unlock() 155 p.workerCount += 1 156 } 157 158 func (p *PyramidChunker) getWorkerCount() int64 { 159 p.workerLock.Lock() 160 defer p.workerLock.Unlock() 161 return p.workerCount 162 } 163 164 func (p *PyramidChunker) decrementWorkerCount() { 165 p.workerLock.Lock() 166 defer p.workerLock.Unlock() 167 p.workerCount -= 1 168 } 169 170 func (p *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg *sync.WaitGroup) (Key, error) { 171 rootKey := make([]byte, p.hashSize) 172 chunkLevel := make([][]*TreeEntry, p.branches) 173 quitC := make(chan bool) 174 defer close(quitC) 175 176 var wg sync.WaitGroup 177 wg.Add(1) 178 go p.prepareChunks(false, chunkLevel, data, rootKey, quitC, wg.Done, chunkC, swg) 179 180 done := make(chan struct{}) 181 go func() { 182 wg.Wait() 183 if swg != nil { 184 swg.Wait() 185 } 186 close(done) 187 }() 188 189 select { 190 case <-done: 191 case <-time.After(splitTimeout): 192 return nil, timeoutErr 193 } 194 return rootKey, nil 195 196 } 197 198 func (p *PyramidChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg *sync.WaitGroup) (Key, error) { 199 rootKey := make([]byte, p.hashSize) 200 chunkLevel := make([][]*TreeEntry, p.branches) 201 quitC := make(chan bool) 202 defer close(quitC) 203 204 // Load the right most unfinished tree chunks in every level 205 p.loadTree(chunkLevel, key, chunkC, quitC) 206 207 var wg sync.WaitGroup 208 wg.Add(1) 209 go p.prepareChunks(true, chunkLevel, data, rootKey, quitC, wg.Done, chunkC, swg) 210 211 done := make(chan struct{}) 212 go func() { 213 wg.Wait() 214 if swg != nil { 215 swg.Wait() 216 } 217 close(done) 218 }() 219 220 select { 221 case <-done: 222 case <-time.After(splitTimeout): 223 return nil, timeoutErr 224 } 225 return rootKey, nil 226 227 } 228 229 func (p *PyramidChunker) processor(id int64, jobC <-chan *chunkJob, chunkC chan *Chunk, quitC chan bool, swg *sync.WaitGroup) { 230 defer p.decrementWorkerCount() 231 if swg != nil { 232 defer swg.Done() 233 } 234 235 hasher := p.hashFunc() 236 for { 237 select { 238 239 case job, ok := <-jobC: 240 if !ok { 241 return 242 } 243 p.processChunk(id, hasher, job, chunkC, swg) 244 case <-quitC: 245 return 246 } 247 } 248 } 249 250 func (p *PyramidChunker) processChunk(id int64, hasher SwarmHash, job *chunkJob, chunkC chan *Chunk, swg *sync.WaitGroup) { 251 defer job.done() 252 hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length 253 hasher.Write(job.chunk[8:]) // minus 8 []byte length 254 h := hasher.Sum(nil) 255 256 // report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk) 257 copy(job.key, h) 258 259 // send off new chunk to storage 260 if chunkC != nil { 261 if swg != nil { 262 swg.Add(1) 263 } 264 chunkC <- &Chunk{Key: h, SData: job.chunk, Size: job.size, wg: swg} 265 } 266 } 267 268 func (p *PyramidChunker) loadTree(chunkLevel [][]*TreeEntry, key Key, chunkC chan *Chunk, quitC chan bool) error { 269 // Get the root chunk to get the total size 270 chunk := retrieve(key, chunkC, quitC) 271 if chunk == nil { 272 return errLoadingTreeRootChunk 273 } 274 275 //if data size is less than a chunk... add a parent with update as pending 276 if chunk.Size <= p.chunkSize { 277 newEntry := &TreeEntry{ 278 level: 0, 279 branchCount: 1, 280 subtreeSize: uint64(chunk.Size), 281 chunk: make([]byte, p.chunkSize+8), 282 key: make([]byte, p.hashSize), 283 index: 0, 284 updatePending: true, 285 } 286 copy(newEntry.chunk[8:], chunk.Key) 287 chunkLevel[0] = append(chunkLevel[0], newEntry) 288 return nil 289 } 290 291 var treeSize int64 292 var depth int 293 treeSize = p.chunkSize 294 for ; treeSize < chunk.Size; treeSize *= p.branches { 295 depth++ 296 } 297 298 // Add the root chunk entry 299 branchCount := int64(len(chunk.SData)-8) / p.hashSize 300 newEntry := &TreeEntry{ 301 level: depth - 1, 302 branchCount: branchCount, 303 subtreeSize: uint64(chunk.Size), 304 chunk: chunk.SData, 305 key: key, 306 index: 0, 307 updatePending: true, 308 } 309 chunkLevel[depth-1] = append(chunkLevel[depth-1], newEntry) 310 311 // Add the rest of the tree 312 for lvl := depth - 1; lvl >= 1; lvl-- { 313 314 //TODO(jmozah): instead of loading finished branches and then trim in the end, 315 //avoid loading them in the first place 316 for _, ent := range chunkLevel[lvl] { 317 branchCount = int64(len(ent.chunk)-8) / p.hashSize 318 for i := int64(0); i < branchCount; i++ { 319 key := ent.chunk[8+(i*p.hashSize) : 8+((i+1)*p.hashSize)] 320 newChunk := retrieve(key, chunkC, quitC) 321 if newChunk == nil { 322 return errLoadingTreeChunk 323 } 324 bewBranchCount := int64(len(newChunk.SData)-8) / p.hashSize 325 newEntry := &TreeEntry{ 326 level: lvl - 1, 327 branchCount: bewBranchCount, 328 subtreeSize: uint64(newChunk.Size), 329 chunk: newChunk.SData, 330 key: key, 331 index: 0, 332 updatePending: true, 333 } 334 chunkLevel[lvl-1] = append(chunkLevel[lvl-1], newEntry) 335 336 } 337 338 // We need to get only the right most unfinished branch.. so trim all finished branches 339 if int64(len(chunkLevel[lvl-1])) >= p.branches { 340 chunkLevel[lvl-1] = nil 341 } 342 } 343 } 344 345 return nil 346 } 347 348 func (p *PyramidChunker) prepareChunks(isAppend bool, chunkLevel [][]*TreeEntry, data io.Reader, rootKey []byte, quitC chan bool, done func(), chunkC chan *Chunk, swg *sync.WaitGroup) { 349 defer done() 350 jobC := make(chan *chunkJob, 2*ChunkProcessors) 351 defer close(jobC) 352 353 chunkWG := &sync.WaitGroup{} 354 totalDataSize := 0 355 356 p.incrementWorkerCount() 357 if swg != nil { 358 swg.Add(1) 359 } 360 go p.processor(p.workerCount, jobC, chunkC, quitC, swg) 361 362 parent := NewTreeEntry(p) 363 var unFinishedChunk *Chunk 364 365 if isAppend && len(chunkLevel[0]) != 0 { 366 lastIndex := len(chunkLevel[0]) - 1 367 ent := chunkLevel[0][lastIndex] 368 369 if ent.branchCount < p.branches { 370 parent = &TreeEntry{ 371 level: 0, 372 branchCount: ent.branchCount, 373 subtreeSize: ent.subtreeSize, 374 chunk: ent.chunk, 375 key: ent.key, 376 index: lastIndex, 377 updatePending: true, 378 } 379 380 lastBranch := parent.branchCount - 1 381 lastKey := parent.chunk[8+lastBranch*p.hashSize : 8+(lastBranch+1)*p.hashSize] 382 383 unFinishedChunk = retrieve(lastKey, chunkC, quitC) 384 if unFinishedChunk.Size < p.chunkSize { 385 386 parent.subtreeSize = parent.subtreeSize - uint64(unFinishedChunk.Size) 387 parent.branchCount = parent.branchCount - 1 388 } else { 389 unFinishedChunk = nil 390 } 391 } 392 } 393 394 for index := 0; ; index++ { 395 var n int 396 var err error 397 chunkData := make([]byte, p.chunkSize+8) 398 if unFinishedChunk != nil { 399 copy(chunkData, unFinishedChunk.SData) 400 n, err = data.Read(chunkData[8+unFinishedChunk.Size:]) 401 n += int(unFinishedChunk.Size) 402 unFinishedChunk = nil 403 } else { 404 n, err = data.Read(chunkData[8:]) 405 } 406 407 totalDataSize += n 408 if err != nil { 409 if err == io.EOF || err == io.ErrUnexpectedEOF { 410 if parent.branchCount == 1 { 411 // Data is exactly one chunk.. pick the last chunk key as root 412 chunkWG.Wait() 413 lastChunksKey := parent.chunk[8 : 8+p.hashSize] 414 copy(rootKey, lastChunksKey) 415 break 416 } 417 } else { 418 close(quitC) 419 break 420 } 421 } 422 423 // Data ended in chunk boundary.. just signal to start bulding tree 424 if n == 0 { 425 p.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey) 426 break 427 } else { 428 429 pkey := p.enqueueDataChunk(chunkData, uint64(n), parent, chunkWG, jobC, quitC) 430 431 // update tree related parent data structures 432 parent.subtreeSize += uint64(n) 433 parent.branchCount++ 434 435 // Data got exhausted... signal to send any parent tree related chunks 436 if int64(n) < p.chunkSize { 437 438 // only one data chunk .. so dont add any parent chunk 439 if parent.branchCount <= 1 { 440 chunkWG.Wait() 441 copy(rootKey, pkey) 442 break 443 } 444 445 p.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey) 446 break 447 } 448 449 if parent.branchCount == p.branches { 450 p.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, false, rootKey) 451 parent = NewTreeEntry(p) 452 } 453 454 } 455 456 workers := p.getWorkerCount() 457 if int64(len(jobC)) > workers && workers < ChunkProcessors { 458 p.incrementWorkerCount() 459 if swg != nil { 460 swg.Add(1) 461 } 462 go p.processor(p.workerCount, jobC, chunkC, quitC, swg) 463 } 464 } 465 } 466 467 func (p *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool, rootKey []byte) { 468 chunkWG.Wait() 469 p.enqueueTreeChunk(chunkLevel, ent, chunkWG, jobC, quitC, last) 470 471 compress := false 472 endLvl := p.branches 473 for lvl := int64(0); lvl < p.branches; lvl++ { 474 lvlCount := int64(len(chunkLevel[lvl])) 475 if lvlCount >= p.branches { 476 endLvl = lvl + 1 477 compress = true 478 break 479 } 480 } 481 482 if !compress && !last { 483 return 484 } 485 486 // Wait for all the keys to be processed before compressing the tree 487 chunkWG.Wait() 488 489 for lvl := int64(ent.level); lvl < endLvl; lvl++ { 490 491 lvlCount := int64(len(chunkLevel[lvl])) 492 if lvlCount == 1 && last { 493 copy(rootKey, chunkLevel[lvl][0].key) 494 return 495 } 496 497 for startCount := int64(0); startCount < lvlCount; startCount += p.branches { 498 499 endCount := startCount + p.branches 500 if endCount > lvlCount { 501 endCount = lvlCount 502 } 503 504 var nextLvlCount int64 505 var tempEntry *TreeEntry 506 if len(chunkLevel[lvl+1]) > 0 { 507 nextLvlCount = int64(len(chunkLevel[lvl+1]) - 1) 508 tempEntry = chunkLevel[lvl+1][nextLvlCount] 509 } 510 if isAppend && tempEntry != nil && tempEntry.updatePending { 511 updateEntry := &TreeEntry{ 512 level: int(lvl + 1), 513 branchCount: 0, 514 subtreeSize: 0, 515 chunk: make([]byte, p.chunkSize+8), 516 key: make([]byte, p.hashSize), 517 index: int(nextLvlCount), 518 updatePending: true, 519 } 520 for index := int64(0); index < lvlCount; index++ { 521 updateEntry.branchCount++ 522 updateEntry.subtreeSize += chunkLevel[lvl][index].subtreeSize 523 copy(updateEntry.chunk[8+(index*p.hashSize):8+((index+1)*p.hashSize)], chunkLevel[lvl][index].key[:p.hashSize]) 524 } 525 526 p.enqueueTreeChunk(chunkLevel, updateEntry, chunkWG, jobC, quitC, last) 527 528 } else { 529 530 noOfBranches := endCount - startCount 531 newEntry := &TreeEntry{ 532 level: int(lvl + 1), 533 branchCount: noOfBranches, 534 subtreeSize: 0, 535 chunk: make([]byte, (noOfBranches*p.hashSize)+8), 536 key: make([]byte, p.hashSize), 537 index: int(nextLvlCount), 538 updatePending: false, 539 } 540 541 index := int64(0) 542 for i := startCount; i < endCount; i++ { 543 entry := chunkLevel[lvl][i] 544 newEntry.subtreeSize += entry.subtreeSize 545 copy(newEntry.chunk[8+(index*p.hashSize):8+((index+1)*p.hashSize)], entry.key[:p.hashSize]) 546 index++ 547 } 548 549 p.enqueueTreeChunk(chunkLevel, newEntry, chunkWG, jobC, quitC, last) 550 551 } 552 553 } 554 555 if !isAppend { 556 chunkWG.Wait() 557 if compress { 558 chunkLevel[lvl] = nil 559 } 560 } 561 } 562 563 } 564 565 func (p *PyramidChunker) enqueueTreeChunk(chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool) { 566 if ent != nil { 567 568 // wait for data chunks to get over before processing the tree chunk 569 if last { 570 chunkWG.Wait() 571 } 572 573 binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize) 574 ent.key = make([]byte, p.hashSize) 575 chunkWG.Add(1) 576 select { 577 case jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*p.hashSize+8], int64(ent.subtreeSize), chunkWG.Done, TreeChunk, 0}: 578 case <-quitC: 579 } 580 581 // Update or append based on weather it is a new entry or being reused 582 if ent.updatePending { 583 chunkWG.Wait() 584 chunkLevel[ent.level][ent.index] = ent 585 } else { 586 chunkLevel[ent.level] = append(chunkLevel[ent.level], ent) 587 } 588 589 } 590 } 591 592 func (p *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool) Key { 593 binary.LittleEndian.PutUint64(chunkData[:8], size) 594 pkey := parent.chunk[8+parent.branchCount*p.hashSize : 8+(parent.branchCount+1)*p.hashSize] 595 596 chunkWG.Add(1) 597 select { 598 case jobC <- &chunkJob{pkey, chunkData[:size+8], int64(size), chunkWG.Done, DataChunk, -1}: 599 case <-quitC: 600 } 601 602 return pkey 603 604 }