// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"io"
	"sync"
	"time"
)

/*
The main idea of the pyramid chunker is to process the input data without knowing the entire size a priori.
To achieve this, the chunker tree is built from the ground up until the data is exhausted.
This opens up new avenues, such as easy appends and other modifications to the tree, thereby avoiding
duplication of data chunks.


Below is an example of a two-level chunk tree. The leaf chunks are called data chunks and all the above
chunks are called tree chunks. The tree chunk above the data chunks is level 0 and so on until it reaches
the root tree chunk.



                                            T10                                       <- Tree chunk lvl1
                                            |
                  __________________________|_____________________________
                 /                  |                      |               \
                /                   |                      \                \
            __T00__             ___T01__               ___T02__         ___T03__      <- Tree chunks lvl 0
           / /     \           / /      \             / /      \       / /      \
          / /       \         / /        \           / /        \     / /        \
         D1 D2 ... D128      D1 D2 ... D128         D1 D2 ... D128    D1 D2 ... D128  <- Data Chunks


The split function continuously reads the data, creates data chunks and sends them to storage.
When a certain number of data chunks are created (defaultBranches), a signal is sent to create a tree
entry. When the level 0 tree entries reach a certain threshold (defaultBranches), another signal
is sent to a tree entry one level up... and so on... until the data is exhausted AND only one
tree entry is present at some level. The key of that tree entry is given out as the rootKey of the file.

*/
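// Chunk layout: every chunk, data and tree alike, carries an 8 byte prefix
// holding the little-endian encoded (subtree) size, followed by the payload.
// For a data chunk the payload is raw file bytes; for a tree chunk it is the
// sequence of child keys. A minimal sketch of that framing (payload and
// subtreeSize are placeholder names, not identifiers used below):
//
//	chunk := make([]byte, 8+len(payload))
//	binary.LittleEndian.PutUint64(chunk[:8], subtreeSize)
//	copy(chunk[8:], payload)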
var (
	errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk")
	errLoadingTreeChunk     = errors.New("LoadTree Error: Could not load chunk")
)

const (
	ChunkProcessors       = 8
	DefaultBranches int64 = 128
	splitTimeout          = time.Minute * 5
)

const (
	DataChunk = 0
	TreeChunk = 1
)

type ChunkerParams struct {
	Branches int64
	Hash     string
}

func NewChunkerParams() *ChunkerParams {
	return &ChunkerParams{
		Branches: DefaultBranches,
		Hash:     SHA3Hash,
	}
}

// Entry to create a tree node
type TreeEntry struct {
	level         int
	branchCount   int64
	subtreeSize   uint64
	chunk         []byte
	key           []byte
	index         int  // used in append to indicate the index of existing tree entry
	updatePending bool // indicates if the entry is loaded from existing tree
}

func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {
	return &TreeEntry{
		level:         0,
		branchCount:   0,
		subtreeSize:   0,
		chunk:         make([]byte, pyramid.chunkSize+8),
		key:           make([]byte, pyramid.hashSize),
		index:         0,
		updatePending: false,
	}
}

// Used by the hash processor to create a data/tree chunk and send to storage
type chunkJob struct {
	key       Key
	chunk     []byte
	size      int64
	parentWg  *sync.WaitGroup
	chunkType int // used to identify the tree related chunks for debugging
	chunkLvl  int // leaf-1 is level 0 and goes upwards until it reaches root
}

type PyramidChunker struct {
	hashFunc    SwarmHasher
	chunkSize   int64
	hashSize    int64
	branches    int64
	workerCount int64
	workerLock  sync.RWMutex
}

func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) {
	self = &PyramidChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 0
	return
}

func (self *PyramidChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
	return &LazyChunkReader{
		key:       key,
		chunkC:    chunkC,
		chunkSize: self.chunkSize,
		branches:  self.branches,
		hashSize:  self.hashSize,
	}
}

func (self *PyramidChunker) incrementWorkerCount() {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	self.workerCount += 1
}

func (self *PyramidChunker) getWorkerCount() int64 {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	return self.workerCount
}

func (self *PyramidChunker) decrementWorkerCount() {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	self.workerCount -= 1
}
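// Split chunks the data from the given reader, sends the resulting chunks to
// the storage channel and returns the root key once the input is consumed.
// A minimal usage sketch (illustrative only; reader, size, chunkC and the
// waitgroups are assumed to be set up by the caller):
//
//	chunker := NewPyramidChunker(NewChunkerParams())
//	key, err := chunker.Split(reader, size, chunkC, storageWG, processorWG)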
func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, storageWG, processorWG *sync.WaitGroup) (Key, error) {
	jobC := make(chan *chunkJob, 2*ChunkProcessors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)
	quitC := make(chan bool)
	rootKey := make([]byte, self.hashSize)
	chunkLevel := make([][]*TreeEntry, self.branches)

	wg.Add(1)
	go self.prepareChunks(false, chunkLevel, data, rootKey, quitC, wg, jobC, processorWG, chunkC, errC, storageWG)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {

		// waiting for all chunks to finish
		wg.Wait()

		// if storage waitgroup is non-nil, we wait for storage to finish too
		if storageWG != nil {
			storageWG.Wait()
		}

		// We close errC here because it is passed down to the parallel routines underneath.
		// If an error happens in one of them, that particular routine raises the error;
		// once they all complete successfully, control comes back and we can safely close it here.
		close(errC)
	}()

	defer close(quitC)

	select {
	case err := <-errC:
		if err != nil {
			return nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
	}
	return rootKey, nil

}

// Append extends an existing file identified by key, reloading its unfinished
// right edge (see loadTree) and continuing the split from there.
func (self *PyramidChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, storageWG, processorWG *sync.WaitGroup) (Key, error) {
	quitC := make(chan bool)
	rootKey := make([]byte, self.hashSize)
	chunkLevel := make([][]*TreeEntry, self.branches)

	// Load the right most unfinished tree chunks in every level
	self.loadTree(chunkLevel, key, chunkC, quitC)

	jobC := make(chan *chunkJob, 2*ChunkProcessors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)

	wg.Add(1)
	go self.prepareChunks(true, chunkLevel, data, rootKey, quitC, wg, jobC, processorWG, chunkC, errC, storageWG)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {

		// waiting for all chunks to finish
		wg.Wait()

		// if storage waitgroup is non-nil, we wait for storage to finish too
		if storageWG != nil {
			storageWG.Wait()
		}
		close(errC)
	}()

	defer close(quitC)

	select {
	case err := <-errC:
		if err != nil {
			return nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
	}
	return rootKey, nil

}

// processor consumes chunk jobs from jobC until the channel is closed or quit is signalled.
func (self *PyramidChunker) processor(id int64, jobC chan *chunkJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
	defer self.decrementWorkerCount()

	hasher := self.hashFunc()
	if wwg != nil {
		defer wwg.Done()
	}
	for {
		select {

		case job, ok := <-jobC:
			if !ok {
				return
			}
			self.processChunk(id, hasher, job, chunkC, swg)
		case <-quitC:
			return
		}
	}
}

// processChunk hashes a single job, reports the hash to the parent entry and forwards the chunk to storage.
func (self *PyramidChunker) processChunk(id int64, hasher SwarmHash, job *chunkJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
	hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
	hasher.Write(job.chunk[8:])           // the payload, without the 8 byte length prefix
	h := hasher.Sum(nil)

	newChunk := &Chunk{
		Key:   h,
		SData: job.chunk,
		Size:  job.size,
		wg:    swg,
	}

	// report hash of this chunk one level up (key corresponds to the proper subslice of the parent chunk)
	copy(job.key, h)

	// send off new chunk to storage
	if chunkC != nil && swg != nil {
		swg.Add(1)
	}
	job.parentWg.Done()

	if chunkC != nil {
		chunkC <- newChunk
	}
}
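// loadTree rebuilds the in-memory chunkLevel structure of an existing file so
// that Append can continue where the original Split left off. It walks the
// tree top-down from the root chunk, keeping only the right-most (possibly
// unfinished) branch at every level.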
func (self *PyramidChunker) loadTree(chunkLevel [][]*TreeEntry, key Key, chunkC chan *Chunk, quitC chan bool) error {
	// Get the root chunk to get the total size
	chunk := retrieve(key, chunkC, quitC)
	if chunk == nil {
		return errLoadingTreeRootChunk
	}

	// if the data size is less than a chunk, add a parent with update pending
	if chunk.Size <= self.chunkSize {
		newEntry := &TreeEntry{
			level:         0,
			branchCount:   1,
			subtreeSize:   uint64(chunk.Size),
			chunk:         make([]byte, self.chunkSize+8),
			key:           make([]byte, self.hashSize),
			index:         0,
			updatePending: true,
		}
		copy(newEntry.chunk[8:], chunk.Key)
		chunkLevel[0] = append(chunkLevel[0], newEntry)
		return nil
	}

	var treeSize int64
	var depth int
	treeSize = self.chunkSize
	for ; treeSize < chunk.Size; treeSize *= self.branches {
		depth++
	}

	// Add the root chunk entry
	branchCount := int64(len(chunk.SData)-8) / self.hashSize
	newEntry := &TreeEntry{
		level:         depth - 1,
		branchCount:   branchCount,
		subtreeSize:   uint64(chunk.Size),
		chunk:         chunk.SData,
		key:           key,
		index:         0,
		updatePending: true,
	}
	chunkLevel[depth-1] = append(chunkLevel[depth-1], newEntry)

	// Add the rest of the tree
	for lvl := depth - 1; lvl >= 1; lvl-- {

		//TODO(jmozah): instead of loading finished branches and then trimming at the end,
		//avoid loading them in the first place
		for _, ent := range chunkLevel[lvl] {
			branchCount = int64(len(ent.chunk)-8) / self.hashSize
			for i := int64(0); i < branchCount; i++ {
				key := ent.chunk[8+(i*self.hashSize) : 8+((i+1)*self.hashSize)]
				newChunk := retrieve(key, chunkC, quitC)
				if newChunk == nil {
					return errLoadingTreeChunk
				}
				newBranchCount := int64(len(newChunk.SData)-8) / self.hashSize
				newEntry := &TreeEntry{
					level:         lvl - 1,
					branchCount:   newBranchCount,
					subtreeSize:   uint64(newChunk.Size),
					chunk:         newChunk.SData,
					key:           key,
					index:         0,
					updatePending: true,
				}
				chunkLevel[lvl-1] = append(chunkLevel[lvl-1], newEntry)

			}

			// We need only the right most unfinished branch, so trim all finished branches
			if int64(len(chunkLevel[lvl-1])) >= self.branches {
				chunkLevel[lvl-1] = nil
			}
		}
	}

	return nil
}
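// prepareChunks is the producer side of the pipeline: it reads the input one
// chunk at a time, enqueues data chunks to jobC for hashing and storage, and
// calls buildTree whenever a parent fills up (self.branches entries) or the
// data ends. It also scales the number of processor goroutines up to
// ChunkProcessors when jobC backs up.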
func (self *PyramidChunker) prepareChunks(isAppend bool, chunkLevel [][]*TreeEntry, data io.Reader, rootKey []byte, quitC chan bool, wg *sync.WaitGroup, jobC chan *chunkJob, processorWG *sync.WaitGroup, chunkC chan *Chunk, errC chan error, storageWG *sync.WaitGroup) {
	defer wg.Done()

	chunkWG := &sync.WaitGroup{}
	totalDataSize := 0

	// processorWG keeps track of workers spawned for hashing chunks
	if processorWG != nil {
		processorWG.Add(1)
	}

	self.incrementWorkerCount()
	go self.processor(self.workerCount, jobC, chunkC, errC, quitC, storageWG, processorWG)

	parent := NewTreeEntry(self)
	var unFinishedChunk *Chunk

	if isAppend && len(chunkLevel[0]) != 0 {

		lastIndex := len(chunkLevel[0]) - 1
		ent := chunkLevel[0][lastIndex]

		if ent.branchCount < self.branches {
			parent = &TreeEntry{
				level:         0,
				branchCount:   ent.branchCount,
				subtreeSize:   ent.subtreeSize,
				chunk:         ent.chunk,
				key:           ent.key,
				index:         lastIndex,
				updatePending: true,
			}

			lastBranch := parent.branchCount - 1
			lastKey := parent.chunk[8+lastBranch*self.hashSize : 8+(lastBranch+1)*self.hashSize]

			unFinishedChunk = retrieve(lastKey, chunkC, quitC)
			if unFinishedChunk.Size < self.chunkSize {

				parent.subtreeSize = parent.subtreeSize - uint64(unFinishedChunk.Size)
				parent.branchCount = parent.branchCount - 1
			} else {
				unFinishedChunk = nil
			}
		}
	}

	for index := 0; ; index++ {

		var n int
		var err error
		chunkData := make([]byte, self.chunkSize+8)
		if unFinishedChunk != nil {
			copy(chunkData, unFinishedChunk.SData)
			n, err = data.Read(chunkData[8+unFinishedChunk.Size:])
			n += int(unFinishedChunk.Size)
			unFinishedChunk = nil
		} else {
			n, err = data.Read(chunkData[8:])
		}

		totalDataSize += n
		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {
				if parent.branchCount == 1 {
					// Data is exactly one chunk... pick the last chunk key as root
					chunkWG.Wait()
					lastChunksKey := parent.chunk[8 : 8+self.hashSize]
					copy(rootKey, lastChunksKey)
					break
				}
			} else {
				close(quitC)
				break
			}
		}

		// Data ended on a chunk boundary... just signal to start building the tree
		if n == 0 {
			self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey)
			break
		} else {

			pkey := self.enqueueDataChunk(chunkData, uint64(n), parent, chunkWG, jobC, quitC)

			// update tree related parent data structures
			parent.subtreeSize += uint64(n)
			parent.branchCount++

			// Data got exhausted... signal to send any parent tree related chunks
			if int64(n) < self.chunkSize {

				// only one data chunk... so don't add any parent chunk
				if parent.branchCount <= 1 {
					chunkWG.Wait()
					copy(rootKey, pkey)
					break
				}

				self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey)
				break
			}

			if parent.branchCount == self.branches {
				self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, false, rootKey)
				parent = NewTreeEntry(self)
			}

		}

		workers := self.getWorkerCount()
		if int64(len(jobC)) > workers && workers < ChunkProcessors {
			if processorWG != nil {
				processorWG.Add(1)
			}
			self.incrementWorkerCount()
			go self.processor(self.workerCount, jobC, chunkC, errC, quitC, storageWG, processorWG)
		}

	}

}
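// buildTree flushes the current parent entry and then compresses the tree:
// whenever a level has accumulated self.branches entries (or this is the last
// flush), the entries of that level are packed into a new tree chunk one level
// up. When only a single entry remains at some level on the last flush, its
// key becomes the rootKey.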
func (self *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool, rootKey []byte) {
	chunkWG.Wait()
	self.enqueueTreeChunk(chunkLevel, ent, chunkWG, jobC, quitC, last)

	compress := false
	endLvl := self.branches
	for lvl := int64(0); lvl < self.branches; lvl++ {
		lvlCount := int64(len(chunkLevel[lvl]))
		if lvlCount >= self.branches {
			endLvl = lvl + 1
			compress = true
			break
		}
	}

	if !compress && !last {
		return
	}

	// Wait for all the keys to be processed before compressing the tree
	chunkWG.Wait()

	for lvl := int64(ent.level); lvl < endLvl; lvl++ {

		lvlCount := int64(len(chunkLevel[lvl]))
		if lvlCount == 1 && last {
			copy(rootKey, chunkLevel[lvl][0].key)
			return
		}

		for startCount := int64(0); startCount < lvlCount; startCount += self.branches {

			endCount := startCount + self.branches
			if endCount > lvlCount {
				endCount = lvlCount
			}

			var nextLvlCount int64
			var tempEntry *TreeEntry
			if len(chunkLevel[lvl+1]) > 0 {
				nextLvlCount = int64(len(chunkLevel[lvl+1]) - 1)
				tempEntry = chunkLevel[lvl+1][nextLvlCount]
			}
			if isAppend && tempEntry != nil && tempEntry.updatePending {
				updateEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   0,
					subtreeSize:   0,
					chunk:         make([]byte, self.chunkSize+8),
					key:           make([]byte, self.hashSize),
					index:         int(nextLvlCount),
					updatePending: true,
				}
				for index := int64(0); index < lvlCount; index++ {
					updateEntry.branchCount++
					updateEntry.subtreeSize += chunkLevel[lvl][index].subtreeSize
					copy(updateEntry.chunk[8+(index*self.hashSize):8+((index+1)*self.hashSize)], chunkLevel[lvl][index].key[:self.hashSize])
				}

				self.enqueueTreeChunk(chunkLevel, updateEntry, chunkWG, jobC, quitC, last)

			} else {

				noOfBranches := endCount - startCount
				newEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   noOfBranches,
					subtreeSize:   0,
					chunk:         make([]byte, (noOfBranches*self.hashSize)+8),
					key:           make([]byte, self.hashSize),
					index:         int(nextLvlCount),
					updatePending: false,
				}

				index := int64(0)
				for i := startCount; i < endCount; i++ {
					entry := chunkLevel[lvl][i]
					newEntry.subtreeSize += entry.subtreeSize
					copy(newEntry.chunk[8+(index*self.hashSize):8+((index+1)*self.hashSize)], entry.key[:self.hashSize])
					index++
				}

				self.enqueueTreeChunk(chunkLevel, newEntry, chunkWG, jobC, quitC, last)

			}

		}

		if !isAppend {
			chunkWG.Wait()
			if compress {
				chunkLevel[lvl] = nil
			}
		}
	}

}
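// enqueueTreeChunk serializes a tree entry (subtree size prefix plus the
// collected child keys) and hands it to the hashing workers via jobC. The
// entry's key slice is passed along in the job; the worker fills it in with
// the chunk hash once computed.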
func (self *PyramidChunker) enqueueTreeChunk(chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool) {
	if ent != nil {

		// wait for the data chunks to be done before processing the tree chunk
		if last {
			chunkWG.Wait()
		}

		binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
		ent.key = make([]byte, self.hashSize)
		chunkWG.Add(1)
		select {
		case jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*self.hashSize+8], int64(ent.subtreeSize), chunkWG, TreeChunk, 0}:
		case <-quitC:
		}

		// Update or append based on whether it is a new entry or being reused
		if ent.updatePending {
			chunkWG.Wait()
			chunkLevel[ent.level][ent.index] = ent
		} else {
			chunkLevel[ent.level] = append(chunkLevel[ent.level], ent)
		}

	}
}

func (self *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool) Key {
	binary.LittleEndian.PutUint64(chunkData[:8], size)
	pkey := parent.chunk[8+parent.branchCount*self.hashSize : 8+(parent.branchCount+1)*self.hashSize]

	chunkWG.Add(1)
	select {
	case jobC <- &chunkJob{pkey, chunkData[:size+8], int64(size), chunkWG, DataChunk, -1}:
	case <-quitC:
	}

	return pkey
}
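// A minimal end-to-end sketch of driving the chunker (illustrative only; a
// real consumer would be a ChunkStore, and data/chunkC/swg are placeholder
// names). The drain goroutine releases the storage waitgroup slot taken in
// processChunk for every chunk it stores:
//
//	chunker := NewPyramidChunker(NewChunkerParams())
//	chunkC := make(chan *Chunk)
//	swg := &sync.WaitGroup{}
//	go func() {
//		for chunk := range chunkC {
//			// persist chunk.SData under chunk.Key here, then:
//			swg.Done()
//		}
//	}()
//	key, err := chunker.Split(bytes.NewReader(data), int64(len(data)), chunkC, swg, nil)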