github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/swarm/storage/pyramid.go (about) 1 // This file is part of the go-sberex library. The go-sberex library is 2 // free software: you can redistribute it and/or modify it under the terms 3 // of the GNU Lesser General Public License as published by the Free 4 // Software Foundation, either version 3 of the License, or (at your option) 5 // any later version. 6 // 7 // The go-sberex library is distributed in the hope that it will be useful, 8 // but WITHOUT ANY WARRANTY; without even the implied warranty of 9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 10 // General Public License <http://www.gnu.org/licenses/> for more details. 11 12 package storage 13 14 import ( 15 "encoding/binary" 16 "errors" 17 "io" 18 "sync" 19 "time" 20 ) 21 22 /* 23 The main idea of a pyramid chunker is to process the input data without knowing the entire size apriori. 24 For this to be achieved, the chunker tree is built from the ground up until the data is exhausted. 25 This opens up new aveneus such as easy append and other sort of modifications to the tree thereby avoiding 26 duplication of data chunks. 27 28 29 Below is an example of a two level chunks tree. The leaf chunks are called data chunks and all the above 30 chunks are called tree chunks. The tree chunk above data chunks is level 0 and so on until it reaches 31 the root tree chunk. 32 33 34 35 T10 <- Tree chunk lvl1 36 | 37 __________________________|_____________________________ 38 / | | \ 39 / | \ \ 40 __T00__ ___T01__ ___T02__ ___T03__ <- Tree chunks lvl 0 41 / / \ / / \ / / \ / / \ 42 / / \ / / \ / / \ / / \ 43 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 D1 D2 ... D128 <- Data Chunks 44 45 46 The split function continuously read the data and creates data chunks and send them to storage. 47 When certain no of data chunks are created (defaultBranches), a signal is sent to create a tree 48 entry. When the level 0 tree entries reaches certain threshold (defaultBranches), another signal 49 is sent to a tree entry one level up.. and so on... until only the data is exhausted AND only one 50 tree entry is present in certain level. The key of tree entry is given out as the rootKey of the file. 51 52 */ 53 54 var ( 55 errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk") 56 errLoadingTreeChunk = errors.New("LoadTree Error: Could not load chunk") 57 ) 58 59 const ( 60 ChunkProcessors = 8 61 DefaultBranches int64 = 128 62 splitTimeout = time.Minute * 5 63 ) 64 65 const ( 66 DataChunk = 0 67 TreeChunk = 1 68 ) 69 70 type ChunkerParams struct { 71 Branches int64 72 Hash string 73 } 74 75 func NewChunkerParams() *ChunkerParams { 76 return &ChunkerParams{ 77 Branches: DefaultBranches, 78 Hash: SHA3Hash, 79 } 80 } 81 82 // Entry to create a tree node 83 type TreeEntry struct { 84 level int 85 branchCount int64 86 subtreeSize uint64 87 chunk []byte 88 key []byte 89 index int // used in append to indicate the index of existing tree entry 90 updatePending bool // indicates if the entry is loaded from existing tree 91 } 92 93 func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry { 94 return &TreeEntry{ 95 level: 0, 96 branchCount: 0, 97 subtreeSize: 0, 98 chunk: make([]byte, pyramid.chunkSize+8), 99 key: make([]byte, pyramid.hashSize), 100 index: 0, 101 updatePending: false, 102 } 103 } 104 105 // Used by the hash processor to create a data/tree chunk and send to storage 106 type chunkJob struct { 107 key Key 108 chunk []byte 109 size int64 110 parentWg *sync.WaitGroup 111 chunkType int // used to identify the tree related chunks for debugging 112 chunkLvl int // leaf-1 is level 0 and goes upwards until it reaches root 113 } 114 115 type PyramidChunker struct { 116 hashFunc SwarmHasher 117 chunkSize int64 118 hashSize int64 119 branches int64 120 workerCount int64 121 workerLock sync.RWMutex 122 } 123 124 func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) { 125 self = &PyramidChunker{} 126 self.hashFunc = MakeHashFunc(params.Hash) 127 self.branches = params.Branches 128 self.hashSize = int64(self.hashFunc().Size()) 129 self.chunkSize = self.hashSize * self.branches 130 self.workerCount = 0 131 return 132 } 133 134 func (self *PyramidChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader { 135 return &LazyChunkReader{ 136 key: key, 137 chunkC: chunkC, 138 chunkSize: self.chunkSize, 139 branches: self.branches, 140 hashSize: self.hashSize, 141 } 142 } 143 144 func (self *PyramidChunker) incrementWorkerCount() { 145 self.workerLock.Lock() 146 defer self.workerLock.Unlock() 147 self.workerCount += 1 148 } 149 150 func (self *PyramidChunker) getWorkerCount() int64 { 151 self.workerLock.Lock() 152 defer self.workerLock.Unlock() 153 return self.workerCount 154 } 155 156 func (self *PyramidChunker) decrementWorkerCount() { 157 self.workerLock.Lock() 158 defer self.workerLock.Unlock() 159 self.workerCount -= 1 160 } 161 162 func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, storageWG, processorWG *sync.WaitGroup) (Key, error) { 163 jobC := make(chan *chunkJob, 2*ChunkProcessors) 164 wg := &sync.WaitGroup{} 165 errC := make(chan error) 166 quitC := make(chan bool) 167 rootKey := make([]byte, self.hashSize) 168 chunkLevel := make([][]*TreeEntry, self.branches) 169 170 wg.Add(1) 171 go self.prepareChunks(false, chunkLevel, data, rootKey, quitC, wg, jobC, processorWG, chunkC, errC, storageWG) 172 173 // closes internal error channel if all subprocesses in the workgroup finished 174 go func() { 175 176 // waiting for all chunks to finish 177 wg.Wait() 178 179 // if storage waitgroup is non-nil, we wait for storage to finish too 180 if storageWG != nil { 181 storageWG.Wait() 182 } 183 //We close errC here because this is passed down to 8 parallel routines underneath. 184 // if a error happens in one of them.. that particular routine raises error... 185 // once they all complete successfully, the control comes back and we can safely close this here. 186 close(errC) 187 }() 188 189 defer close(quitC) 190 191 select { 192 case err := <-errC: 193 if err != nil { 194 return nil, err 195 } 196 case <-time.NewTimer(splitTimeout).C: 197 } 198 return rootKey, nil 199 200 } 201 202 func (self *PyramidChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, storageWG, processorWG *sync.WaitGroup) (Key, error) { 203 quitC := make(chan bool) 204 rootKey := make([]byte, self.hashSize) 205 chunkLevel := make([][]*TreeEntry, self.branches) 206 207 // Load the right most unfinished tree chunks in every level 208 self.loadTree(chunkLevel, key, chunkC, quitC) 209 210 jobC := make(chan *chunkJob, 2*ChunkProcessors) 211 wg := &sync.WaitGroup{} 212 errC := make(chan error) 213 214 wg.Add(1) 215 go self.prepareChunks(true, chunkLevel, data, rootKey, quitC, wg, jobC, processorWG, chunkC, errC, storageWG) 216 217 // closes internal error channel if all subprocesses in the workgroup finished 218 go func() { 219 220 // waiting for all chunks to finish 221 wg.Wait() 222 223 // if storage waitgroup is non-nil, we wait for storage to finish too 224 if storageWG != nil { 225 storageWG.Wait() 226 } 227 close(errC) 228 }() 229 230 defer close(quitC) 231 232 select { 233 case err := <-errC: 234 if err != nil { 235 return nil, err 236 } 237 case <-time.NewTimer(splitTimeout).C: 238 } 239 return rootKey, nil 240 241 } 242 243 func (self *PyramidChunker) processor(id int64, jobC chan *chunkJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) { 244 defer self.decrementWorkerCount() 245 246 hasher := self.hashFunc() 247 if wwg != nil { 248 defer wwg.Done() 249 } 250 for { 251 select { 252 253 case job, ok := <-jobC: 254 if !ok { 255 return 256 } 257 self.processChunk(id, hasher, job, chunkC, swg) 258 case <-quitC: 259 return 260 } 261 } 262 } 263 264 func (self *PyramidChunker) processChunk(id int64, hasher SwarmHash, job *chunkJob, chunkC chan *Chunk, swg *sync.WaitGroup) { 265 hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length 266 hasher.Write(job.chunk[8:]) // minus 8 []byte length 267 h := hasher.Sum(nil) 268 269 newChunk := &Chunk{ 270 Key: h, 271 SData: job.chunk, 272 Size: job.size, 273 wg: swg, 274 } 275 276 // report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk) 277 copy(job.key, h) 278 279 // send off new chunk to storage 280 if chunkC != nil { 281 if swg != nil { 282 swg.Add(1) 283 } 284 } 285 job.parentWg.Done() 286 287 if chunkC != nil { 288 chunkC <- newChunk 289 } 290 } 291 292 func (self *PyramidChunker) loadTree(chunkLevel [][]*TreeEntry, key Key, chunkC chan *Chunk, quitC chan bool) error { 293 // Get the root chunk to get the total size 294 chunk := retrieve(key, chunkC, quitC) 295 if chunk == nil { 296 return errLoadingTreeRootChunk 297 } 298 299 //if data size is less than a chunk... add a parent with update as pending 300 if chunk.Size <= self.chunkSize { 301 newEntry := &TreeEntry{ 302 level: 0, 303 branchCount: 1, 304 subtreeSize: uint64(chunk.Size), 305 chunk: make([]byte, self.chunkSize+8), 306 key: make([]byte, self.hashSize), 307 index: 0, 308 updatePending: true, 309 } 310 copy(newEntry.chunk[8:], chunk.Key) 311 chunkLevel[0] = append(chunkLevel[0], newEntry) 312 return nil 313 } 314 315 var treeSize int64 316 var depth int 317 treeSize = self.chunkSize 318 for ; treeSize < chunk.Size; treeSize *= self.branches { 319 depth++ 320 } 321 322 // Add the root chunk entry 323 branchCount := int64(len(chunk.SData)-8) / self.hashSize 324 newEntry := &TreeEntry{ 325 level: depth - 1, 326 branchCount: branchCount, 327 subtreeSize: uint64(chunk.Size), 328 chunk: chunk.SData, 329 key: key, 330 index: 0, 331 updatePending: true, 332 } 333 chunkLevel[depth-1] = append(chunkLevel[depth-1], newEntry) 334 335 // Add the rest of the tree 336 for lvl := depth - 1; lvl >= 1; lvl-- { 337 338 //TODO(jmozah): instead of loading finished branches and then trim in the end, 339 //avoid loading them in the first place 340 for _, ent := range chunkLevel[lvl] { 341 branchCount = int64(len(ent.chunk)-8) / self.hashSize 342 for i := int64(0); i < branchCount; i++ { 343 key := ent.chunk[8+(i*self.hashSize) : 8+((i+1)*self.hashSize)] 344 newChunk := retrieve(key, chunkC, quitC) 345 if newChunk == nil { 346 return errLoadingTreeChunk 347 } 348 bewBranchCount := int64(len(newChunk.SData)-8) / self.hashSize 349 newEntry := &TreeEntry{ 350 level: lvl - 1, 351 branchCount: bewBranchCount, 352 subtreeSize: uint64(newChunk.Size), 353 chunk: newChunk.SData, 354 key: key, 355 index: 0, 356 updatePending: true, 357 } 358 chunkLevel[lvl-1] = append(chunkLevel[lvl-1], newEntry) 359 360 } 361 362 // We need to get only the right most unfinished branch.. so trim all finished branches 363 if int64(len(chunkLevel[lvl-1])) >= self.branches { 364 chunkLevel[lvl-1] = nil 365 } 366 } 367 } 368 369 return nil 370 } 371 372 func (self *PyramidChunker) prepareChunks(isAppend bool, chunkLevel [][]*TreeEntry, data io.Reader, rootKey []byte, quitC chan bool, wg *sync.WaitGroup, jobC chan *chunkJob, processorWG *sync.WaitGroup, chunkC chan *Chunk, errC chan error, storageWG *sync.WaitGroup) { 373 defer wg.Done() 374 375 chunkWG := &sync.WaitGroup{} 376 totalDataSize := 0 377 378 // processorWG keeps track of workers spawned for hashing chunks 379 if processorWG != nil { 380 processorWG.Add(1) 381 } 382 383 self.incrementWorkerCount() 384 go self.processor(self.workerCount, jobC, chunkC, errC, quitC, storageWG, processorWG) 385 386 parent := NewTreeEntry(self) 387 var unFinishedChunk *Chunk 388 389 if isAppend && len(chunkLevel[0]) != 0 { 390 391 lastIndex := len(chunkLevel[0]) - 1 392 ent := chunkLevel[0][lastIndex] 393 394 if ent.branchCount < self.branches { 395 parent = &TreeEntry{ 396 level: 0, 397 branchCount: ent.branchCount, 398 subtreeSize: ent.subtreeSize, 399 chunk: ent.chunk, 400 key: ent.key, 401 index: lastIndex, 402 updatePending: true, 403 } 404 405 lastBranch := parent.branchCount - 1 406 lastKey := parent.chunk[8+lastBranch*self.hashSize : 8+(lastBranch+1)*self.hashSize] 407 408 unFinishedChunk = retrieve(lastKey, chunkC, quitC) 409 if unFinishedChunk.Size < self.chunkSize { 410 411 parent.subtreeSize = parent.subtreeSize - uint64(unFinishedChunk.Size) 412 parent.branchCount = parent.branchCount - 1 413 } else { 414 unFinishedChunk = nil 415 } 416 } 417 } 418 419 for index := 0; ; index++ { 420 421 var n int 422 var err error 423 chunkData := make([]byte, self.chunkSize+8) 424 if unFinishedChunk != nil { 425 copy(chunkData, unFinishedChunk.SData) 426 n, err = data.Read(chunkData[8+unFinishedChunk.Size:]) 427 n += int(unFinishedChunk.Size) 428 unFinishedChunk = nil 429 } else { 430 n, err = data.Read(chunkData[8:]) 431 } 432 433 totalDataSize += n 434 if err != nil { 435 if err == io.EOF || err == io.ErrUnexpectedEOF { 436 if parent.branchCount == 1 { 437 // Data is exactly one chunk.. pick the last chunk key as root 438 chunkWG.Wait() 439 lastChunksKey := parent.chunk[8 : 8+self.hashSize] 440 copy(rootKey, lastChunksKey) 441 break 442 } 443 } else { 444 close(quitC) 445 break 446 } 447 } 448 449 // Data ended in chunk boundary.. just signal to start bulding tree 450 if n == 0 { 451 self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey) 452 break 453 } else { 454 455 pkey := self.enqueueDataChunk(chunkData, uint64(n), parent, chunkWG, jobC, quitC) 456 457 // update tree related parent data structures 458 parent.subtreeSize += uint64(n) 459 parent.branchCount++ 460 461 // Data got exhausted... signal to send any parent tree related chunks 462 if int64(n) < self.chunkSize { 463 464 // only one data chunk .. so dont add any parent chunk 465 if parent.branchCount <= 1 { 466 chunkWG.Wait() 467 copy(rootKey, pkey) 468 break 469 } 470 471 self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey) 472 break 473 } 474 475 if parent.branchCount == self.branches { 476 self.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, false, rootKey) 477 parent = NewTreeEntry(self) 478 } 479 480 } 481 482 workers := self.getWorkerCount() 483 if int64(len(jobC)) > workers && workers < ChunkProcessors { 484 if processorWG != nil { 485 processorWG.Add(1) 486 } 487 self.incrementWorkerCount() 488 go self.processor(self.workerCount, jobC, chunkC, errC, quitC, storageWG, processorWG) 489 } 490 491 } 492 493 } 494 495 func (self *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool, rootKey []byte) { 496 chunkWG.Wait() 497 self.enqueueTreeChunk(chunkLevel, ent, chunkWG, jobC, quitC, last) 498 499 compress := false 500 endLvl := self.branches 501 for lvl := int64(0); lvl < self.branches; lvl++ { 502 lvlCount := int64(len(chunkLevel[lvl])) 503 if lvlCount >= self.branches { 504 endLvl = lvl + 1 505 compress = true 506 break 507 } 508 } 509 510 if !compress && !last { 511 return 512 } 513 514 // Wait for all the keys to be processed before compressing the tree 515 chunkWG.Wait() 516 517 for lvl := int64(ent.level); lvl < endLvl; lvl++ { 518 519 lvlCount := int64(len(chunkLevel[lvl])) 520 if lvlCount == 1 && last { 521 copy(rootKey, chunkLevel[lvl][0].key) 522 return 523 } 524 525 for startCount := int64(0); startCount < lvlCount; startCount += self.branches { 526 527 endCount := startCount + self.branches 528 if endCount > lvlCount { 529 endCount = lvlCount 530 } 531 532 var nextLvlCount int64 533 var tempEntry *TreeEntry 534 if len(chunkLevel[lvl+1]) > 0 { 535 nextLvlCount = int64(len(chunkLevel[lvl+1]) - 1) 536 tempEntry = chunkLevel[lvl+1][nextLvlCount] 537 } 538 if isAppend && tempEntry != nil && tempEntry.updatePending { 539 updateEntry := &TreeEntry{ 540 level: int(lvl + 1), 541 branchCount: 0, 542 subtreeSize: 0, 543 chunk: make([]byte, self.chunkSize+8), 544 key: make([]byte, self.hashSize), 545 index: int(nextLvlCount), 546 updatePending: true, 547 } 548 for index := int64(0); index < lvlCount; index++ { 549 updateEntry.branchCount++ 550 updateEntry.subtreeSize += chunkLevel[lvl][index].subtreeSize 551 copy(updateEntry.chunk[8+(index*self.hashSize):8+((index+1)*self.hashSize)], chunkLevel[lvl][index].key[:self.hashSize]) 552 } 553 554 self.enqueueTreeChunk(chunkLevel, updateEntry, chunkWG, jobC, quitC, last) 555 556 } else { 557 558 noOfBranches := endCount - startCount 559 newEntry := &TreeEntry{ 560 level: int(lvl + 1), 561 branchCount: noOfBranches, 562 subtreeSize: 0, 563 chunk: make([]byte, (noOfBranches*self.hashSize)+8), 564 key: make([]byte, self.hashSize), 565 index: int(nextLvlCount), 566 updatePending: false, 567 } 568 569 index := int64(0) 570 for i := startCount; i < endCount; i++ { 571 entry := chunkLevel[lvl][i] 572 newEntry.subtreeSize += entry.subtreeSize 573 copy(newEntry.chunk[8+(index*self.hashSize):8+((index+1)*self.hashSize)], entry.key[:self.hashSize]) 574 index++ 575 } 576 577 self.enqueueTreeChunk(chunkLevel, newEntry, chunkWG, jobC, quitC, last) 578 579 } 580 581 } 582 583 if !isAppend { 584 chunkWG.Wait() 585 if compress { 586 chunkLevel[lvl] = nil 587 } 588 } 589 } 590 591 } 592 593 func (self *PyramidChunker) enqueueTreeChunk(chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool) { 594 if ent != nil { 595 596 // wait for data chunks to get over before processing the tree chunk 597 if last { 598 chunkWG.Wait() 599 } 600 601 binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize) 602 ent.key = make([]byte, self.hashSize) 603 chunkWG.Add(1) 604 select { 605 case jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*self.hashSize+8], int64(ent.subtreeSize), chunkWG, TreeChunk, 0}: 606 case <-quitC: 607 } 608 609 // Update or append based on weather it is a new entry or being reused 610 if ent.updatePending { 611 chunkWG.Wait() 612 chunkLevel[ent.level][ent.index] = ent 613 } else { 614 chunkLevel[ent.level] = append(chunkLevel[ent.level], ent) 615 } 616 617 } 618 } 619 620 func (self *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool) Key { 621 binary.LittleEndian.PutUint64(chunkData[:8], size) 622 pkey := parent.chunk[8+parent.branchCount*self.hashSize : 8+(parent.branchCount+1)*self.hashSize] 623 624 chunkWG.Add(1) 625 select { 626 case jobC <- &chunkJob{pkey, chunkData[:size+8], int64(size), chunkWG, DataChunk, -1}: 627 case <-quitC: 628 } 629 630 return pkey 631 632 }