github.com/SmartMeshFoundation/Spectrum@v0.0.0-20220621030607-452a266fee1e/swarm/storage/chunker.go

// Copyright 2016 The Spectrum Authors
// This file is part of the Spectrum library.
//
// The Spectrum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The Spectrum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the Spectrum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 every node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data that includes the size of the data slice covered by the entire subtree under the node, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is no more than the maximum chunk size, the data is stored in a single chunk:
key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*branches^(l+1), the data vector is split into slices of chunksize*branches^l length (except the last one):
key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

/*
The tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are transparent, since their represented size component is strictly greater than their maximum data size, as they encode a subtree.

If all is well, it is possible to implement this by simply composing readers, so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, file system, and network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocation, though, since it genuinely needs them.
*/
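// The sketch below illustrates rule 4 above: the key of a single content
// chunk is the hash of an 8-byte little-endian size prefix followed by the
// data itself, mirroring what hashChunk does further down. It is an
// illustrative helper, not part of the original API, and it assumes "SHA3"
// is a hash name accepted by MakeHashFunc.
func exampleLeafKey(data []byte) Key {
	hasher := MakeHashFunc("SHA3")()
	sizePrefix := make([]byte, 8)
	binary.LittleEndian.PutUint64(sizePrefix, uint64(len(data)))
	hasher.ResetWithLength(sizePrefix) // seed the hash with the size prefix
	hasher.Write(data)                 // then hash the content itself
	return hasher.Sum(nil)
}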
var (
	errAppendOppNotSuported = errors.New("Append operation not supported")
	errOperationTimedOut    = errors.New("operation timed out")
)

type TreeChunker struct {
	branches int64
	hashFunc SwarmHasher
	// calculated
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
}

func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
	self = &TreeChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 0

	return
}

// String() for pretty printing
func (self *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", self.Key.Log(), self.Size, len(self.SData))
}

type hashJob struct {
	key      Key
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (self *TreeChunker) incrementWorkerCount() {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	self.workerCount += 1
}

func (self *TreeChunker) getWorkerCount() int64 {
	self.workerLock.RLock()
	defer self.workerLock.RUnlock()
	return self.workerCount
}

func (self *TreeChunker) decrementWorkerCount() {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	self.workerCount -= 1
}

func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
	if self.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	jobC := make(chan *hashJob, 2*ChunkProcessors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)
	quitC := make(chan bool)

	// wwg = workers waitgroup, keeps track of hash workers spawned by this split call
	if wwg != nil {
		wwg.Add(1)
	}

	self.incrementWorkerCount()
	go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)

	depth := 0
	treeSize := self.chunkSize

	// find the lowest depth such that chunkSize*branches^depth >= size;
	// this is the order of magnitude of the data size in base branches, i.e.
	// the number of levels of branching in the resulting tree.
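	// For example, with hashSize = 32 and branches = 128 (so chunkSize = 4096):
	// size <= 4096 gives depth 0 (a single leaf chunk), size <= 4096*128
	// gives depth 1, size <= 4096*128^2 gives depth 2, and so on. These
	// figures are illustrative defaults, not values fixed by this file.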
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}

	key := make([]byte, self.hashFunc().Size())
	// this waitgroup member is released after the root hash is calculated
	wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go self.split(depth, treeSize/self.branches, key, data, size, jobC, chunkC, errC, quitC, wg, swg, wwg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// wait for all the threads to finish
		wg.Wait()
		// if the storage waitgroup is non-nil, wait for storage to finish too
		if swg != nil {
			swg.Wait()
		}
		close(errC)
	}()

	defer close(quitC)
	select {
	case err := <-errC:
		if err != nil {
			return nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
		return nil, errOperationTimedOut
	}

	return key, nil
}

func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {

	for depth > 0 && size < treeSize {
		treeSize /= self.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				errC <- err
				return
			}
		}
		select {
		case jobC <- &hashJob{key, chunkData, size, parentWg}:
		case <-quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing the hashes of its child nodes
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*self.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeKey := chunk[8+i*self.hashSize : 8+(i+1)*self.hashSize]

		childrenWg.Add(1)
		self.split(depth-1, treeSize/self.branches, subTreeKey, data, secSize, jobC, chunkC, errC, quitC, childrenWg, swg, wwg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto their sections of the chunk
	childrenWg.Wait()

	// spawn an extra hash worker if the job queue is backed up and the worker cap has not been reached
	worker := self.getWorkerCount()
	if int64(len(jobC)) > worker && worker < ChunkProcessors {
		if wwg != nil {
			wwg.Add(1)
		}
		self.incrementWorkerCount()
		go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
	}
	select {
	case jobC <- &hashJob{key, chunk, size, parentWg}:
	case <-quitC:
	}
}

func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
	defer self.decrementWorkerCount()

	hasher := self.hashFunc()
	if wwg != nil {
		defer wwg.Done()
	}
	for {
		select {

		case job, ok := <-jobC:
			if !ok {
				return
			}
			// the chunk's data (content or child hashes) is complete; now hash the chunk itself
			self.hashChunk(hasher, job, chunkC, swg)
		case <-quitC:
			return
		}
	}
}
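// For reference, the layout of an intermediate chunk built by split is
// (offsets shown for an assumed hashSize of 32 bytes):
//
//	bytes [0:8)    total size of the subtree's data, as a little-endian uint64
//	bytes [8:40)   key of child 0
//	bytes [40:72)  key of child 1
//	...            one hashSize-wide slot per child, branchCnt slots in total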
// The TreeChunker's own hash function hashes together
// - the size (of the subtree encoded in the chunk)
// - the chunk, i.e. the contents read from the input reader
func (self *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
	hasher.ResetWithLength(job.chunk[:8]) // the 8-byte length prefix
	hasher.Write(job.chunk[8:])           // the payload, minus the 8-byte length prefix
	h := hasher.Sum(nil)

	newChunk := &Chunk{
		Key:   h,
		SData: job.chunk,
		Size:  job.size,
		wg:    swg,
	}

	// report the hash of this chunk one level up (key corresponds to the proper subslice of the parent chunk)
	copy(job.key, h)
	// send off the new chunk to storage
	if chunkC != nil {
		if swg != nil {
			swg.Add(1)
		}
	}
	job.parentWg.Done()

	if chunkC != nil {
		chunkC <- newChunk
	}
}

func (self *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
	return nil, errAppendOppNotSuported
}

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Key         // root key
	chunkC    chan *Chunk // chunk channel to send retrieve requests on
	chunk     *Chunk      // root chunk; its Size field covers the entire subtree
	off       int64       // offset
	chunkSize int64       // inherited from the chunker
	branches  int64       // inherited from the chunker
	hashSize  int64       // inherited from the chunker
}

// Join implements the Joiner interface
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
	return &LazyChunkReader{
		key:       key,
		chunkC:    chunkC,
		chunkSize: self.chunkSize,
		branches:  self.branches,
		hashSize:  self.hashSize,
	}
}

// Size is meant to be called on the LazySectionReader
func (self *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
	if self.chunk != nil {
		return self.chunk.Size, nil
	}
	chunk := retrieve(self.key, self.chunkC, quitC)
	if chunk == nil {
		select {
		case <-quitC:
			return 0, errors.New("aborted")
		default:
			return 0, fmt.Errorf("root chunk not found for %v", self.key.Hex())
		}
	}
	self.chunk = chunk
	return chunk.Size, nil
}

// ReadAt can be called numerous times;
// concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first.
func (self *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	// this is correct: a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := self.Size(quitC)
	if err != nil {
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = self.chunkSize
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	wg.Add(1)
	go self.join(b, off, off+int64(len(b)), depth, treeSize/self.branches, self.chunk, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		close(quitC)

		return 0, err
	}
	if off+int64(len(b)) >= size {
		return len(b), io.EOF
	}
	return len(b), nil
}

func (self *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunk *Chunk, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
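	// Worked example (assuming chunkSize = 4096 and branches = 128): a
	// ReadAt of bytes [5000, 9000) from a 100 KB document arrives here with
	// depth = 1 and treeSize = 4096, so leaf children 1 and 2 of the root
	// are fetched concurrently and the sub-ranges [5000, 8192) and
	// [8192, 9000) are copied into their sections of b.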
	// find the appropriate block level
	for chunk.Size < treeSize && depth > 0 {
		treeSize /= self.branches
		depth--
	}

	// leaf chunk found
	if depth == 0 {
		extra := 8 + eoff - int64(len(chunk.SData))
		if extra > 0 {
			eoff -= extra
		}
		// content chunk: simply copy the requested slice of the chunk's data
		copy(b, chunk.SData[8+off:8+eoff])
		return
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	wg := &sync.WaitGroup{}
	defer wg.Wait()

	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunk.SData[8+j*self.hashSize : 8+(j+1)*self.hashSize]
			chunk := retrieve(childKey, self.chunkC, quitC)
			if chunk == nil {
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found", off, off+treeSize):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			self.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/self.branches, chunk, wg, errC, quitC)
		}(i)
	}
}

// retrieve submits a retrieval request for a key to a queue (the DPA) and
// blocks until the chunk arrives or the caller times out;
// it aborts if quitC becomes readable.
func retrieve(key Key, chunkC chan *Chunk, quitC chan bool) *Chunk {
	chunk := &Chunk{
		Key: key,
		C:   make(chan bool), // closing this channel signals data delivery
	}
	// submit the chunk for retrieval
	select {
	case chunkC <- chunk: // submit retrieval request; someone should be listening on the other side (or we will time out globally)
	case <-quitC:
		return nil
	}
	// wait for the chunk retrieval
	select {
	case <-quitC:
		// this is how we control process leakage (quitC is closed once join is finished, after timeout)
		return nil
	case <-chunk.C: // bells are ringing, data has been delivered
	}
	if len(chunk.SData) == 0 {
		return nil
	}
	return chunk
}

// Read keeps a cursor, so it cannot be called simultaneously; see ReadAt
func (self *LazyChunkReader) Read(b []byte) (read int, err error) {
	read, err = self.ReadAt(b, self.off)

	self.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (s *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += s.off
	case 2:
		if s.chunk == nil { // seeking from the end requires the root chunk for the size; call Size first
			_, err := s.Size(nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += s.chunk.Size
	}

	if offset < 0 {
		return 0, errOffset
	}
	s.off = offset
	return offset, nil
}
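// Usage sketch (illustrative, not part of the original file): splitting a
// document through a chunk channel. It assumes a consumer goroutine that
// persists each chunk and then releases the chunk's storage waitgroup, and
// that ChunkerParams carries the hash name and branch count consumed by
// NewTreeChunker above.
//
//	chunker := NewTreeChunker(&ChunkerParams{Hash: "SHA3", Branches: 128})
//	chunkC := make(chan *Chunk)
//	swg := &sync.WaitGroup{}
//	go func() {
//		for chunk := range chunkC {
//			// persist chunk.SData under chunk.Key here, then release it
//			if chunk.wg != nil {
//				chunk.wg.Done()
//			}
//		}
//	}()
//	key, err := chunker.Split(strings.NewReader(doc), int64(len(doc)), chunkC, swg, nil)
//
// Reading back goes through Join, whose LazySectionReader issues retrieval
// requests on the same kind of channel via retrieve above.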