github.com/muhammedhassanm/blockchain@v0.0.0-20200120143007-697261defd4d/go-ethereum-master/swarm/storage/chunker.go (about) 1 // Copyright 2016 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 package storage 17 18 import ( 19 "context" 20 "encoding/binary" 21 "errors" 22 "fmt" 23 "io" 24 "sync" 25 "time" 26 27 "github.com/ethereum/go-ethereum/metrics" 28 "github.com/ethereum/go-ethereum/swarm/log" 29 ) 30 31 /* 32 The distributed storage implemented in this package requires fix sized chunks of content. 33 34 Chunker is the interface to a component that is responsible for disassembling and assembling larger data. 35 36 TreeChunker implements a Chunker based on a tree structure defined as follows: 37 38 1 each node in the tree including the root and other branching nodes are stored as a chunk. 39 40 2 branching nodes encode data contents that includes the size of the dataslice covered by its entire subtree under the node as well as the hash keys of all its children : 41 data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1} 42 43 3 Leaf nodes encode an actual subslice of the input data. 

4 if data size is not more than maximum chunksize, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if data size is more than chunksize*branches^l, but no more than chunksize*
  branches^(l+1), the data vector is split into slices of chunksize*
  branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

 The underlying hash function is configurable
*/

/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way, it builds a tree out of the document so that each node either represents a chunk of real data or a chunk of data representing a branching non-leaf node of the tree. In particular each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between: memory, file system, network socket (bzz peers storage request is read from the socket). In practice there may be need for several stages of internal buffering.
The hashing itself does use extra copies and allocation though, since it does need it.
*/

// Sentinel errors returned by the chunker.
var (
	// errAppendOppNotSuported is returned by TreeChunker.Append.
	errAppendOppNotSuported = errors.New("Append operation not supported")
	// errOperationTimedOut is returned by TreeChunker.Split when the split
	// does not finish before the split timeout fires.
	errOperationTimedOut = errors.New("operation timed out")
)

const (
	// DefaultChunkSize is the chunk size, in bytes, used by TreeJoin and TreeSplit.
	DefaultChunkSize int64 = 4096
)

// ChunkerParams holds the sizing parameters shared by splitters and joiners.
type ChunkerParams struct {
	chunkSize int64 // size of a chunk in bytes
	hashSize  int64 // length of a chunk reference in bytes
}

// SplitterParams extends ChunkerParams with the input and output of a split.
type SplitterParams struct {
	ChunkerParams
	reader io.Reader // source of the data to split
	putter Putter    // receives the chunks produced by the split
	addr   Address   // copied into TreeChunker.addr by NewTreeSplitter
}

// TreeSplitterParams extends SplitterParams with the total size of the input.
type TreeSplitterParams struct {
	SplitterParams
	size int64 // number of bytes to read from reader
}

// JoinerParams holds the parameters needed to build a joining TreeChunker.
type JoinerParams struct {
	ChunkerParams
	addr   Address // root key of the content to join
	getter Getter  // used to retrieve chunks by reference
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
}

// TreeChunker carries the state of a single split or join operation.
// Instances are created with NewTreeSplitter or NewTreeJoiner.
type TreeChunker struct {
	branches int64       // chunkSize / hashSize, set by the constructors
	hashFunc SwarmHasher // NOTE(review): not referenced by the code in this file
	dataSize int64       // total size of the data to split
	data     io.Reader   // source of the data to split
	// calculated
	addr        Address
	depth       int
	hashSize    int64           // self.hashFunc.New().Size()
	chunkSize   int64           // hashSize* branches
	workerCount int64           // the number of worker routines used
	workerLock  sync.RWMutex    // lock for the worker count
	jobC        chan *hashJob   // queue of chunks waiting to be stored by a worker
	wg          *sync.WaitGroup // released once the root hash has been calculated
	putter      Putter          // chunk sink used when splitting
	getter      Getter          // chunk source used when joining
	errC        chan error      // carries errors from split goroutines and workers to Split
	quitC       chan bool       // closed when Split returns, to stop workers and pending sends
}

/*
	Join reconstructs original content based on a root key.
	When joining, the caller gets returned a Lazy SectionReader, which is
	seekable and implements on-demand fetching of chunks as and where it is read.
	New chunks to retrieve are coming from the getter, which the caller provides.
	If an error is encountered during joining, it appears as a reader error.
	The SectionReader.
	As a result, partial reads from a document are possible even if other parts
	are corrupt or lost.
	The chunks are not meant to be validated by the chunker when joining. This
	is because it is left to the DPA to decide which sources are trusted.
129 */ 130 func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader { 131 jp := &JoinerParams{ 132 ChunkerParams: ChunkerParams{ 133 chunkSize: DefaultChunkSize, 134 hashSize: int64(len(addr)), 135 }, 136 addr: addr, 137 getter: getter, 138 depth: depth, 139 } 140 141 return NewTreeJoiner(jp).Join(ctx) 142 } 143 144 /* 145 When splitting, data is given as a SectionReader, and the key is a hashSize long byte slice (Key), the root hash of the entire content will fill this once processing finishes. 146 New chunks to store are store using the putter which the caller provides. 147 */ 148 func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) { 149 tsp := &TreeSplitterParams{ 150 SplitterParams: SplitterParams{ 151 ChunkerParams: ChunkerParams{ 152 chunkSize: DefaultChunkSize, 153 hashSize: putter.RefSize(), 154 }, 155 reader: data, 156 putter: putter, 157 }, 158 size: size, 159 } 160 return NewTreeSplitter(tsp).Split(ctx) 161 } 162 163 func NewTreeJoiner(params *JoinerParams) *TreeChunker { 164 tc := &TreeChunker{} 165 tc.hashSize = params.hashSize 166 tc.branches = params.chunkSize / params.hashSize 167 tc.addr = params.addr 168 tc.getter = params.getter 169 tc.depth = params.depth 170 tc.chunkSize = params.chunkSize 171 tc.workerCount = 0 172 tc.jobC = make(chan *hashJob, 2*ChunkProcessors) 173 tc.wg = &sync.WaitGroup{} 174 tc.errC = make(chan error) 175 tc.quitC = make(chan bool) 176 177 return tc 178 } 179 180 func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker { 181 tc := &TreeChunker{} 182 tc.data = params.reader 183 tc.dataSize = params.size 184 tc.hashSize = params.hashSize 185 tc.branches = params.chunkSize / params.hashSize 186 tc.addr = params.addr 187 tc.chunkSize = params.chunkSize 188 tc.putter = params.putter 189 tc.workerCount = 0 190 tc.jobC = make(chan *hashJob, 2*ChunkProcessors) 191 tc.wg = &sync.WaitGroup{} 192 tc.errC = 
make(chan error) 193 tc.quitC = make(chan bool) 194 195 return tc 196 } 197 198 // String() for pretty printing 199 func (c *Chunk) String() string { 200 return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData)) 201 } 202 203 type hashJob struct { 204 key Address 205 chunk []byte 206 size int64 207 parentWg *sync.WaitGroup 208 } 209 210 func (tc *TreeChunker) incrementWorkerCount() { 211 tc.workerLock.Lock() 212 defer tc.workerLock.Unlock() 213 tc.workerCount += 1 214 } 215 216 func (tc *TreeChunker) getWorkerCount() int64 { 217 tc.workerLock.RLock() 218 defer tc.workerLock.RUnlock() 219 return tc.workerCount 220 } 221 222 func (tc *TreeChunker) decrementWorkerCount() { 223 tc.workerLock.Lock() 224 defer tc.workerLock.Unlock() 225 tc.workerCount -= 1 226 } 227 228 func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 229 if tc.chunkSize <= 0 { 230 panic("chunker must be initialised") 231 } 232 233 tc.runWorker() 234 235 depth := 0 236 treeSize := tc.chunkSize 237 238 // takes lowest depth such that chunksize*HashCount^(depth+1) > size 239 // power series, will find the order of magnitude of the data size in base hashCount or numbers of levels of branching in the resulting tree. 
240 for ; treeSize < tc.dataSize; treeSize *= tc.branches { 241 depth++ 242 } 243 244 key := make([]byte, tc.hashSize) 245 // this waitgroup member is released after the root hash is calculated 246 tc.wg.Add(1) 247 //launch actual recursive function passing the waitgroups 248 go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg) 249 250 // closes internal error channel if all subprocesses in the workgroup finished 251 go func() { 252 // waiting for all threads to finish 253 tc.wg.Wait() 254 close(tc.errC) 255 }() 256 257 defer close(tc.quitC) 258 defer tc.putter.Close() 259 select { 260 case err := <-tc.errC: 261 if err != nil { 262 return nil, nil, err 263 } 264 case <-time.NewTimer(splitTimeout).C: 265 return nil, nil, errOperationTimedOut 266 } 267 268 return key, tc.putter.Wait, nil 269 } 270 271 func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) { 272 273 // 274 275 for depth > 0 && size < treeSize { 276 treeSize /= tc.branches 277 depth-- 278 } 279 280 if depth == 0 { 281 // leaf nodes -> content chunks 282 chunkData := make([]byte, size+8) 283 binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size)) 284 var readBytes int64 285 for readBytes < size { 286 n, err := tc.data.Read(chunkData[8+readBytes:]) 287 readBytes += int64(n) 288 if err != nil && !(err == io.EOF && readBytes == size) { 289 tc.errC <- err 290 return 291 } 292 } 293 select { 294 case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}: 295 case <-tc.quitC: 296 } 297 return 298 } 299 // dept > 0 300 // intermediate chunk containing child nodes hashes 301 branchCnt := (size + treeSize - 1) / treeSize 302 303 var chunk = make([]byte, branchCnt*tc.hashSize+8) 304 var pos, i int64 305 306 binary.LittleEndian.PutUint64(chunk[0:8], uint64(size)) 307 308 childrenWg := &sync.WaitGroup{} 309 var secSize int64 310 for i < branchCnt { 311 // the last item can have shorter data 312 if size-pos < treeSize { 313 secSize = size - pos 314 
} else { 315 secSize = treeSize 316 } 317 // the hash of that data 318 subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize] 319 320 childrenWg.Add(1) 321 tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg) 322 323 i++ 324 pos += treeSize 325 } 326 // wait for all the children to complete calculating their hashes and copying them onto sections of the chunk 327 // parentWg.Add(1) 328 // go func() { 329 childrenWg.Wait() 330 331 worker := tc.getWorkerCount() 332 if int64(len(tc.jobC)) > worker && worker < ChunkProcessors { 333 tc.runWorker() 334 335 } 336 select { 337 case tc.jobC <- &hashJob{addr, chunk, size, parentWg}: 338 case <-tc.quitC: 339 } 340 } 341 342 func (tc *TreeChunker) runWorker() { 343 tc.incrementWorkerCount() 344 go func() { 345 defer tc.decrementWorkerCount() 346 for { 347 select { 348 349 case job, ok := <-tc.jobC: 350 if !ok { 351 return 352 } 353 354 h, err := tc.putter.Put(job.chunk) 355 if err != nil { 356 tc.errC <- err 357 return 358 } 359 copy(job.key, h) 360 job.parentWg.Done() 361 case <-tc.quitC: 362 return 363 } 364 } 365 }() 366 } 367 368 func (tc *TreeChunker) Append() (Address, func(), error) { 369 return nil, nil, errAppendOppNotSuported 370 } 371 372 // LazyChunkReader implements LazySectionReader 373 type LazyChunkReader struct { 374 key Address // root key 375 chunkData ChunkData 376 off int64 // offset 377 chunkSize int64 // inherit from chunker 378 branches int64 // inherit from chunker 379 hashSize int64 // inherit from chunker 380 depth int 381 getter Getter 382 } 383 384 func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader { 385 return &LazyChunkReader{ 386 key: tc.addr, 387 chunkSize: tc.chunkSize, 388 branches: tc.branches, 389 hashSize: tc.hashSize, 390 depth: tc.depth, 391 getter: tc.getter, 392 } 393 } 394 395 // Size is meant to be called on the LazySectionReader 396 func (r *LazyChunkReader) Size(quitC chan bool) (n int64, err error) { 397 
metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1) 398 399 log.Debug("lazychunkreader.size", "key", r.key) 400 if r.chunkData == nil { 401 chunkData, err := r.getter.Get(Reference(r.key)) 402 if err != nil { 403 return 0, err 404 } 405 if chunkData == nil { 406 select { 407 case <-quitC: 408 return 0, errors.New("aborted") 409 default: 410 return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex()) 411 } 412 } 413 r.chunkData = chunkData 414 } 415 return r.chunkData.Size(), nil 416 } 417 418 // read at can be called numerous times 419 // concurrent reads are allowed 420 // Size() needs to be called synchronously on the LazyChunkReader first 421 func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) { 422 metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1) 423 424 // this is correct, a swarm doc cannot be zero length, so no EOF is expected 425 if len(b) == 0 { 426 return 0, nil 427 } 428 quitC := make(chan bool) 429 size, err := r.Size(quitC) 430 if err != nil { 431 log.Error("lazychunkreader.readat.size", "size", size, "err", err) 432 return 0, err 433 } 434 435 errC := make(chan error) 436 437 // } 438 var treeSize int64 439 var depth int 440 // calculate depth and max treeSize 441 treeSize = r.chunkSize 442 for ; treeSize < size; treeSize *= r.branches { 443 depth++ 444 } 445 wg := sync.WaitGroup{} 446 length := int64(len(b)) 447 for d := 0; d < r.depth; d++ { 448 off *= r.chunkSize 449 length *= r.chunkSize 450 } 451 wg.Add(1) 452 go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC) 453 go func() { 454 wg.Wait() 455 close(errC) 456 }() 457 458 err = <-errC 459 if err != nil { 460 log.Error("lazychunkreader.readat.errc", "err", err) 461 close(quitC) 462 return 0, err 463 } 464 if off+int64(len(b)) >= size { 465 return int(size - off), io.EOF 466 } 467 return len(b), nil 468 } 469 470 func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, 
chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) { 471 defer parentWg.Done() 472 // find appropriate block level 473 for chunkData.Size() < treeSize && depth > r.depth { 474 treeSize /= r.branches 475 depth-- 476 } 477 478 // leaf chunk found 479 if depth == r.depth { 480 extra := 8 + eoff - int64(len(chunkData)) 481 if extra > 0 { 482 eoff -= extra 483 } 484 copy(b, chunkData[8+off:8+eoff]) 485 return // simply give back the chunks reader for content chunks 486 } 487 488 // subtree 489 start := off / treeSize 490 end := (eoff + treeSize - 1) / treeSize 491 492 // last non-leaf chunk can be shorter than default chunk size, let's not read it further then its end 493 currentBranches := int64(len(chunkData)-8) / r.hashSize 494 if end > currentBranches { 495 end = currentBranches 496 } 497 498 wg := &sync.WaitGroup{} 499 defer wg.Wait() 500 for i := start; i < end; i++ { 501 soff := i * treeSize 502 roff := soff 503 seoff := soff + treeSize 504 505 if soff < off { 506 soff = off 507 } 508 if seoff > eoff { 509 seoff = eoff 510 } 511 if depth > 1 { 512 wg.Wait() 513 } 514 wg.Add(1) 515 go func(j int64) { 516 childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize] 517 chunkData, err := r.getter.Get(Reference(childKey)) 518 if err != nil { 519 log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err) 520 select { 521 case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)): 522 case <-quitC: 523 } 524 return 525 } 526 if l := len(chunkData); l < 9 { 527 select { 528 case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l): 529 case <-quitC: 530 } 531 return 532 } 533 if soff < off { 534 soff = off 535 } 536 r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC) 537 }(i) 538 } //for 539 } 540 541 // Read keeps a cursor so cannot be called 
simulateously, see ReadAt 542 func (r *LazyChunkReader) Read(b []byte) (read int, err error) { 543 log.Debug("lazychunkreader.read", "key", r.key) 544 metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1) 545 546 read, err = r.ReadAt(b, r.off) 547 if err != nil && err != io.EOF { 548 log.Error("lazychunkreader.readat", "read", read, "err", err) 549 metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1) 550 } 551 552 metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read)) 553 554 r.off += int64(read) 555 return 556 } 557 558 // completely analogous to standard SectionReader implementation 559 var errWhence = errors.New("Seek: invalid whence") 560 var errOffset = errors.New("Seek: invalid offset") 561 562 func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) { 563 log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset) 564 switch whence { 565 default: 566 return 0, errWhence 567 case 0: 568 offset += 0 569 case 1: 570 offset += r.off 571 case 2: 572 if r.chunkData == nil { //seek from the end requires rootchunk for size. call Size first 573 _, err := r.Size(nil) 574 if err != nil { 575 return 0, fmt.Errorf("can't get size: %v", err) 576 } 577 } 578 offset += r.chunkData.Size() 579 } 580 581 if offset < 0 { 582 return 0, errOffset 583 } 584 r.off = offset 585 return offset, nil 586 }