github.com/FUSIONFoundation/efsn@v3.6.2-0.20200916075423-dbb5dd5d2cc7+incompatible/swarm/storage/chunker.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"

	"github.com/FusionFoundation/efsn/metrics"
	ch "github.com/FusionFoundation/efsn/swarm/chunk"
	"github.com/FusionFoundation/efsn/swarm/log"
	"github.com/FusionFoundation/efsn/swarm/spancontext"
	opentracing "github.com/opentracing/opentracing-go"
	olog "github.com/opentracing/opentracing-go/log"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the entire subtree under the node, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunk size, the data is stored in a single chunk:
key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*branches^(l+1), the data vector is split into slices of chunksize*branches^l length (except the last one):
key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/
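// A minimal sketch of rule 2 above: a branching node's chunk data is an
// 8-byte little-endian subtree size followed by the concatenated child
// references. encodeBranchNode is a hypothetical helper for illustration
// only (assuming 32-byte keys); it is not part of this package:
//
//	func encodeBranchNode(subtreeSize uint64, childKeys [][]byte) []byte {
//		data := make([]byte, 8, 8+len(childKeys)*32)
//		binary.LittleEndian.PutUint64(data[:8], subtreeSize)
//		for _, k := range childKeys {
//			data = append(data, k...) // key_{j} || key_{j+1} || ...
//		}
//		return data
//	}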
/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a chunk representing a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well, it is possible to implement this by simply composing readers, so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between: memory, file system, network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does require extra copies and allocation, though, since it needs the full chunk data at hand.
*/

var (
	errAppendOpNotSupported = errors.New("Append operation not supported")
)

type ChunkerParams struct {
	chunkSize int64
	hashSize  int64
}

type SplitterParams struct {
	ChunkerParams
	reader io.Reader
	putter Putter
	addr   Address
}

type TreeSplitterParams struct {
	SplitterParams
	size int64
}

type JoinerParams struct {
	ChunkerParams
	addr   Address
	getter Getter
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
	ctx   context.Context
}

type TreeChunker struct {
	ctx context.Context

	branches int64
	hashFunc SwarmHasher
	dataSize int64
	data     io.Reader
	// calculated
	addr        Address
	depth       int
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}

/*
Join reconstructs the original content based on a root key.
When joining, the caller is returned a LazyChunkReader, which is seekable and implements on-demand fetching of chunks as and where they are read.
Chunks to retrieve are fetched via the getter, which the caller provides.
If an error is encountered during joining, it appears as a reader error; as a result, partial reads from a document are possible even if other parts are corrupt or lost.
The chunks are not meant to be validated by the chunker when joining. This is because it is left to the DPA to decide which sources are trusted.
*/
func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
	jp := &JoinerParams{
		ChunkerParams: ChunkerParams{
			chunkSize: ch.DefaultSize,
			hashSize:  int64(len(addr)),
		},
		addr:   addr,
		getter: getter,
		depth:  depth,
		ctx:    ctx,
	}

	return NewTreeJoiner(jp).Join(ctx)
}
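// A minimal usage sketch for TreeJoin (hypothetical values: `addr` is a known
// root address and `getter` is some Getter implementation); depth must be 0
// today, per the TODO on JoinerParams:
//
//	reader := TreeJoin(context.TODO(), addr, getter, 0)
//	buf := make([]byte, 4096)
//	n, err := reader.ReadAt(buf, 0)
//	if err != nil && err != io.EOF {
//		// a missing or corrupt chunk surfaces here as a reader error
//	}
//	_ = buf[:n] // the first n bytes of the joined document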
/*
When splitting, data is given as an io.Reader, and the key is a hashSize-long byte slice (Key); the root hash of the entire content will fill this once processing finishes.
New chunks to store are stored using the putter, which the caller provides.
*/
func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
	tsp := &TreeSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: ch.DefaultSize,
				hashSize:  putter.RefSize(),
			},
			reader: data,
			putter: putter,
		},
		size: size,
	}
	return NewTreeSplitter(tsp).Split(ctx)
}

func NewTreeJoiner(params *JoinerParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.getter = params.getter
	tc.depth = params.depth
	tc.chunkSize = params.chunkSize
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	tc.ctx = params.ctx

	return tc
}

func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.data = params.reader
	tc.dataSize = params.size
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.chunkSize = params.chunkSize
	tc.putter = params.putter
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

type hashJob struct {
	key      Address
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (tc *TreeChunker) incrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount++
}

func (tc *TreeChunker) getWorkerCount() int64 {
	tc.workerLock.RLock()
	defer tc.workerLock.RUnlock()
	return tc.workerCount
}

func (tc *TreeChunker) decrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount--
}
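// A minimal usage sketch for TreeSplit, defined above (hypothetical values:
// `data` is a []byte payload and `putter` is some Putter implementation whose
// RefSize matches the hash in use); `bytes` would need to be imported:
//
//	content := bytes.NewReader(data)
//	key, wait, err := TreeSplit(ctx, content, int64(len(data)), putter)
//	if err != nil {
//		// splitting/hashing failed
//	}
//	if err := wait(ctx); err != nil {
//		// chunk storage did not complete
//	}
//	_ = key // root hash addressing the entire document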
func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	if tc.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	tc.runWorker(ctx)

	depth := 0
	treeSize := tc.chunkSize

	// takes the lowest depth such that chunksize*branches^depth >= size;
	// a power series that finds the order of magnitude of the data size in base branches, i.e. the number of levels of branching in the resulting tree
	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
		depth++
	}

	key := make([]byte, tc.hashSize)
	// this waitgroup member is released after the root hash is calculated
	tc.wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go tc.split(ctx, depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// wait for all threads to finish
		tc.wg.Wait()
		close(tc.errC)
	}()

	defer close(tc.quitC)
	defer tc.putter.Close()
	select {
	case err := <-tc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-ctx.Done():
		return nil, nil, ctx.Err()
	}

	return key, tc.putter.Wait, nil
}

func (tc *TreeChunker) split(ctx context.Context, depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {

	for depth > 0 && size < treeSize {
		treeSize /= tc.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := tc.data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				tc.errC <- err
				return
			}
		}
		select {
		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
		case <-tc.quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing child node hashes
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*tc.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeAddress := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]

		childrenWg.Add(1)
		tc.split(ctx, depth-1, treeSize/tc.branches, subTreeAddress, secSize, childrenWg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()

	worker := tc.getWorkerCount()
	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
		tc.runWorker(ctx)
	}
	select {
	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
	case <-tc.quitC:
	}
}

func (tc *TreeChunker) runWorker(ctx context.Context) {
	tc.incrementWorkerCount()
	go func() {
		defer tc.decrementWorkerCount()
		for {
			select {

			case job, ok := <-tc.jobC:
				if !ok {
					return
				}

				h, err := tc.putter.Put(ctx, job.chunk)
				if err != nil {
					tc.errC <- err
					return
				}
				copy(job.key, h)
				job.parentWg.Done()
			case <-tc.quitC:
				return
			}
		}
	}()
}

func (tc *TreeChunker) Append() (Address, func(), error) {
	return nil, nil, errAppendOpNotSupported
}
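// A worked example of the depth calculation in Split above, assuming the
// default 4096-byte chunks and 32-byte hashes (so branches = 128):
// depth 0 covers up to 4096 bytes, depth 1 up to 4096*128 = 524288 bytes,
// depth 2 up to 4096*128^2 = 67108864 bytes, and so on. Standalone sketch:
//
//	depth := 0
//	treeSize := int64(4096)
//	for ; treeSize < dataSize; treeSize *= 128 {
//		depth++
//	}
//	// on exit, treeSize = 4096*128^depth is the smallest such value >= dataSize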
// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	ctx       context.Context
	addr      Address // root address
	chunkData ChunkData
	off       int64 // offset
	chunkSize int64 // inherited from chunker
	branches  int64 // inherited from chunker
	hashSize  int64 // inherited from chunker
	depth     int
	getter    Getter
}

func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
	return &LazyChunkReader{
		addr:      tc.addr,
		chunkSize: tc.chunkSize,
		branches:  tc.branches,
		hashSize:  tc.hashSize,
		depth:     tc.depth,
		getter:    tc.getter,
		ctx:       tc.ctx,
	}
}

func (r *LazyChunkReader) Context() context.Context {
	return r.ctx
}

// Size is meant to be called on the LazySectionReader
func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		ctx,
		"lcr.size")
	defer sp.Finish()

	log.Debug("lazychunkreader.size", "addr", r.addr)
	if r.chunkData == nil {
		chunkData, err := r.getter.Get(cctx, Reference(r.addr))
		if err != nil {
			return 0, err
		}
		r.chunkData = chunkData
		s := r.chunkData.Size()
		log.Debug("lazychunkreader.size", "key", r.addr, "size", s)
		if s < 0 {
			return 0, errors.New("corrupt size")
		}
		return int64(s), nil
	}
	s := r.chunkData.Size()
	log.Debug("lazychunkreader.size", "key", r.addr, "size", s)

	return int64(s), nil
}

// ReadAt can be called numerous times;
// concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first.
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		r.ctx,
		"lcr.read")
	defer sp.Finish()

	defer func() {
		sp.LogFields(
			olog.Int("off", int(off)),
			olog.Int("read", read))
	}()

	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := r.Size(cctx, quitC)
	if err != nil {
		log.Debug("lazychunkreader.readat.size", "size", size, "err", err)
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = r.chunkSize
	for ; treeSize < size; treeSize *= r.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	length := int64(len(b))
	for d := 0; d < r.depth; d++ {
		off *= r.chunkSize
		length *= r.chunkSize
	}
	wg.Add(1)
	go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		log.Debug("lazychunkreader.readat.errc", "err", err)
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		log.Debug("lazychunkreader.readat.return at end", "size", size, "off", off)
		return int(size - off), io.EOF
	}
	log.Debug("lazychunkreader.readat.errc", "buff", len(b))
	return len(b), nil
}
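// A sketch of the child-index arithmetic join uses below, where treeSize is
// the data span covered by one child subtree: the children overlapping the
// byte range [off, eoff) are the indices off/treeSize up to (but excluding)
// (eoff+treeSize-1)/treeSize. For example, with treeSize = 4096 (assumed),
// the range [5000, 10000) touches children 1 and 2:
//
//	start := int64(5000) / 4096             // == 1, first child touched
//	end := (int64(10000) + 4096 - 1) / 4096 // == 3, exclusive upper bound
//	// children 1 ([4096,8192)) and 2 ([8192,12288)) are joined recursively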
func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
	// find the appropriate block level
	for chunkData.Size() < uint64(treeSize) && depth > r.depth {
		treeSize /= r.branches
		depth--
	}

	// leaf chunk found
	if depth == r.depth {
		extra := 8 + eoff - int64(len(chunkData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunkData[8+off:8+eoff])
		return // simply give back the chunk's data for content chunks
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	// the last non-leaf chunk can be shorter than the default chunk size; let's not read it further than its end
	currentBranches := int64(len(chunkData)-8) / r.hashSize
	if end > currentBranches {
		end = currentBranches
	}

	wg := &sync.WaitGroup{}
	defer wg.Wait()
	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childAddress := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
			chunkData, err := r.getter.Get(r.ctx, Reference(childAddress))
			if err != nil {
				log.Debug("lazychunkreader.join", "key", fmt.Sprintf("%x", childAddress), "err", err)
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childAddress)):
				case <-quitC:
				}
				return
			}
			if l := len(chunkData); l < 9 {
				select {
				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childAddress), l):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
		}(i)
	}
}

// Read keeps a cursor, so it cannot be called simultaneously; see ReadAt
func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
	log.Debug("lazychunkreader.read", "key", r.addr)
	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)

	read, err = r.ReadAt(b, r.off)
	if err != nil && err != io.EOF {
		log.Debug("lazychunkreader.readat", "read", read, "err", err)
		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
	}

	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))

	r.off += int64(read)
	return read, err
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	log.Debug("lazychunkreader.seek", "key", r.addr, "offset", offset)
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += r.off
	case 2:
		if r.chunkData == nil { // seeking from the end requires the root chunk for size; call Size first
			_, err := r.Size(context.TODO(), nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += int64(r.chunkData.Size())
	}

	if offset < 0 {
		return 0, errOffset
	}
	r.off = offset
	return offset, nil
}
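// A minimal usage sketch for Seek (hypothetical: `r` is a *LazyChunkReader
// whose root chunk is retrievable). Seeking from the end (whence == 2)
// implicitly fetches the root chunk to learn the total size:
//
//	if _, err := r.Seek(-16, 2); err != nil {
//		// root chunk unavailable, or resulting offset negative
//	}
//	tail := make([]byte, 16)
//	n, err := r.Read(tail) // n == 16 with err == io.EOF at the document's end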