github.com/insight-chain/inb-go@v1.1.3-0.20191221022159-da049980ae38/swarm/storage/chunker.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/insight-chain/inb-go/metrics"
	ch "github.com/insight-chain/inb-go/swarm/chunk"
	"github.com/insight-chain/inb-go/swarm/log"
	"github.com/insight-chain/inb-go/swarm/spancontext"
	opentracing "github.com/opentracing/opentracing-go"
	olog "github.com/opentracing/opentracing-go/log"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data content that includes the size of the data slice covered by the entire subtree under the node, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunk size, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*branches^(l+1), the data vector is split into slices of chunksize*branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well, it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, file system and network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocation though, since it does need it.
*/
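// Editor's sketch, not part of the original file: the depth calculation
// implied by rules 4 and 5 above, isolated as a helper. It is the same loop
// TreeChunker.Split runs below. With the default 4096-byte chunks and
// 32-byte hashes (branches = 4096/32 = 128), depth 0 covers up to 4KiB,
// depth 1 up to 512KiB, depth 2 up to 64MiB, and so on.
func treeDepth(dataSize, chunkSize, branches int64) int {
	depth := 0
	for treeSize := chunkSize; treeSize < dataSize; treeSize *= branches {
		depth++
	}
	return depth
}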
var (
	errAppendOppNotSuported = errors.New("Append operation not supported")
)

type ChunkerParams struct {
	chunkSize int64
	hashSize  int64
}

type SplitterParams struct {
	ChunkerParams
	reader io.Reader
	putter Putter
	addr   Address
}

type TreeSplitterParams struct {
	SplitterParams
	size int64
}

type JoinerParams struct {
	ChunkerParams
	addr   Address
	getter Getter
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
	ctx   context.Context
}

type TreeChunker struct {
	ctx context.Context

	branches int64
	hashFunc SwarmHasher
	dataSize int64
	data     io.Reader
	// calculated
	addr        Address
	depth       int
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}

/*
Join reconstructs the original content based on a root key.
When joining, the caller gets returned a Lazy SectionReader, which is
seekable and implements on-demand fetching of chunks as and where it is read.
New chunks to retrieve come from the getter, which the caller provides.
If an error is encountered during joining, it appears as a reader error.
As a result, partial reads from a document are possible even if other parts
are corrupt or lost.
The chunks are not meant to be validated by the chunker when joining. This
is because it is left to the DPA to decide which sources are trusted.
*/
func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
	jp := &JoinerParams{
		ChunkerParams: ChunkerParams{
			chunkSize: ch.DefaultSize,
			hashSize:  int64(len(addr)),
		},
		addr:   addr,
		getter: getter,
		depth:  depth,
		ctx:    ctx,
	}

	return NewTreeJoiner(jp).Join(ctx)
}
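// Editor's sketch (not part of the original file): typical use of TreeJoin.
// The getter implementation is assumed to be supplied by the caller; in the
// surrounding codebase it would wrap a chunk store. depth is passed as 0,
// the only value currently supported (see the TODO on JoinerParams).
func exampleJoin(ctx context.Context, root Address, getter Getter) ([]byte, error) {
	reader := TreeJoin(ctx, root, getter, 0)
	size, err := reader.Size(ctx, nil) // Size must be called before reading
	if err != nil {
		return nil, err
	}
	buf := make([]byte, size)
	if _, err := reader.ReadAt(buf, 0); err != nil && err != io.EOF {
		return nil, err
	}
	return buf, nil
}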
/*
When splitting, data is given as a SectionReader, and the key is a hashSize-long byte slice (Key); the root hash of the entire content will fill this once processing finishes.
New chunks to store are stored using the putter which the caller provides.
*/
func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
	tsp := &TreeSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: ch.DefaultSize,
				hashSize:  putter.RefSize(),
			},
			reader: data,
			putter: putter,
		},
		size: size,
	}
	return NewTreeSplitter(tsp).Split(ctx)
}
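// Editor's sketch (not part of the original file): typical use of TreeSplit.
// The putter is assumed to be provided by the caller, e.g. a hashing chunk
// store. The returned wait function must be called (and must return nil)
// before the root key can be considered durably stored.
func exampleSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (Address, error) {
	key, wait, err := TreeSplit(ctx, data, size, putter)
	if err != nil {
		return nil, err
	}
	if err := wait(ctx); err != nil {
		return nil, err
	}
	return key, nil
}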
func NewTreeJoiner(params *JoinerParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.getter = params.getter
	tc.depth = params.depth
	tc.chunkSize = params.chunkSize
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	tc.ctx = params.ctx

	return tc
}

func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.data = params.reader
	tc.dataSize = params.size
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.chunkSize = params.chunkSize
	tc.putter = params.putter
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

type hashJob struct {
	key      Address
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (tc *TreeChunker) incrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount += 1
}

func (tc *TreeChunker) getWorkerCount() int64 {
	tc.workerLock.RLock()
	defer tc.workerLock.RUnlock()
	return tc.workerCount
}

func (tc *TreeChunker) decrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount -= 1
}

func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	if tc.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	tc.runWorker(ctx)

	depth := 0
	treeSize := tc.chunkSize

	// takes the lowest depth such that chunksize*HashCount^(depth+1) > size
	// power series; finds the order of magnitude of the data size in base hashCount, i.e. the number of levels of branching in the resulting tree
	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
		depth++
	}

	key := make([]byte, tc.hashSize)
	// this waitgroup member is released after the root hash is calculated
	tc.wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go tc.split(ctx, depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// waiting for all threads to finish
		tc.wg.Wait()
		close(tc.errC)
	}()

	defer close(tc.quitC)
	defer tc.putter.Close()
	select {
	case err := <-tc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-ctx.Done():
		return nil, nil, ctx.Err()
	}

	return key, tc.putter.Wait, nil
}

func (tc *TreeChunker) split(ctx context.Context, depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {

	for depth > 0 && size < treeSize {
		treeSize /= tc.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := tc.data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				tc.errC <- err
				return
			}
		}
		select {
		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
		case <-tc.quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing the hashes of its child nodes
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*tc.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeAddress := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]

		childrenWg.Add(1)
		tc.split(ctx, depth-1, treeSize/tc.branches, subTreeAddress, secSize, childrenWg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()

	worker := tc.getWorkerCount()
	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
		tc.runWorker(ctx)
	}
	select {
	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
	case <-tc.quitC:
	}
}

func (tc *TreeChunker) runWorker(ctx context.Context) {
	tc.incrementWorkerCount()
	go func() {
		defer tc.decrementWorkerCount()
		for {
			select {

			case job, ok := <-tc.jobC:
				if !ok {
					return
				}

				h, err := tc.putter.Put(ctx, job.chunk)
				if err != nil {
					tc.errC <- err
					return
				}
				copy(job.key, h)
				job.parentWg.Done()
			case <-tc.quitC:
				return
			}
		}
	}()
}

func (tc *TreeChunker) Append() (Address, func(), error) {
	return nil, nil, errAppendOppNotSuported
}
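// Editor's sketch (not part of the original file): the intermediate-chunk
// layout produced by split above, shown as a standalone encoder. The first
// 8 bytes carry the size of the data covered by the subtree, little-endian,
// followed by the concatenated child references; this assumes Address is a
// byte-slice type, as it is in this package.
func encodeIntermediateChunk(subtreeSize uint64, children []Address) []byte {
	chunk := make([]byte, 8)
	binary.LittleEndian.PutUint64(chunk, subtreeSize)
	for _, child := range children {
		chunk = append(chunk, child...)
	}
	return chunk
}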
// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	ctx       context.Context
	addr      Address // root address
	chunkData ChunkData
	off       int64 // offset
	chunkSize int64 // inherited from chunker
	branches  int64 // inherited from chunker
	hashSize  int64 // inherited from chunker
	depth     int
	getter    Getter
}

func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
	return &LazyChunkReader{
		addr:      tc.addr,
		chunkSize: tc.chunkSize,
		branches:  tc.branches,
		hashSize:  tc.hashSize,
		depth:     tc.depth,
		getter:    tc.getter,
		ctx:       tc.ctx,
	}
}

func (r *LazyChunkReader) Context() context.Context {
	return r.ctx
}

// Size is meant to be called on the LazySectionReader
func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		ctx,
		"lcr.size")
	defer sp.Finish()

	log.Debug("lazychunkreader.size", "addr", r.addr)
	if r.chunkData == nil {

		startTime := time.Now()
		chunkData, err := r.getter.Get(cctx, Reference(r.addr))
		if err != nil {
			metrics.GetOrRegisterResettingTimer("lcr.getter.get.err", nil).UpdateSince(startTime)
			return 0, err
		}
		metrics.GetOrRegisterResettingTimer("lcr.getter.get", nil).UpdateSince(startTime)
		r.chunkData = chunkData
		s := r.chunkData.Size()
		log.Debug("lazychunkreader.size", "key", r.addr, "size", s)
		if s < 0 {
			return 0, errors.New("corrupt size")
		}
		return int64(s), nil
	}
	s := r.chunkData.Size()
	log.Debug("lazychunkreader.size", "key", r.addr, "size", s)

	return int64(s), nil
}

// ReadAt can be called numerous times;
// concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		r.ctx,
		"lcr.read")
	defer sp.Finish()

	defer func() {
		sp.LogFields(
			olog.Int("off", int(off)),
			olog.Int("read", read))
	}()

	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := r.Size(cctx, quitC)
	if err != nil {
		log.Debug("lazychunkreader.readat.size", "size", size, "err", err)
		return 0, err
	}

	errC := make(chan error)

	var treeSize int64
	var depth int
	// calculate depth and max treeSize
	treeSize = r.chunkSize
	for ; treeSize < size; treeSize *= r.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	length := int64(len(b))
	for d := 0; d < r.depth; d++ {
		off *= r.chunkSize
		length *= r.chunkSize
	}
	wg.Add(1)
	go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		log.Debug("lazychunkreader.readat.errc", "err", err)
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		log.Debug("lazychunkreader.readat.return at end", "size", size, "off", off)
		return int(size - off), io.EOF
	}
	log.Debug("lazychunkreader.readat.errc", "buff", len(b))
	return len(b), nil
}
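// Editor's sketch (not part of the original file): draining the lazy reader
// sequentially. Size is called first, as required by ReadAt; io.ReadFull then
// drives Read (defined below), which advances the reader's internal offset.
func exampleReadAll(ctx context.Context, r *LazyChunkReader) ([]byte, error) {
	size, err := r.Size(ctx, nil)
	if err != nil {
		return nil, err
	}
	buf := make([]byte, size)
	if _, err := io.ReadFull(r, buf); err != nil {
		return nil, err
	}
	return buf, nil
}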
func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
	// find the appropriate block level
	for chunkData.Size() < uint64(treeSize) && depth > r.depth {
		treeSize /= r.branches
		depth--
	}

	// leaf chunk found
	if depth == r.depth {
		extra := 8 + eoff - int64(len(chunkData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunkData[8+off:8+eoff])
		return // simply give back the chunk's reader for content chunks
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	// the last non-leaf chunk can be shorter than the default chunk size; let's not read it further than its end
	currentBranches := int64(len(chunkData)-8) / r.hashSize
	if end > currentBranches {
		end = currentBranches
	}

	wg := &sync.WaitGroup{}
	defer wg.Wait()
	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childAddress := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
			startTime := time.Now()
			chunkData, err := r.getter.Get(r.ctx, Reference(childAddress))
			if err != nil {
				metrics.GetOrRegisterResettingTimer("lcr.getter.get.err", nil).UpdateSince(startTime)
				log.Debug("lazychunkreader.join", "key", fmt.Sprintf("%x", childAddress), "err", err)
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childAddress)):
				case <-quitC:
				}
				return
			}
			metrics.GetOrRegisterResettingTimer("lcr.getter.get", nil).UpdateSince(startTime)
			if l := len(chunkData); l < 9 {
				select {
				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childAddress), l):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
		}(i)
	} // for
}

// Read keeps a cursor, so it cannot be called simultaneously; see ReadAt
func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
	log.Debug("lazychunkreader.read", "key", r.addr)
	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)

	read, err = r.ReadAt(b, r.off)
	if err != nil && err != io.EOF {
		log.Debug("lazychunkreader.readat", "read", read, "err", err)
		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
	}

	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))

	r.off += int64(read)
	return read, err
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	log.Debug("lazychunkreader.seek", "key", r.addr, "offset", offset)
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += r.off
	case 2:
		if r.chunkData == nil { // seeking from the end requires the root chunk for size; call Size first
			_, err := r.Size(context.TODO(), nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += int64(r.chunkData.Size())
	}

	if offset < 0 {
		return 0, errOffset
	}
	r.off = offset
	return offset, nil
}
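// Editor's sketch (not part of the original file): a split/join round trip,
// assuming a store value that implements both Putter and Getter (elsewhere
// in this package a hasherStore plays that role). Illustrative only.
func exampleRoundTrip(ctx context.Context, store interface {
	Putter
	Getter
}, data io.Reader, size int64) ([]byte, error) {
	key, wait, err := TreeSplit(ctx, data, size, store)
	if err != nil {
		return nil, err
	}
	// wait blocks until every chunk has been persisted by the putter
	if err := wait(ctx); err != nil {
		return nil, err
	}
	reader := TreeJoin(ctx, key, store, 0)
	out := make([]byte, size)
	if _, err := reader.ReadAt(out, 0); err != nil && err != io.EOF {
		return nil, err
	}
	return out, nil
}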