// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/swarm/log"
	"github.com/ethereum/go-ethereum/swarm/spancontext"
	opentracing "github.com/opentracing/opentracing-go"
	olog "github.com/opentracing/opentracing-go/log"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the entire subtree under the node as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunk size, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*
  branches^(l+1), the data vector is split into slices of chunksize*
  branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a chunk of data representing a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between: memory, file system, network socket (bzz peers storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocation though, since it does need it.
*/

var (
	// errAppendOppNotSuported is the error returned by TreeChunker.Append.
	errAppendOppNotSuported = errors.New("Append operation not supported")
	// errOperationTimedOut is returned by TreeChunker.Split when no result
	// arrives within splitTimeout.
	errOperationTimedOut = errors.New("operation timed out")
)

const (
	// DefaultChunkSize is the chunk size that TreeJoin and TreeSplit pass to
	// the chunker.
	DefaultChunkSize int64 = 4096
)

// ChunkerParams holds the sizes shared by the splitter and joiner parameters.
type ChunkerParams struct {
	chunkSize int64 // size of a chunk; TreeJoin and TreeSplit use DefaultChunkSize
	hashSize  int64 // length of a chunk reference (len(addr) in TreeJoin, putter.RefSize() in TreeSplit)
}

// SplitterParams extends ChunkerParams with the input reader and the Putter
// that receives the chunks produced while splitting.
type SplitterParams struct {
	ChunkerParams
	reader io.Reader
	putter Putter
	addr   Address
}

// TreeSplitterParams is the parameter set consumed by NewTreeSplitter; size is
// copied into TreeChunker.dataSize.
type TreeSplitterParams struct {
	SplitterParams
	size int64
}

// JoinerParams is the parameter set consumed by NewTreeJoiner.
type JoinerParams struct {
	ChunkerParams
	addr   Address
	getter Getter
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
	ctx   context.Context
}

// TreeChunker carries the state of one split or join operation. It is created
// by NewTreeSplitter (data, dataSize and putter set) or NewTreeJoiner (addr,
// getter, depth and ctx set).
type TreeChunker struct {
	ctx context.Context

	branches int64
	hashFunc SwarmHasher // NOTE(review): not referenced by the code visible in this file
	dataSize int64
	data     io.Reader
	// calculated
	addr        Address
	depth       int
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize* branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}
135 */ 136 func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader { 137 jp := &JoinerParams{ 138 ChunkerParams: ChunkerParams{ 139 chunkSize: DefaultChunkSize, 140 hashSize: int64(len(addr)), 141 }, 142 addr: addr, 143 getter: getter, 144 depth: depth, 145 ctx: ctx, 146 } 147 148 return NewTreeJoiner(jp).Join(ctx) 149 } 150 151 /* 152 When splitting, data is given as a SectionReader, and the key is a hashSize long byte slice (Key), the root hash of the entire content will fill this once processing finishes. 153 New chunks to store are store using the putter which the caller provides. 154 */ 155 func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) { 156 tsp := &TreeSplitterParams{ 157 SplitterParams: SplitterParams{ 158 ChunkerParams: ChunkerParams{ 159 chunkSize: DefaultChunkSize, 160 hashSize: putter.RefSize(), 161 }, 162 reader: data, 163 putter: putter, 164 }, 165 size: size, 166 } 167 return NewTreeSplitter(tsp).Split(ctx) 168 } 169 170 func NewTreeJoiner(params *JoinerParams) *TreeChunker { 171 tc := &TreeChunker{} 172 tc.hashSize = params.hashSize 173 tc.branches = params.chunkSize / params.hashSize 174 tc.addr = params.addr 175 tc.getter = params.getter 176 tc.depth = params.depth 177 tc.chunkSize = params.chunkSize 178 tc.workerCount = 0 179 tc.jobC = make(chan *hashJob, 2*ChunkProcessors) 180 tc.wg = &sync.WaitGroup{} 181 tc.errC = make(chan error) 182 tc.quitC = make(chan bool) 183 184 tc.ctx = params.ctx 185 186 return tc 187 } 188 189 func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker { 190 tc := &TreeChunker{} 191 tc.data = params.reader 192 tc.dataSize = params.size 193 tc.hashSize = params.hashSize 194 tc.branches = params.chunkSize / params.hashSize 195 tc.addr = params.addr 196 tc.chunkSize = params.chunkSize 197 tc.putter = params.putter 198 tc.workerCount = 0 199 tc.jobC = make(chan *hashJob, 2*ChunkProcessors) 200 
tc.wg = &sync.WaitGroup{} 201 tc.errC = make(chan error) 202 tc.quitC = make(chan bool) 203 204 return tc 205 } 206 207 // String() for pretty printing 208 func (c *Chunk) String() string { 209 return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData)) 210 } 211 212 type hashJob struct { 213 key Address 214 chunk []byte 215 size int64 216 parentWg *sync.WaitGroup 217 } 218 219 func (tc *TreeChunker) incrementWorkerCount() { 220 tc.workerLock.Lock() 221 defer tc.workerLock.Unlock() 222 tc.workerCount += 1 223 } 224 225 func (tc *TreeChunker) getWorkerCount() int64 { 226 tc.workerLock.RLock() 227 defer tc.workerLock.RUnlock() 228 return tc.workerCount 229 } 230 231 func (tc *TreeChunker) decrementWorkerCount() { 232 tc.workerLock.Lock() 233 defer tc.workerLock.Unlock() 234 tc.workerCount -= 1 235 } 236 237 func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 238 if tc.chunkSize <= 0 { 239 panic("chunker must be initialised") 240 } 241 242 tc.runWorker() 243 244 depth := 0 245 treeSize := tc.chunkSize 246 247 // takes lowest depth such that chunksize*HashCount^(depth+1) > size 248 // power series, will find the order of magnitude of the data size in base hashCount or numbers of levels of branching in the resulting tree. 
249 for ; treeSize < tc.dataSize; treeSize *= tc.branches { 250 depth++ 251 } 252 253 key := make([]byte, tc.hashSize) 254 // this waitgroup member is released after the root hash is calculated 255 tc.wg.Add(1) 256 //launch actual recursive function passing the waitgroups 257 go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg) 258 259 // closes internal error channel if all subprocesses in the workgroup finished 260 go func() { 261 // waiting for all threads to finish 262 tc.wg.Wait() 263 close(tc.errC) 264 }() 265 266 defer close(tc.quitC) 267 defer tc.putter.Close() 268 select { 269 case err := <-tc.errC: 270 if err != nil { 271 return nil, nil, err 272 } 273 case <-time.NewTimer(splitTimeout).C: 274 return nil, nil, errOperationTimedOut 275 } 276 277 return key, tc.putter.Wait, nil 278 } 279 280 func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) { 281 282 // 283 284 for depth > 0 && size < treeSize { 285 treeSize /= tc.branches 286 depth-- 287 } 288 289 if depth == 0 { 290 // leaf nodes -> content chunks 291 chunkData := make([]byte, size+8) 292 binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size)) 293 var readBytes int64 294 for readBytes < size { 295 n, err := tc.data.Read(chunkData[8+readBytes:]) 296 readBytes += int64(n) 297 if err != nil && !(err == io.EOF && readBytes == size) { 298 tc.errC <- err 299 return 300 } 301 } 302 select { 303 case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}: 304 case <-tc.quitC: 305 } 306 return 307 } 308 // dept > 0 309 // intermediate chunk containing child nodes hashes 310 branchCnt := (size + treeSize - 1) / treeSize 311 312 var chunk = make([]byte, branchCnt*tc.hashSize+8) 313 var pos, i int64 314 315 binary.LittleEndian.PutUint64(chunk[0:8], uint64(size)) 316 317 childrenWg := &sync.WaitGroup{} 318 var secSize int64 319 for i < branchCnt { 320 // the last item can have shorter data 321 if size-pos < treeSize { 322 secSize = size - pos 323 
} else { 324 secSize = treeSize 325 } 326 // the hash of that data 327 subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize] 328 329 childrenWg.Add(1) 330 tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg) 331 332 i++ 333 pos += treeSize 334 } 335 // wait for all the children to complete calculating their hashes and copying them onto sections of the chunk 336 // parentWg.Add(1) 337 // go func() { 338 childrenWg.Wait() 339 340 worker := tc.getWorkerCount() 341 if int64(len(tc.jobC)) > worker && worker < ChunkProcessors { 342 tc.runWorker() 343 344 } 345 select { 346 case tc.jobC <- &hashJob{addr, chunk, size, parentWg}: 347 case <-tc.quitC: 348 } 349 } 350 351 func (tc *TreeChunker) runWorker() { 352 tc.incrementWorkerCount() 353 go func() { 354 defer tc.decrementWorkerCount() 355 for { 356 select { 357 358 case job, ok := <-tc.jobC: 359 if !ok { 360 return 361 } 362 363 h, err := tc.putter.Put(tc.ctx, job.chunk) 364 if err != nil { 365 tc.errC <- err 366 return 367 } 368 copy(job.key, h) 369 job.parentWg.Done() 370 case <-tc.quitC: 371 return 372 } 373 } 374 }() 375 } 376 377 func (tc *TreeChunker) Append() (Address, func(), error) { 378 return nil, nil, errAppendOppNotSuported 379 } 380 381 // LazyChunkReader implements LazySectionReader 382 type LazyChunkReader struct { 383 Ctx context.Context 384 key Address // root key 385 chunkData ChunkData 386 off int64 // offset 387 chunkSize int64 // inherit from chunker 388 branches int64 // inherit from chunker 389 hashSize int64 // inherit from chunker 390 depth int 391 getter Getter 392 } 393 394 func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader { 395 return &LazyChunkReader{ 396 key: tc.addr, 397 chunkSize: tc.chunkSize, 398 branches: tc.branches, 399 hashSize: tc.hashSize, 400 depth: tc.depth, 401 getter: tc.getter, 402 Ctx: tc.ctx, 403 } 404 } 405 406 func (r *LazyChunkReader) Context() context.Context { 407 return r.Ctx 408 } 409 410 // Size is meant to be called on the 
LazySectionReader 411 func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) { 412 metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1) 413 414 var sp opentracing.Span 415 var cctx context.Context 416 cctx, sp = spancontext.StartSpan( 417 ctx, 418 "lcr.size") 419 defer sp.Finish() 420 421 log.Debug("lazychunkreader.size", "key", r.key) 422 if r.chunkData == nil { 423 chunkData, err := r.getter.Get(cctx, Reference(r.key)) 424 if err != nil { 425 return 0, err 426 } 427 if chunkData == nil { 428 select { 429 case <-quitC: 430 return 0, errors.New("aborted") 431 default: 432 return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex()) 433 } 434 } 435 r.chunkData = chunkData 436 } 437 return r.chunkData.Size(), nil 438 } 439 440 // read at can be called numerous times 441 // concurrent reads are allowed 442 // Size() needs to be called synchronously on the LazyChunkReader first 443 func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) { 444 metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1) 445 446 var sp opentracing.Span 447 var cctx context.Context 448 cctx, sp = spancontext.StartSpan( 449 r.Ctx, 450 "lcr.read") 451 defer sp.Finish() 452 453 defer func() { 454 sp.LogFields( 455 olog.Int("off", int(off)), 456 olog.Int("read", read)) 457 }() 458 459 // this is correct, a swarm doc cannot be zero length, so no EOF is expected 460 if len(b) == 0 { 461 return 0, nil 462 } 463 quitC := make(chan bool) 464 size, err := r.Size(cctx, quitC) 465 if err != nil { 466 log.Error("lazychunkreader.readat.size", "size", size, "err", err) 467 return 0, err 468 } 469 470 errC := make(chan error) 471 472 // } 473 var treeSize int64 474 var depth int 475 // calculate depth and max treeSize 476 treeSize = r.chunkSize 477 for ; treeSize < size; treeSize *= r.branches { 478 depth++ 479 } 480 wg := sync.WaitGroup{} 481 length := int64(len(b)) 482 for d := 0; d < r.depth; d++ { 483 off *= r.chunkSize 484 
length *= r.chunkSize 485 } 486 wg.Add(1) 487 go r.join(cctx, b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC) 488 go func() { 489 wg.Wait() 490 close(errC) 491 }() 492 493 err = <-errC 494 if err != nil { 495 log.Error("lazychunkreader.readat.errc", "err", err) 496 close(quitC) 497 return 0, err 498 } 499 if off+int64(len(b)) >= size { 500 return int(size - off), io.EOF 501 } 502 return len(b), nil 503 } 504 505 func (r *LazyChunkReader) join(ctx context.Context, b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) { 506 defer parentWg.Done() 507 // find appropriate block level 508 for chunkData.Size() < treeSize && depth > r.depth { 509 treeSize /= r.branches 510 depth-- 511 } 512 513 // leaf chunk found 514 if depth == r.depth { 515 extra := 8 + eoff - int64(len(chunkData)) 516 if extra > 0 { 517 eoff -= extra 518 } 519 copy(b, chunkData[8+off:8+eoff]) 520 return // simply give back the chunks reader for content chunks 521 } 522 523 // subtree 524 start := off / treeSize 525 end := (eoff + treeSize - 1) / treeSize 526 527 // last non-leaf chunk can be shorter than default chunk size, let's not read it further then its end 528 currentBranches := int64(len(chunkData)-8) / r.hashSize 529 if end > currentBranches { 530 end = currentBranches 531 } 532 533 wg := &sync.WaitGroup{} 534 defer wg.Wait() 535 for i := start; i < end; i++ { 536 soff := i * treeSize 537 roff := soff 538 seoff := soff + treeSize 539 540 if soff < off { 541 soff = off 542 } 543 if seoff > eoff { 544 seoff = eoff 545 } 546 if depth > 1 { 547 wg.Wait() 548 } 549 wg.Add(1) 550 go func(j int64) { 551 childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize] 552 chunkData, err := r.getter.Get(ctx, Reference(childKey)) 553 if err != nil { 554 log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err) 555 select { 556 case errC <- fmt.Errorf("chunk %v-%v not 
found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)): 557 case <-quitC: 558 } 559 return 560 } 561 if l := len(chunkData); l < 9 { 562 select { 563 case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l): 564 case <-quitC: 565 } 566 return 567 } 568 if soff < off { 569 soff = off 570 } 571 r.join(ctx, b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC) 572 }(i) 573 } //for 574 } 575 576 // Read keeps a cursor so cannot be called simulateously, see ReadAt 577 func (r *LazyChunkReader) Read(b []byte) (read int, err error) { 578 log.Debug("lazychunkreader.read", "key", r.key) 579 metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1) 580 581 read, err = r.ReadAt(b, r.off) 582 if err != nil && err != io.EOF { 583 log.Error("lazychunkreader.readat", "read", read, "err", err) 584 metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1) 585 } 586 587 metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read)) 588 589 r.off += int64(read) 590 return 591 } 592 593 // completely analogous to standard SectionReader implementation 594 var errWhence = errors.New("Seek: invalid whence") 595 var errOffset = errors.New("Seek: invalid offset") 596 597 func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) { 598 log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset) 599 switch whence { 600 default: 601 return 0, errWhence 602 case 0: 603 offset += 0 604 case 1: 605 offset += r.off 606 case 2: 607 if r.chunkData == nil { //seek from the end requires rootchunk for size. call Size first 608 _, err := r.Size(context.TODO(), nil) 609 if err != nil { 610 return 0, fmt.Errorf("can't get size: %v", err) 611 } 612 } 613 offset += r.chunkData.Size() 614 } 615 616 if offset < 0 { 617 return 0, errOffset 618 } 619 r.off = offset 620 return offset, nil 621 }