github.com/gobitfly/go-ethereum@v1.8.12/swarm/storage/chunker.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/swarm/log"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the node's entire subtree as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunk size, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*branches^(l+1), the data vector is split into slices of chunksize*branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is the concatenation of the hashes of its children. This scheme simultaneously guarantees data integrity and self-addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well, it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, file system and network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocations, though, since it needs them.
*/
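
// A worked example of the scheme above (illustrative only, assuming the
// default 4096-byte chunk size and a 32-byte hash such as Keccak-256):
//
//	branches = chunkSize / hashSize = 4096 / 32 = 128
//	depth 0  covers up to 4096 bytes (a single leaf chunk)
//	depth 1  covers up to 4096 * 128   = 512 KiB
//	depth 2  covers up to 4096 * 128^2 = 64 MiB
//
// So a 1 MB document is split into a depth-2 tree: the root chunk holds the
// hashes of up to 128 depth-1 nodes, each of which holds the hashes of up to
// 128 leaf chunks.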

var (
	errAppendOpNotSupported = errors.New("append operation not supported")
	errOperationTimedOut    = errors.New("operation timed out")
)

const (
	DefaultChunkSize int64 = 4096
)

type ChunkerParams struct {
	chunkSize int64
	hashSize  int64
}

type SplitterParams struct {
	ChunkerParams
	reader io.Reader
	putter Putter
	addr   Address
}

type TreeSplitterParams struct {
	SplitterParams
	size int64
}

type JoinerParams struct {
	ChunkerParams
	addr   Address
	getter Getter
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
}

type TreeChunker struct {
	branches int64
	hashFunc SwarmHasher
	dataSize int64
	data     io.Reader
	// calculated
	addr        Address
	depth       int
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}

/*
Join reconstructs the original content based on a root key.
When joining, the caller is returned a lazy SectionReader, which is
seekable and implements on-demand fetching of chunks as and where it is read.
New chunks to retrieve come from the getter, which the caller provides.
If an error is encountered during joining, it appears as a reader error.
As a result, partial reads from a document are possible even if other parts
are corrupt or lost.
The chunks are not meant to be validated by the chunker when joining. This
is because it is left to the DPA to decide which sources are trusted.
*/
func TreeJoin(addr Address, getter Getter, depth int) *LazyChunkReader {
	jp := &JoinerParams{
		ChunkerParams: ChunkerParams{
			chunkSize: DefaultChunkSize,
			hashSize:  int64(len(addr)),
		},
		addr:   addr,
		getter: getter,
		depth:  depth,
	}

	return NewTreeJoiner(jp).Join()
}
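
// A minimal usage sketch (illustrative only; getter stands for any Getter
// implementation backed by a chunk store holding the content):
//
//	reader := TreeJoin(addr, getter, 0) // depth must currently be 0, see the TODO on JoinerParams
//	buf := make([]byte, 1024)
//	n, err := reader.ReadAt(buf, 0) // fetches only the chunks covering bytes [0, 1024)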

/*
TreeSplit splits data given as an io.Reader together with its size. The returned key is a hashSize-long byte slice (Address): the root hash of the entire content, filled in once processing finishes.
New chunks to store are stored using the putter, which the caller provides.
*/
func TreeSplit(data io.Reader, size int64, putter Putter) (k Address, wait func(), err error) {
	tsp := &TreeSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: DefaultChunkSize,
				hashSize:  putter.RefSize(),
			},
			reader: data,
			putter: putter,
		},
		size: size,
	}
	return NewTreeSplitter(tsp).Split()
}
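
// A minimal usage sketch (illustrative only; putter stands for any Putter
// implementation, e.g. one backed by a local chunk store):
//
//	key, wait, err := TreeSplit(bytes.NewReader(doc), int64(len(doc)), putter)
//	if err == nil {
//		wait() // block until the underlying Putter has finished storing the chunks
//	}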

func NewTreeJoiner(params *JoinerParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.getter = params.getter
	tc.depth = params.depth
	tc.chunkSize = params.chunkSize
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.data = params.reader
	tc.dataSize = params.size
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.chunkSize = params.chunkSize
	tc.putter = params.putter
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

// String() for pretty printing
func (c *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData))
}

type hashJob struct {
	key      Address
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (tc *TreeChunker) incrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount += 1
}

func (tc *TreeChunker) getWorkerCount() int64 {
	tc.workerLock.RLock()
	defer tc.workerLock.RUnlock()
	return tc.workerCount
}

func (tc *TreeChunker) decrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount -= 1
}

func (tc *TreeChunker) Split() (k Address, wait func(), err error) {
	if tc.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	tc.runWorker()

	depth := 0
	treeSize := tc.chunkSize

	// takes the lowest depth such that chunkSize*branches^depth >= size;
	// a power series that finds the order of magnitude of the data size in base branches (the hash count per chunk), i.e. the number of levels of branching in the resulting tree.
	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
		depth++
	}

	key := make([]byte, tc.hashSize)
	// this waitgroup member is released after the root hash is calculated
	tc.wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// waiting for all threads to finish
		tc.wg.Wait()
		close(tc.errC)
	}()

	defer close(tc.quitC)
	defer tc.putter.Close()
	select {
	case err := <-tc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
		return nil, nil, errOperationTimedOut
	}

	return key, tc.putter.Wait, nil
}

func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {

	// descend while the data slice fits into a smaller subtree
	for depth > 0 && size < treeSize {
		treeSize /= tc.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := tc.data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				tc.errC <- err
				return
			}
		}
		select {
		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
		case <-tc.quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing child node hashes
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*tc.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]

		childrenWg.Add(1)
		tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()

	worker := tc.getWorkerCount()
	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
		tc.runWorker()
	}
	select {
	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
	case <-tc.quitC:
	}
}

func (tc *TreeChunker) runWorker() {
	tc.incrementWorkerCount()
	go func() {
		defer tc.decrementWorkerCount()
		for {
			select {

			case job, ok := <-tc.jobC:
				if !ok {
					return
				}

				h, err := tc.putter.Put(job.chunk)
				if err != nil {
					tc.errC <- err
					return
				}
				copy(job.key, h)
				job.parentWg.Done()
			case <-tc.quitC:
				return
			}
		}
	}()
}

func (tc *TreeChunker) Append() (Address, func(), error) {
	return nil, nil, errAppendOpNotSupported
}
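
// Chunk layout produced by Split, for both leaf and intermediate chunks
// (an illustrative summary of the encoding used above):
//
//	bytes 0..7  little-endian uint64: size of the data span covered by this
//	            chunk's entire subtree
//	bytes 8..   leaf chunk:         up to chunkSize bytes of content data
//	            intermediate chunk: the concatenated hashes of its children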

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Address // root key
	chunkData ChunkData
	off       int64 // offset
	chunkSize int64 // inherited from chunker
	branches  int64 // inherited from chunker
	hashSize  int64 // inherited from chunker
	depth     int
	getter    Getter
}

func (tc *TreeChunker) Join() *LazyChunkReader {
	return &LazyChunkReader{
		key:       tc.addr,
		chunkSize: tc.chunkSize,
		branches:  tc.branches,
		hashSize:  tc.hashSize,
		depth:     tc.depth,
		getter:    tc.getter,
	}
}

// Size is meant to be called on the LazySectionReader
func (r *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)

	log.Debug("lazychunkreader.size", "key", r.key)
	if r.chunkData == nil {
		chunkData, err := r.getter.Get(Reference(r.key))
		if err != nil {
			return 0, err
		}
		if chunkData == nil {
			select {
			case <-quitC:
				return 0, errors.New("aborted")
			default:
				return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex())
			}
		}
		r.chunkData = chunkData
	}
	return r.chunkData.Size(), nil
}

// ReadAt can be called numerous times
// concurrent reads are allowed
// Size() needs to be called synchronously on the LazyChunkReader first
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)

	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := r.Size(quitC)
	if err != nil {
		log.Error("lazychunkreader.readat.size", "size", size, "err", err)
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = r.chunkSize
	for ; treeSize < size; treeSize *= r.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	length := int64(len(b))
	for d := 0; d < r.depth; d++ {
		off *= r.chunkSize
		length *= r.chunkSize
	}
	wg.Add(1)
	go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		log.Error("lazychunkreader.readat.errc", "err", err)
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		return int(size - off), io.EOF
	}
	return len(b), nil
}

func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
	// find the appropriate block level
	for chunkData.Size() < treeSize && depth > r.depth {
		treeSize /= r.branches
		depth--
	}

	// leaf chunk found
	if depth == r.depth {
		extra := 8 + eoff - int64(len(chunkData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunkData[8+off:8+eoff])
		return // simply give back the chunk's reader for content chunks
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	// the last non-leaf chunk can be shorter than the default chunk size; let's not read it further than its end
	currentBranches := int64(len(chunkData)-8) / r.hashSize
	if end > currentBranches {
		end = currentBranches
	}

	wg := &sync.WaitGroup{}
	defer wg.Wait()
	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
			chunkData, err := r.getter.Get(Reference(childKey))
			if err != nil {
				log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err)
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)):
				case <-quitC:
				}
				return
			}
			if l := len(chunkData); l < 9 {
				select {
				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
		}(i)
	}
}

// Read keeps a cursor so it cannot be called simultaneously, see ReadAt
func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
	log.Debug("lazychunkreader.read", "key", r.key)
	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)

	read, err = r.ReadAt(b, r.off)
	if err != nil && err != io.EOF {
		log.Error("lazychunkreader.readat", "read", read, "err", err)
		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
	}

	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))

	r.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset)
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += r.off
	case 2:
		if r.chunkData == nil { // seeking from the end requires the root chunk for the size; call Size first
			_, err := r.Size(nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += r.chunkData.Size()
	}

	if offset < 0 {
		return 0, errOffset
	}
	r.off = offset
	return offset, nil
}
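
// A sketch of sequential access via the io.ReadSeeker surface (illustrative
// only; reader is a *LazyChunkReader obtained from TreeJoin):
//
//	if _, err := reader.Seek(4096, 0); err != nil { ... }
//	buf := make([]byte, 512)
//	n, err := reader.Read(buf) // reads bytes 4096..4607, fetching only the chunks needed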