// github.com/linapex/ethereum-dpos-chinese/swarm/storage/chunker.go

//<developer>
//    <name>linapex 曹一峰</name>
//    <email>linapex@163.com</email>
//    <wx>superexc</wx>
//    <qqgroup>128148617</qqgroup>
//    <url>https://jsq.ink</url>
//    <role>pku engineer</role>
//    <date>2019-03-16 12:09:49</date>
//</developer>

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/swarm/chunk"
	"github.com/ethereum/go-ethereum/swarm/log"
	"github.com/ethereum/go-ethereum/swarm/spancontext"
	opentracing "github.com/opentracing/opentracing-go"
	olog "github.com/opentracing/opentracing-go/log"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the entire subtree under the node, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is no more than the maximum chunk size, the data is stored in a single chunk:
key = hash(int64(size) + data)

5 if the data size is more than chunkSize*branches^l but no more than chunkSize*branches^(l+1), the data vector is split into slices of chunkSize*branches^l length (except possibly the last one):
key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

/*
The tree chunker builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node whose payload is the concatenation of the hashes of its children. This scheme simultaneously provides data integrity and self-addressing: branching nodes are transparent, since the size they encode is strictly greater than the maximum data size of a single chunk.
*/

var (
	errAppendOppNotSuported = errors.New("Append operation not supported")
	errOperationTimedOut    = errors.New("operation timed out")
)

type ChunkerParams struct {
	chunkSize int64
	hashSize  int64
}

type SplitterParams struct {
	ChunkerParams
	reader io.Reader
	putter Putter
	addr   Address
}

type TreeSplitterParams struct {
	SplitterParams
	size int64
}

type JoinerParams struct {
	ChunkerParams
	addr   Address
	getter Getter
	// depth at which the join terminates: 0 returns the actual data,
	// higher values expose the intermediate hash chunks of the tree
	depth int
	ctx   context.Context
}

type TreeChunker struct {
	ctx context.Context

	branches int64
	hashFunc SwarmHasher
	dataSize int64
	data     io.Reader
	// calculated
	addr        Address
	depth       int
	hashSize    int64        // hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}

/*
TreeJoin reconstructs original content based on a root key.
When joining, the caller gets returned a LazyChunkReader, which is seekable
and implements on-demand fetching of chunks as and where it is read.
New chunks to retrieve are obtained from the getter, which the caller provides.
If an error is encountered during joining, it appears as a reader error.
As a result, partial reads from a document are possible even if other parts
are corrupt or lost.
The chunks are not validated by the chunker when joining; it is left to the
DPA to decide which sources are trusted.
*/

func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
	jp := &JoinerParams{
		ChunkerParams: ChunkerParams{
			chunkSize: chunk.DefaultSize,
			hashSize:  int64(len(addr)),
		},
		addr:   addr,
		getter: getter,
		depth:  depth,
		ctx:    ctx,
	}

	return NewTreeJoiner(jp).Join(ctx)
}

/*
When splitting, data is given as a Reader together with its size, and the
returned key is a hashSize-long byte slice: the root hash of the entire
content, filled in once processing finishes.
New chunks to store are stored using the putter, which the caller provides.
*/

func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
	tsp := &TreeSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: chunk.DefaultSize,
				hashSize:  putter.RefSize(),
			},
			reader: data,
			putter: putter,
		},
		size: size,
	}
	return NewTreeSplitter(tsp).Split(ctx)
}

func NewTreeJoiner(params *JoinerParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.getter = params.getter
	tc.depth = params.depth
	tc.chunkSize = params.chunkSize
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	tc.ctx = params.ctx

	return tc
}
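// treeDepthSketch is a hypothetical helper, not used by the chunker itself:
// a minimal sketch of the depth computation that Split performs below. It
// returns the smallest depth d such that chunkSize*branches^d covers
// dataSize. With the defaults (chunkSize 4096, hashSize 32, hence
// branches 128), a 1 MiB document needs depth 2, since
// 4096*128 < 1<<20 <= 4096*128*128.
func treeDepthSketch(dataSize, chunkSize, branches int64) int {
	depth := 0
	for treeSize := chunkSize; treeSize < dataSize; treeSize *= branches {
		depth++
	}
	return depth
}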
func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.data = params.reader
	tc.dataSize = params.size
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.chunkSize = params.chunkSize
	tc.putter = params.putter
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

// String() for pretty printing
func (c *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData))
}

type hashJob struct {
	key      Address
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (tc *TreeChunker) incrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount += 1
}

func (tc *TreeChunker) getWorkerCount() int64 {
	tc.workerLock.RLock()
	defer tc.workerLock.RUnlock()
	return tc.workerCount
}

func (tc *TreeChunker) decrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount -= 1
}

func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
	if tc.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	tc.runWorker()

	depth := 0
	treeSize := tc.chunkSize

	// take the lowest depth such that chunkSize*branches^depth covers the
	// data size, i.e. the number of levels of branching in the resulting tree
	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
		depth++
	}

	key := make([]byte, tc.hashSize)
	// this waitgroup member is released after the root hash is calculated
	tc.wg.Add(1)
	// launch the actual recursive splitter, passing the waitgroup
	go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// waiting for all goroutines to finish
		tc.wg.Wait()
		close(tc.errC)
	}()

	defer close(tc.quitC)
	defer tc.putter.Close()
	select {
	case err := <-tc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
		return nil, nil, errOperationTimedOut
	}

	return key, tc.putter.Wait, nil
}

func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {

	// descend to the lowest depth at which size still fits into a single subtree
	for depth > 0 && size < treeSize {
		treeSize /= tc.branches
		depth--
	}

	if depth == 0 {
		// leaf node -> content chunk: 8-byte little-endian size prefix followed by the data
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := tc.data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				tc.errC <- err
				return
			}
		}
		select {
		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
		case <-tc.quitC:
		}
		return
	}
	// depth > 0: intermediate chunk containing the hashes of its children;
	// branchCnt is the number of children, rounding up
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*tc.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last section may be shorter than treeSize
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the section of the chunk that will hold the child hash
		subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]

		childrenWg.Add(1)
		tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg)

		i++
		pos += treeSize
	}
	// wait for all children to finish calculating their hashes and copying
	// them onto their sections of this chunk; the children run in parallel
	// (io-intensive, moderate memory) in a threadsafe manner
	childrenWg.Wait()

	worker := tc.getWorkerCount()
	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
		tc.runWorker()
	}
	select {
	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
	case <-tc.quitC:
	}
}
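// exampleLeafChunk is a hypothetical sketch, not part of the original file,
// that makes the chunk layout produced by split above explicit: every chunk
// starts with an 8-byte little-endian subtree-size prefix, followed either
// by raw data (leaf chunks) or by the concatenated hashes of the chunk's
// children (branching chunks).
func exampleLeafChunk(data []byte) []byte {
	chunk := make([]byte, 8+len(data))
	binary.LittleEndian.PutUint64(chunk[:8], uint64(len(data)))
	copy(chunk[8:], data)
	return chunk
}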
func (tc *TreeChunker) runWorker() {
	tc.incrementWorkerCount()
	go func() {
		defer tc.decrementWorkerCount()
		for {
			select {
			case job, ok := <-tc.jobC:
				if !ok {
					return
				}

				h, err := tc.putter.Put(tc.ctx, job.chunk)
				if err != nil {
					tc.errC <- err
					return
				}
				copy(job.key, h)
				job.parentWg.Done()
			case <-tc.quitC:
				return
			}
		}
	}()
}

func (tc *TreeChunker) Append() (Address, func(), error) {
	return nil, nil, errAppendOppNotSuported
}

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	Ctx       context.Context
	key       Address // root key
	chunkData ChunkData
	off       int64 // offset
	chunkSize int64 // inherited from chunker
	branches  int64 // inherited from chunker
	hashSize  int64 // inherited from chunker
	depth     int
	getter    Getter
}

func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
	return &LazyChunkReader{
		key:       tc.addr,
		chunkSize: tc.chunkSize,
		branches:  tc.branches,
		hashSize:  tc.hashSize,
		depth:     tc.depth,
		getter:    tc.getter,
		Ctx:       tc.ctx,
	}
}

func (r *LazyChunkReader) Context() context.Context {
	return r.Ctx
}

// Size returns the size of the document encoded in the root chunk,
// fetching the root chunk on first use
func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		ctx,
		"lcr.size")
	defer sp.Finish()

	log.Debug("lazychunkreader.size", "key", r.key)
	if r.chunkData == nil {
		chunkData, err := r.getter.Get(cctx, Reference(r.key))
		if err != nil {
			return 0, err
		}
		if chunkData == nil {
			select {
			case <-quitC:
				return 0, errors.New("aborted")
			default:
				return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex())
			}
		}
		r.chunkData = chunkData
	}
	return r.chunkData.Size(), nil
}

// ReadAt can be called numerous times.
// Concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first.
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		r.Ctx,
		"lcr.read")
	defer sp.Finish()

	defer func() {
		sp.LogFields(
			olog.Int("off", int(off)),
			olog.Int("read", read))
	}()

	// an empty read is a no-op; no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := r.Size(cctx, quitC)
	if err != nil {
		log.Error("lazychunkreader.readat.size", "size", size, "err", err)
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = r.chunkSize
	for ; treeSize < size; treeSize *= r.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	length := int64(len(b))
	for d := 0; d < r.depth; d++ {
		off *= r.chunkSize
		length *= r.chunkSize
	}
	wg.Add(1)
	go r.join(cctx, b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		log.Error("lazychunkreader.readat.errc", "err", err)
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		return int(size - off), io.EOF
	}
	return len(b), nil
}
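// childRangeSketch is a hypothetical illustration, not used by the reader
// itself, of the index arithmetic that join performs below: for a byte
// range [off, eoff) inside a subtree whose children each cover treeSize
// bytes, the children that must be fetched are those with indices in
// [off/treeSize, ceil(eoff/treeSize)).
func childRangeSketch(off, eoff, treeSize int64) (start, end int64) {
	start = off / treeSize
	end = (eoff + treeSize - 1) / treeSize // ceiling division
	return start, end
}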
func (r *LazyChunkReader) join(ctx context.Context, b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
	// find the appropriate block level
	for chunkData.Size() < treeSize && depth > r.depth {
		treeSize /= r.branches
		depth--
	}

	// leaf chunk found
	if depth == r.depth {
		extra := 8 + eoff - int64(len(chunkData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunkData[8+off:8+eoff])
		return // simply hand back the content of the chunk
	}

	// subtree: determine which children cover the requested range
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	// the last non-leaf chunk can be shorter than the default chunk size;
	// do not read past its actual data
	currentBranches := int64(len(chunkData)-8) / r.hashSize
	if end > currentBranches {
		end = currentBranches
	}

	wg := &sync.WaitGroup{}
	defer wg.Wait()
	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
			chunkData, err := r.getter.Get(ctx, Reference(childKey))
			if err != nil {
				log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err)
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)):
				case <-quitC:
				}
				return
			}
			if l := len(chunkData); l < 9 {
				select {
				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			r.join(ctx, b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
		}(i)
	} // for
}

// Read keeps a cursor, so it cannot be called concurrently; see ReadAt
func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
	log.Debug("lazychunkreader.read", "key", r.key)
	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)

	read, err = r.ReadAt(b, r.off)
	if err != nil && err != io.EOF {
		log.Error("lazychunkreader.readat", "read", read, "err", err)
		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
	}

	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))

	r.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset)
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += r.off
	case 2:
		if r.chunkData == nil { // seeking from the end requires the root chunk for size; call Size first
			_, err := r.Size(context.TODO(), nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += r.chunkData.Size()
	}

	if offset < 0 {
		return 0, errOffset
	}
	r.off = offset
	return offset, nil
}
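// usageSketch is a hypothetical end-to-end illustration, not part of the
// original API surface: it splits a document into a chunk tree with
// TreeSplit, waits until every chunk has been stored, then streams the
// content back through the LazyChunkReader returned by TreeJoin. The putter
// and getter are assumed to be supplied by the caller (e.g. a DPA-backed
// store).
func usageSketch(ctx context.Context, data io.Reader, size int64, putter Putter, getter Getter) ([]byte, error) {
	key, wait, err := TreeSplit(ctx, data, size, putter)
	if err != nil {
		return nil, err
	}
	// wait blocks until all chunks have been flushed to the underlying store
	if err := wait(ctx); err != nil {
		return nil, err
	}
	reader := TreeJoin(ctx, key, getter, 0)
	buf := make([]byte, size)
	// ReadAt returns io.EOF when the read reaches the end of the document
	if _, err := reader.ReadAt(buf, 0); err != nil && err != io.EOF {
		return nil, err
	}
	return buf, nil
}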