// Source: github.com/yinchengtsinghua/golang-Eos-dpos-Ethereum@v0.0.0-20190121132951-92cc4225ed8e/swarm/storage/chunker.go

// This source code was professionally translated, analyzed and modified by Yin Cheng of Tsinghua University.
// Yin Cheng QQ: 77025077
// Yin Cheng WeChat: 18510341407
// Yin Cheng's QQ group: 721929980
// Yin Cheng email: yinc13@mails.tsinghua.edu.cn
// Yin Cheng graduated from Tsinghua University and is a Microsoft Most Valuable Professional in the blockchain field.
// https://mvp.microsoft.com/zh-cn/PublicProfile/4033620

package storage

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/swarm/chunk"
	"github.com/ethereum/go-ethereum/swarm/log"
	"github.com/ethereum/go-ethereum/swarm/spancontext"
	opentracing "github.com/opentracing/opentracing-go"
	olog "github.com/opentracing/opentracing-go/log"
)

// Sentinel errors returned by the tree chunker.
var (
	// errAppendOppNotSuported is returned by TreeChunker.Append, which is not implemented.
	errAppendOppNotSuported = errors.New("Append operation not supported")
	// errOperationTimedOut is returned by TreeChunker.Split when the split does
	// not finish before the split timeout fires.
	errOperationTimedOut = errors.New("operation timed out")
)

// ChunkerParams holds the two sizes shared by splitting and joining.
// The constructors in this file derive the branching factor of the tree as
// chunkSize / hashSize.
type ChunkerParams struct {
	chunkSize int64 // payload size of a chunk, in bytes
	hashSize  int64 // size of a chunk reference (key), in bytes
}

// SplitterParams extends ChunkerParams with what a splitter needs:
// the data source, the chunk sink and an address.
type SplitterParams struct {
	ChunkerParams
	reader io.Reader // source of the data to split
	putter Putter    // receives every chunk that is produced
	addr   Address   // copied into TreeChunker.addr by NewTreeSplitter
}

// TreeSplitterParams is SplitterParams plus the total length of the input.
type TreeSplitterParams struct {
	SplitterParams
	size int64 // total number of bytes expected from reader
}

// JoinerParams carries everything NewTreeJoiner needs to build a TreeChunker
// that reads data back.
type JoinerParams struct {
	ChunkerParams
	addr   Address // root key of the data to join
	getter Getter  // used to fetch chunks by reference
	// depth is copied to the reader and used as the tree level at which
	// LazyChunkReader.join stops descending and copies chunk payload.
	depth int
	ctx   context.Context // context stored on the chunker and handed to the reader
}

// TreeChunker holds the state for one split or one join operation.
// It is created by NewTreeSplitter or NewTreeJoiner; only the fields relevant
// to the chosen operation are populated by the respective constructor.
type TreeChunker struct {
	ctx context.Context // context passed to putter.Put by the workers and to the reader by Join

	branches int64       // number of child references per intermediate chunk (chunkSize / hashSize)
	hashFunc SwarmHasher // not assigned by the constructors in this file
	dataSize int64       // total input length (split only)
	data     io.Reader   // input data (split only)
	// addr is the root key for joining; for splitting it is copied from the params.
	addr        Address
	depth       int             // leaf depth handed to the LazyChunkReader (join only)
	hashSize    int64           // size of a chunk reference, in bytes
	chunkSize   int64           // chunk payload size, in bytes
	workerCount int64           // number of running hashing workers; guarded by workerLock
	workerLock  sync.RWMutex    // protects workerCount
	jobC        chan *hashJob   // queue of chunks waiting to be stored by a worker
	wg          *sync.WaitGroup // completes when the root chunk has been stored
	putter      Putter          // chunk sink (split only)
	getter      Getter          // chunk source (join only)
	errC        chan error      // carries the first error to Split; closed when wg completes
	quitC       chan bool       // closed when Split returns, to release workers and senders
}

func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader { 145 jp := &JoinerParams{ 146 ChunkerParams: ChunkerParams{ 147 chunkSize: chunk.DefaultSize, 148 hashSize: int64(len(addr)), 149 }, 150 addr: addr, 151 getter: getter, 152 depth: depth, 153 ctx: ctx, 154 } 155 156 return NewTreeJoiner(jp).Join(ctx) 157 } 158 159 /* 160 161 162 */ 163 164 func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) { 165 tsp := &TreeSplitterParams{ 166 SplitterParams: SplitterParams{ 167 ChunkerParams: ChunkerParams{ 168 chunkSize: chunk.DefaultSize, 169 hashSize: putter.RefSize(), 170 }, 171 reader: data, 172 putter: putter, 173 }, 174 size: size, 175 } 176 return NewTreeSplitter(tsp).Split(ctx) 177 } 178 179 func NewTreeJoiner(params *JoinerParams) *TreeChunker { 180 tc := &TreeChunker{} 181 tc.hashSize = params.hashSize 182 tc.branches = params.chunkSize / params.hashSize 183 tc.addr = params.addr 184 tc.getter = params.getter 185 tc.depth = params.depth 186 tc.chunkSize = params.chunkSize 187 tc.workerCount = 0 188 tc.jobC = make(chan *hashJob, 2*ChunkProcessors) 189 tc.wg = &sync.WaitGroup{} 190 tc.errC = make(chan error) 191 tc.quitC = make(chan bool) 192 193 tc.ctx = params.ctx 194 195 return tc 196 } 197 198 func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker { 199 tc := &TreeChunker{} 200 tc.data = params.reader 201 tc.dataSize = params.size 202 tc.hashSize = params.hashSize 203 tc.branches = params.chunkSize / params.hashSize 204 tc.addr = params.addr 205 tc.chunkSize = params.chunkSize 206 tc.putter = params.putter 207 tc.workerCount = 0 208 tc.jobC = make(chan *hashJob, 2*ChunkProcessors) 209 tc.wg = &sync.WaitGroup{} 210 tc.errC = make(chan error) 211 tc.quitC = make(chan bool) 212 213 return tc 214 } 215 216 // 217 func (c *Chunk) String() string { 218 return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData)) 
219 } 220 221 type hashJob struct { 222 key Address 223 chunk []byte 224 size int64 225 parentWg *sync.WaitGroup 226 } 227 228 func (tc *TreeChunker) incrementWorkerCount() { 229 tc.workerLock.Lock() 230 defer tc.workerLock.Unlock() 231 tc.workerCount += 1 232 } 233 234 func (tc *TreeChunker) getWorkerCount() int64 { 235 tc.workerLock.RLock() 236 defer tc.workerLock.RUnlock() 237 return tc.workerCount 238 } 239 240 func (tc *TreeChunker) decrementWorkerCount() { 241 tc.workerLock.Lock() 242 defer tc.workerLock.Unlock() 243 tc.workerCount -= 1 244 } 245 246 func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) { 247 if tc.chunkSize <= 0 { 248 panic("chunker must be initialised") 249 } 250 251 tc.runWorker() 252 253 depth := 0 254 treeSize := tc.chunkSize 255 256 // 257 // 258 for ; treeSize < tc.dataSize; treeSize *= tc.branches { 259 depth++ 260 } 261 262 key := make([]byte, tc.hashSize) 263 // 264 tc.wg.Add(1) 265 // 266 go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg) 267 268 // 269 go func() { 270 // 271 tc.wg.Wait() 272 close(tc.errC) 273 }() 274 275 defer close(tc.quitC) 276 defer tc.putter.Close() 277 select { 278 case err := <-tc.errC: 279 if err != nil { 280 return nil, nil, err 281 } 282 case <-time.NewTimer(splitTimeout).C: 283 return nil, nil, errOperationTimedOut 284 } 285 286 return key, tc.putter.Wait, nil 287 } 288 289 func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) { 290 291 // 292 293 for depth > 0 && size < treeSize { 294 treeSize /= tc.branches 295 depth-- 296 } 297 298 if depth == 0 { 299 // 300 chunkData := make([]byte, size+8) 301 binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size)) 302 var readBytes int64 303 for readBytes < size { 304 n, err := tc.data.Read(chunkData[8+readBytes:]) 305 readBytes += int64(n) 306 if err != nil && !(err == io.EOF && readBytes == size) { 307 tc.errC <- err 308 return 309 } 310 
} 311 select { 312 case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}: 313 case <-tc.quitC: 314 } 315 return 316 } 317 // 318 // 319 branchCnt := (size + treeSize - 1) / treeSize 320 321 var chunk = make([]byte, branchCnt*tc.hashSize+8) 322 var pos, i int64 323 324 binary.LittleEndian.PutUint64(chunk[0:8], uint64(size)) 325 326 childrenWg := &sync.WaitGroup{} 327 var secSize int64 328 for i < branchCnt { 329 // 330 if size-pos < treeSize { 331 secSize = size - pos 332 } else { 333 secSize = treeSize 334 } 335 // 336 subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize] 337 338 childrenWg.Add(1) 339 tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg) 340 341 i++ 342 pos += treeSize 343 } 344 // 345 // 346 // 347 childrenWg.Wait() 348 349 worker := tc.getWorkerCount() 350 if int64(len(tc.jobC)) > worker && worker < ChunkProcessors { 351 tc.runWorker() 352 353 } 354 select { 355 case tc.jobC <- &hashJob{addr, chunk, size, parentWg}: 356 case <-tc.quitC: 357 } 358 } 359 360 func (tc *TreeChunker) runWorker() { 361 tc.incrementWorkerCount() 362 go func() { 363 defer tc.decrementWorkerCount() 364 for { 365 select { 366 367 case job, ok := <-tc.jobC: 368 if !ok { 369 return 370 } 371 372 h, err := tc.putter.Put(tc.ctx, job.chunk) 373 if err != nil { 374 tc.errC <- err 375 return 376 } 377 copy(job.key, h) 378 job.parentWg.Done() 379 case <-tc.quitC: 380 return 381 } 382 } 383 }() 384 } 385 386 func (tc *TreeChunker) Append() (Address, func(), error) { 387 return nil, nil, errAppendOppNotSuported 388 } 389 390 // 391 type LazyChunkReader struct { 392 Ctx context.Context 393 key Address // 394 chunkData ChunkData 395 off int64 // 396 chunkSize int64 // 397 branches int64 // 398 hashSize int64 // 399 depth int 400 getter Getter 401 } 402 403 func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader { 404 return &LazyChunkReader{ 405 key: tc.addr, 406 chunkSize: tc.chunkSize, 407 branches: tc.branches, 408 hashSize: tc.hashSize, 409 
depth: tc.depth, 410 getter: tc.getter, 411 Ctx: tc.ctx, 412 } 413 } 414 415 func (r *LazyChunkReader) Context() context.Context { 416 return r.Ctx 417 } 418 419 // 420 func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) { 421 metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1) 422 423 var sp opentracing.Span 424 var cctx context.Context 425 cctx, sp = spancontext.StartSpan( 426 ctx, 427 "lcr.size") 428 defer sp.Finish() 429 430 log.Debug("lazychunkreader.size", "key", r.key) 431 if r.chunkData == nil { 432 chunkData, err := r.getter.Get(cctx, Reference(r.key)) 433 if err != nil { 434 return 0, err 435 } 436 if chunkData == nil { 437 select { 438 case <-quitC: 439 return 0, errors.New("aborted") 440 default: 441 return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex()) 442 } 443 } 444 r.chunkData = chunkData 445 } 446 return r.chunkData.Size(), nil 447 } 448 449 // 450 // 451 // 452 func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) { 453 metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1) 454 455 var sp opentracing.Span 456 var cctx context.Context 457 cctx, sp = spancontext.StartSpan( 458 r.Ctx, 459 "lcr.read") 460 defer sp.Finish() 461 462 defer func() { 463 sp.LogFields( 464 olog.Int("off", int(off)), 465 olog.Int("read", read)) 466 }() 467 468 // 469 if len(b) == 0 { 470 return 0, nil 471 } 472 quitC := make(chan bool) 473 size, err := r.Size(cctx, quitC) 474 if err != nil { 475 log.Error("lazychunkreader.readat.size", "size", size, "err", err) 476 return 0, err 477 } 478 479 errC := make(chan error) 480 481 // 482 var treeSize int64 483 var depth int 484 // 485 treeSize = r.chunkSize 486 for ; treeSize < size; treeSize *= r.branches { 487 depth++ 488 } 489 wg := sync.WaitGroup{} 490 length := int64(len(b)) 491 for d := 0; d < r.depth; d++ { 492 off *= r.chunkSize 493 length *= r.chunkSize 494 } 495 wg.Add(1) 496 go r.join(cctx, b, off, off+length, depth, 
treeSize/r.branches, r.chunkData, &wg, errC, quitC) 497 go func() { 498 wg.Wait() 499 close(errC) 500 }() 501 502 err = <-errC 503 if err != nil { 504 log.Error("lazychunkreader.readat.errc", "err", err) 505 close(quitC) 506 return 0, err 507 } 508 if off+int64(len(b)) >= size { 509 return int(size - off), io.EOF 510 } 511 return len(b), nil 512 } 513 514 func (r *LazyChunkReader) join(ctx context.Context, b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) { 515 defer parentWg.Done() 516 // 517 for chunkData.Size() < treeSize && depth > r.depth { 518 treeSize /= r.branches 519 depth-- 520 } 521 522 // 523 if depth == r.depth { 524 extra := 8 + eoff - int64(len(chunkData)) 525 if extra > 0 { 526 eoff -= extra 527 } 528 copy(b, chunkData[8+off:8+eoff]) 529 return // 530 } 531 532 // 533 start := off / treeSize 534 end := (eoff + treeSize - 1) / treeSize 535 536 // 537 currentBranches := int64(len(chunkData)-8) / r.hashSize 538 if end > currentBranches { 539 end = currentBranches 540 } 541 542 wg := &sync.WaitGroup{} 543 defer wg.Wait() 544 for i := start; i < end; i++ { 545 soff := i * treeSize 546 roff := soff 547 seoff := soff + treeSize 548 549 if soff < off { 550 soff = off 551 } 552 if seoff > eoff { 553 seoff = eoff 554 } 555 if depth > 1 { 556 wg.Wait() 557 } 558 wg.Add(1) 559 go func(j int64) { 560 childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize] 561 chunkData, err := r.getter.Get(ctx, Reference(childKey)) 562 if err != nil { 563 log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err) 564 select { 565 case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)): 566 case <-quitC: 567 } 568 return 569 } 570 if l := len(chunkData); l < 9 { 571 select { 572 case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l): 573 case 
<-quitC: 574 } 575 return 576 } 577 if soff < off { 578 soff = off 579 } 580 r.join(ctx, b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC) 581 }(i) 582 } // 583 } 584 585 // 586 func (r *LazyChunkReader) Read(b []byte) (read int, err error) { 587 log.Debug("lazychunkreader.read", "key", r.key) 588 metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1) 589 590 read, err = r.ReadAt(b, r.off) 591 if err != nil && err != io.EOF { 592 log.Error("lazychunkreader.readat", "read", read, "err", err) 593 metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1) 594 } 595 596 metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read)) 597 598 r.off += int64(read) 599 return 600 } 601 602 // 603 var errWhence = errors.New("Seek: invalid whence") 604 var errOffset = errors.New("Seek: invalid offset") 605 606 func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) { 607 log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset) 608 switch whence { 609 default: 610 return 0, errWhence 611 case 0: 612 offset += 0 613 case 1: 614 offset += r.off 615 case 2: 616 if r.chunkData == nil { // 617 _, err := r.Size(context.TODO(), nil) 618 if err != nil { 619 return 0, fmt.Errorf("can't get size: %v", err) 620 } 621 } 622 offset += r.chunkData.Size() 623 } 624 625 if offset < 0 { 626 return 0, errOffset 627 } 628 r.off = offset 629 return offset, nil 630 }