github.com/linapex/ethereum-dpos-chinese@v0.0.0-20190316121959-b78b3a4a1ece/swarm/storage/chunker.go (about)

     1  
     2  //<developer>
     3  //    <name>linapex 曹一峰</name>
     4  //    <email>linapex@163.com</email>
     5  //    <wx>superexc</wx>
     6  //    <qqgroup>128148617</qqgroup>
     7  //    <url>https://jsq.ink</url>
     8  //    <role>pku engineer</role>
     9  //    <date>2019-03-16 12:09:49</date>
    10  //</624342680276570112>
    11  
    12  //
    13  //
    14  //
    15  //
    16  //
    17  //
    18  //
    19  //
    20  //
    21  //
    22  //
    23  //
    24  //
    25  //
    26  //
    27  package storage
    28  
    29  import (
    30  	"context"
    31  	"encoding/binary"
    32  	"errors"
    33  	"fmt"
    34  	"io"
    35  	"sync"
    36  	"time"
    37  
    38  	"github.com/ethereum/go-ethereum/metrics"
    39  	"github.com/ethereum/go-ethereum/swarm/chunk"
    40  	"github.com/ethereum/go-ethereum/swarm/log"
    41  	"github.com/ethereum/go-ethereum/swarm/spancontext"
    42  	opentracing "github.com/opentracing/opentracing-go"
    43  	olog "github.com/opentracing/opentracing-go/log"
    44  )
    45  
    46  /*
    47  
    48  
    49  
    50  
    51  
    52  
    53  
    54  
    55  
    56  
    57  
    58  
    59  
    60  
    61    
    62  
    63  
    64    
    65    
    66    
    67  
    68   
    69  */
    70  
    71  
    72  /*
    73  
    74  
    75  
    76  
    77  
    78  */
    79  
    80  
// Sentinel errors returned by chunker operations.
var (
	errAppendOppNotSuported = errors.New("Append operation not supported") // returned by TreeChunker.Append
	errOperationTimedOut    = errors.New("operation timed out")            // returned when Split exceeds splitTimeout
)
    85  
// ChunkerParams holds the sizes shared by splitter and joiner: the payload
// size of a chunk and the byte length of a hash (chunk reference).
type ChunkerParams struct {
	chunkSize int64 // maximum payload size of a single chunk, in bytes
	hashSize  int64 // byte length of one hash stored in an intermediate chunk
}
    90  
// SplitterParams bundles the inputs for splitting a data stream into a chunk tree.
type SplitterParams struct {
	ChunkerParams
	reader io.Reader // source of the data to be split
	putter Putter    // sink that stores the produced chunks
	addr   Address   // root address buffer
}
    97  
// TreeSplitterParams extends SplitterParams with the total input size, which
// the tree splitter needs up front to determine the shape of the tree.
type TreeSplitterParams struct {
	SplitterParams
	size int64 // total number of data bytes to read from the reader
}
   102  
// JoinerParams bundles the inputs for joining (reading back) a chunk tree.
type JoinerParams struct {
	ChunkerParams
	addr   Address // root address of the tree to read
	getter Getter  // source used to fetch chunks by reference
	// depth limits traversal: 0 reads down to the data leaves; a positive
	// value appears to stop that many levels above the leaves, returning
	// intermediate chunk content instead — NOTE(review): inferred from
	// LazyChunkReader.join; confirm against callers.
	depth int
	ctx   context.Context
}
   111  
// TreeChunker implements splitting a stream into a balanced chunk tree and,
// via Join, lazy on-demand reading of such a tree.
type TreeChunker struct {
	ctx context.Context

	branches int64 // number of children per intermediate chunk (chunkSize / hashSize)
	hashFunc SwarmHasher
	dataSize int64     // total input size when splitting
	data     io.Reader // input stream when splitting
	// joining-side configuration
	addr        Address
	depth       int
	hashSize    int64        // size of one hash (reference) in bytes
	chunkSize   int64        // maximum chunk payload size in bytes
	workerCount int64        // number of hashing worker goroutines currently running
	workerLock  sync.RWMutex // guards workerCount
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}
   133  
   134  /*
   135   
   136   
   137   
   138   
   139   
   140   
   141   
   142   
   143   
   144   
   145  */
   146  
   147  func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
   148  	jp := &JoinerParams{
   149  		ChunkerParams: ChunkerParams{
   150  			chunkSize: chunk.DefaultSize,
   151  			hashSize:  int64(len(addr)),
   152  		},
   153  		addr:   addr,
   154  		getter: getter,
   155  		depth:  depth,
   156  		ctx:    ctx,
   157  	}
   158  
   159  	return NewTreeJoiner(jp).Join(ctx)
   160  }
   161  
   162  /*
   163   
   164   
   165  */
   166  
   167  func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
   168  	tsp := &TreeSplitterParams{
   169  		SplitterParams: SplitterParams{
   170  			ChunkerParams: ChunkerParams{
   171  				chunkSize: chunk.DefaultSize,
   172  				hashSize:  putter.RefSize(),
   173  			},
   174  			reader: data,
   175  			putter: putter,
   176  		},
   177  		size: size,
   178  	}
   179  	return NewTreeSplitter(tsp).Split(ctx)
   180  }
   181  
   182  func NewTreeJoiner(params *JoinerParams) *TreeChunker {
   183  	tc := &TreeChunker{}
   184  	tc.hashSize = params.hashSize
   185  	tc.branches = params.chunkSize / params.hashSize
   186  	tc.addr = params.addr
   187  	tc.getter = params.getter
   188  	tc.depth = params.depth
   189  	tc.chunkSize = params.chunkSize
   190  	tc.workerCount = 0
   191  	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
   192  	tc.wg = &sync.WaitGroup{}
   193  	tc.errC = make(chan error)
   194  	tc.quitC = make(chan bool)
   195  
   196  	tc.ctx = params.ctx
   197  
   198  	return tc
   199  }
   200  
   201  func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
   202  	tc := &TreeChunker{}
   203  	tc.data = params.reader
   204  	tc.dataSize = params.size
   205  	tc.hashSize = params.hashSize
   206  	tc.branches = params.chunkSize / params.hashSize
   207  	tc.addr = params.addr
   208  	tc.chunkSize = params.chunkSize
   209  	tc.putter = params.putter
   210  	tc.workerCount = 0
   211  	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
   212  	tc.wg = &sync.WaitGroup{}
   213  	tc.errC = make(chan error)
   214  	tc.quitC = make(chan bool)
   215  
   216  	return tc
   217  }
   218  
// String returns a human-readable summary of the chunk for logging.
func (c *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData))
}
   223  
// hashJob is a unit of work for the hashing workers: a chunk to store, whose
// resulting reference is written into key, with parentWg signalled when done.
type hashJob struct {
	key      Address         // destination buffer the resulting hash is copied into
	chunk    []byte          // chunk data: 8-byte little-endian size prefix + payload
	size     int64           // subtree size in data bytes represented by this chunk
	parentWg *sync.WaitGroup // Done() is called once the chunk has been stored
}
   230  
   231  func (tc *TreeChunker) incrementWorkerCount() {
   232  	tc.workerLock.Lock()
   233  	defer tc.workerLock.Unlock()
   234  	tc.workerCount += 1
   235  }
   236  
   237  func (tc *TreeChunker) getWorkerCount() int64 {
   238  	tc.workerLock.RLock()
   239  	defer tc.workerLock.RUnlock()
   240  	return tc.workerCount
   241  }
   242  
   243  func (tc *TreeChunker) decrementWorkerCount() {
   244  	tc.workerLock.Lock()
   245  	defer tc.workerLock.Unlock()
   246  	tc.workerCount -= 1
   247  }
   248  
   249  func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
   250  	if tc.chunkSize <= 0 {
   251  		panic("chunker must be initialised")
   252  	}
   253  
   254  	tc.runWorker()
   255  
   256  	depth := 0
   257  	treeSize := tc.chunkSize
   258  
   259  //
   260  //
   261  	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
   262  		depth++
   263  	}
   264  
   265  	key := make([]byte, tc.hashSize)
   266  //
   267  	tc.wg.Add(1)
   268  //
   269  	go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)
   270  
   271  //
   272  	go func() {
   273  //
   274  		tc.wg.Wait()
   275  		close(tc.errC)
   276  	}()
   277  
   278  	defer close(tc.quitC)
   279  	defer tc.putter.Close()
   280  	select {
   281  	case err := <-tc.errC:
   282  		if err != nil {
   283  			return nil, nil, err
   284  		}
   285  	case <-time.NewTimer(splitTimeout).C:
   286  		return nil, nil, errOperationTimedOut
   287  	}
   288  
   289  	return key, tc.putter.Wait, nil
   290  }
   291  
// split recursively chunks a size-byte span of the input at the given tree
// depth, writing the resulting subtree root hash into addr. treeSize is the
// number of data bytes covered by one child of this node. parentWg is
// released (by the worker) once this subtree's root chunk has been hashed.
func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {

	// shrink until this is the smallest subtree that still covers size bytes

	for depth > 0 && size < treeSize {
		treeSize /= tc.branches
		depth--
	}

	if depth == 0 {
		// leaf: read size bytes of data into a chunk with an 8-byte
		// little-endian length prefix
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := tc.data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				tc.errC <- err
				return
			}
		}
		select {
		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
		case <-tc.quitC:
		}
		return
	}
	// intermediate node: fan out into up to tc.branches children, each
	// covering treeSize bytes (the last possibly fewer)
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*tc.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last section may be shorter than a full subtree span
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the child's hash is written directly into this node's chunk body
		subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]

		childrenWg.Add(1)
		tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg)

		i++
		pos += treeSize
	}
	// wait until all children's hashes are filled in before hashing this
	// node; parentWg is passed on with the job so the caller is only
	// released once this chunk itself has been hashed
	childrenWg.Wait()

	worker := tc.getWorkerCount()
	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
		tc.runWorker()

	}
	select {
	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
	case <-tc.quitC:
	}
}
   362  
// runWorker starts one hashing worker goroutine. The worker consumes jobs
// from tc.jobC, stores each chunk via tc.putter, copies the resulting
// reference into the job's key buffer, and signals the job's parent
// waitgroup. It exits when jobC is closed, quitC is closed, or a Put fails.
func (tc *TreeChunker) runWorker() {
	tc.incrementWorkerCount()
	go func() {
		defer tc.decrementWorkerCount()
		for {
			select {

			case job, ok := <-tc.jobC:
				if !ok {
					return
				}

				h, err := tc.putter.Put(tc.ctx, job.chunk)
				if err != nil {
					tc.errC <- err
					return
				}
				copy(job.key, h)
				job.parentWg.Done()
			case <-tc.quitC:
				return
			}
		}
	}()
}
   388  
// Append is not supported by the tree chunker; it always returns
// errAppendOppNotSuported.
func (tc *TreeChunker) Append() (Address, func(), error) {
	return nil, nil, errAppendOppNotSuported
}
   392  
// LazyChunkReader reads a chunk tree on demand, providing Read, ReadAt and
// Seek over the joined content.
type LazyChunkReader struct {
	Ctx       context.Context
	key       Address   // root key of the chunk tree
	chunkData ChunkData // cached root chunk, lazily fetched by Size
	off       int64     // current read offset used by Read/Seek
	chunkSize int64     // inherited from the chunker
	branches  int64     // inherited from the chunker
	hashSize  int64     // inherited from the chunker
	depth     int
	getter    Getter
}
   405  
   406  func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
   407  	return &LazyChunkReader{
   408  		key:       tc.addr,
   409  		chunkSize: tc.chunkSize,
   410  		branches:  tc.branches,
   411  		hashSize:  tc.hashSize,
   412  		depth:     tc.depth,
   413  		getter:    tc.getter,
   414  		Ctx:       tc.ctx,
   415  	}
   416  }
   417  
// Context returns the context the reader was created with.
func (r *LazyChunkReader) Context() context.Context {
	return r.Ctx
}
   421  
// Size returns the total byte length of the joined content, fetching and
// caching the root chunk on first call. quitC (may be nil) aborts the error
// path when the root chunk cannot be found.
func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		ctx,
		"lcr.size")
	defer sp.Finish()

	log.Debug("lazychunkreader.size", "key", r.key)
	if r.chunkData == nil {
		// lazily fetch and cache the root chunk; its size prefix is the
		// total content length
		chunkData, err := r.getter.Get(cctx, Reference(r.key))
		if err != nil {
			return 0, err
		}
		if chunkData == nil {
			select {
			case <-quitC:
				return 0, errors.New("aborted")
			default:
				return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex())
			}
		}
		r.chunkData = chunkData
	}
	return r.chunkData.Size(), nil
}
   451  
// ReadAt fills b with content starting at absolute offset off, walking the
// chunk tree concurrently. When the read reaches the end of the content it
// returns the number of bytes read together with io.EOF.
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)

	var sp opentracing.Span
	var cctx context.Context
	cctx, sp = spancontext.StartSpan(
		r.Ctx,
		"lcr.read")
	defer sp.Finish()

	defer func() {
		sp.LogFields(
			olog.Int("off", int(off)),
			olog.Int("read", read))
	}()

	// zero-length destination: nothing to do
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := r.Size(cctx, quitC)
	if err != nil {
		log.Error("lazychunkreader.readat.size", "size", size, "err", err)
		return 0, err
	}

	errC := make(chan error)

	// derive the tree depth and the span covered by one top-level subtree
	var treeSize int64
	var depth int
	treeSize = r.chunkSize
	for ; treeSize < size; treeSize *= r.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	length := int64(len(b))
	for d := 0; d < r.depth; d++ {
		// with a depth cut-off, offsets scale by chunkSize per skipped level
		off *= r.chunkSize
		length *= r.chunkSize
	}
	wg.Add(1)
	go r.join(cctx, b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	// first error wins; a close of errC (nil receive) means full success
	err = <-errC
	if err != nil {
		log.Error("lazychunkreader.readat.errc", "err", err)
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		return int(size - off), io.EOF
	}
	return len(b), nil
}
   516  
// join recursively copies the byte range [off, eoff) of the subtree rooted
// at chunkData into b. treeSize is the span covered by one child of this
// node. Errors are reported on errC; quitC aborts pending error sends.
// parentWg is released when this call returns.
func (r *LazyChunkReader) join(ctx context.Context, b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
	// find the appropriate tree level for this chunk's size
	for chunkData.Size() < treeSize && depth > r.depth {
		treeSize /= r.branches
		depth--
	}

	// reached the configured depth: copy this chunk's payload out
	if depth == r.depth {
		extra := 8 + eoff - int64(len(chunkData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunkData[8+off:8+eoff])
		return // content chunk: data follows the 8-byte size prefix
	}

	// intermediate chunk: determine which children [off, eoff) touches
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	// the chunk may hold fewer than a full set of child references
	currentBranches := int64(len(chunkData)-8) / r.hashSize
	if end > currentBranches {
		end = currentBranches
	}

	wg := &sync.WaitGroup{}
	defer wg.Wait()
	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
			chunkData, err := r.getter.Get(ctx, Reference(childKey))
			if err != nil {
				log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err)
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)):
				case <-quitC:
				}
				return
			}
			if l := len(chunkData); l < 9 {
				select {
				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			r.join(ctx, b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
		}(i)
	} // deferred wg.Wait() blocks until every child read has finished
}
   587  
   588  //
   589  func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
   590  	log.Debug("lazychunkreader.read", "key", r.key)
   591  	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)
   592  
   593  	read, err = r.ReadAt(b, r.off)
   594  	if err != nil && err != io.EOF {
   595  		log.Error("lazychunkreader.readat", "read", read, "err", err)
   596  		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
   597  	}
   598  
   599  	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))
   600  
   601  	r.off += int64(read)
   602  	return
   603  }
   604  
// Seek argument errors, mirroring the standard library's messages.
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")
   608  
// Seek implements io.Seeker. whence follows the io.Seek* values: 0 = from
// start, 1 = from current offset, 2 = from end (which requires the content
// size, so the root chunk is fetched if not yet cached).
func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset)
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += r.off
	case 2:
		if r.chunkData == nil { // seek from end requires the size; Size caches the root chunk
			_, err := r.Size(context.TODO(), nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += r.chunkData.Size()
	}

	if offset < 0 {
		return 0, errOffset
	}
	r.off = offset
	return offset, nil
}
   634