github.com/linapex/ethereum-go-chinese@v0.0.0-20190316121929-f8b7a73c3fa1/swarm/storage/chunker.go

github.com/linapex/ethereum-go-chinese@v0.0.0-20190316121929-f8b7a73c3fa1/swarm/storage/chunker.go (about)

     1  
     2  //<developer>
     3  //    <name>linapex 曹一峰</name>
     4  //    <email>linapex@163.com</email>
     5  //    <wx>superexc</wx>
     6  //    <qqgroup>128148617</qqgroup>
     7  //    <url>https://jsq.ink</url>
     8  //    <role>pku engineer</role>
     9  //    <date>2019-03-16 19:16:44</date>
    10  //</developer>
    11  
    12  package storage
    13  
    14  import (
    15  	"context"
    16  	"encoding/binary"
    17  	"errors"
    18  	"fmt"
    19  	"io"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/ethereum/go-ethereum/metrics"
    24  	ch "github.com/ethereum/go-ethereum/swarm/chunk"
    25  	"github.com/ethereum/go-ethereum/swarm/log"
    26  	"github.com/ethereum/go-ethereum/swarm/spancontext"
    27  	opentracing "github.com/opentracing/opentracing-go"
    28  	olog "github.com/opentracing/opentracing-go/log"
    29  )
    30  
    31  /*
    32  此包中实现的分布式存储需要固定大小的内容块。
    33  
    34  chunker是一个组件的接口，该组件负责分解和组装较大的数据。
    35  
    36  TreeChunker基于树结构实现一个chunker，定义如下：
    37  
    38  1树中的每个节点（包括根节点和其他分支节点）都存储为一个块。
    39  
    40  2个分支节点对数据内容进行编码，这些内容包括节点下的整个子树所覆盖的数据切片大小及其所有子节点的哈希键：
    41  data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} || ... || key_{j+n-1}
    42  
    43  3个叶节点对输入数据的实际子片进行编码。
    44  
    45  4如果数据大小不超过最大chunk size，则数据存储在单个块中。
    46    键=哈希（Int64（大小）+数据）
    47  
    48  5如果数据大小大于chunksize*分支^l，但不大于chunksize*
    49    分支^（l+1），则数据向量被分割成长度为chunksize*
    50    分支^l的切片（最后一个除外）。
    51    key=hash（int64（大小）+key（slice0）+key（slice1）+…）
    52  
    53   底层哈希函数是可配置的
    54  **/
    55  
    56  
    57  /*
    58  树分块器是数据分块的具体实现。
    59  这个chunker以一种简单的方式工作，它从文档中构建一个树，这样每个节点要么表示一块实际数据，要么表示一块表示树的分支非叶节点的数据。尤其是，每个这样的非叶块将表示其各自子块的散列的串联。该方案同时保证了数据的完整性和自寻址能力。抽象节点是透明的，因为其表示的大小组件严格大于其最大数据大小，因为它们编码子树。
    60  
    61  如果一切正常，可以通过简单地编写读卡器来实现这一点，这样就不需要额外的分配或缓冲来进行数据拆分和连接。这意味着原则上，内存、文件系统、网络套接字之间可以有直接的IO（BZZ对等机存储请求是从套接字读取的）。实际上，可能需要几个阶段的内部缓冲。
    62  不过，散列本身确实使用了额外的副本和分配，因为它确实需要它。
    63  **/
    64  
    65  
    66  type ChunkerParams struct {
    67  	chunkSize int64
    68  	hashSize  int64
    69  }
    70  
// SplitterParams extends ChunkerParams with the inputs of a split operation.
type SplitterParams struct {
	ChunkerParams
	reader io.Reader // source of the data to be split
	putter Putter    // receives every chunk produced while splitting
	addr   Address   // copied into TreeChunker.addr by NewTreeSplitter
}
    77  
// TreeSplitterParams adds the total input length to SplitterParams.
type TreeSplitterParams struct {
	SplitterParams
	size int64 // number of bytes to split; becomes TreeChunker.dataSize
}
    82  
// JoinerParams extends ChunkerParams with the inputs of a join operation.
type JoinerParams struct {
	ChunkerParams
	addr   Address // root address of the content to reassemble
	getter Getter  // used to fetch chunks by reference
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
	ctx   context.Context // handed on to the TreeChunker built by NewTreeJoiner
}
    91  
    92  type TreeChunker struct {
    93  	ctx context.Context
    94  
    95  	branches int64
    96  	dataSize int64
    97  	data     io.Reader
    98  //计算
    99  	addr        Address
   100  	depth       int
   101  hashSize    int64        //self.hashfunc.new（）.size（）。
   102  chunkSize   int64        //哈希大小*分支
   103  workerCount int64        //使用的工作程序数
   104  workerLock  sync.RWMutex //锁定工人计数
   105  	jobC        chan *hashJob
   106  	wg          *sync.WaitGroup
   107  	putter      Putter
   108  	getter      Getter
   109  	errC        chan error
   110  	quitC       chan bool
   111  }
   112  
   113  /*
   114   join基于根键重新构造原始内容。
   115   加入时，调用方将返回一个惰性的区段读取器，即
   116   可查找并实现按需获取块以及读取块的位置。
   117   要检索的新块来自调用方提供的getter。
   118   如果在联接过程中遇到错误，则显示为读卡器错误。
   119   区段阅读器。
   120   因此，即使其他部分也可以部分读取文档
   121   损坏或丢失。
   122   块在加入时不会被chunker验证。这个
   123   是因为由DPA决定哪些来源是可信的。
   124  **/
   125  
   126  func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
   127  	jp := &JoinerParams{
   128  		ChunkerParams: ChunkerParams{
   129  			chunkSize: ch.DefaultSize,
   130  			hashSize:  int64(len(addr)),
   131  		},
   132  		addr:   addr,
   133  		getter: getter,
   134  		depth:  depth,
   135  		ctx:    ctx,
   136  	}
   137  
   138  	return NewTreeJoiner(jp).Join(ctx)
   139  }
   140  
   141  /*
   142   拆分时，数据作为一个节阅读器提供，键是一个哈希大小的长字节片（键），一旦处理完成，整个内容的根哈希将填充此内容。
   143   要存储的新块是使用调用方提供的推杆存储的。
   144  **/
   145  
   146  func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
   147  	tsp := &TreeSplitterParams{
   148  		SplitterParams: SplitterParams{
   149  			ChunkerParams: ChunkerParams{
   150  				chunkSize: ch.DefaultSize,
   151  				hashSize:  putter.RefSize(),
   152  			},
   153  			reader: data,
   154  			putter: putter,
   155  		},
   156  		size: size,
   157  	}
   158  	return NewTreeSplitter(tsp).Split(ctx)
   159  }
   160  
   161  func NewTreeJoiner(params *JoinerParams) *TreeChunker {
   162  	tc := &TreeChunker{}
   163  	tc.hashSize = params.hashSize
   164  	tc.branches = params.chunkSize / params.hashSize
   165  	tc.addr = params.addr
   166  	tc.getter = params.getter
   167  	tc.depth = params.depth
   168  	tc.chunkSize = params.chunkSize
   169  	tc.workerCount = 0
   170  	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
   171  	tc.wg = &sync.WaitGroup{}
   172  	tc.errC = make(chan error)
   173  	tc.quitC = make(chan bool)
   174  
   175  	tc.ctx = params.ctx
   176  
   177  	return tc
   178  }
   179  
   180  func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
   181  	tc := &TreeChunker{}
   182  	tc.data = params.reader
   183  	tc.dataSize = params.size
   184  	tc.hashSize = params.hashSize
   185  	tc.branches = params.chunkSize / params.hashSize
   186  	tc.addr = params.addr
   187  	tc.chunkSize = params.chunkSize
   188  	tc.putter = params.putter
   189  	tc.workerCount = 0
   190  	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
   191  	tc.wg = &sync.WaitGroup{}
   192  	tc.errC = make(chan error)
   193  	tc.quitC = make(chan bool)
   194  
   195  	return tc
   196  }
   197  
// hashJob is one unit of work handed to the goroutines started by runWorker.
// It is built with positional literals in split, so the field order matters.
type hashJob struct {
	key      Address         // destination the worker copies the reference returned by Put into
	chunk    []byte          // serialised chunk: 8-byte little-endian size followed by the payload
	size     int64           // the value encoded in the chunk's size prefix
	parentWg *sync.WaitGroup // Done() is called on it once the chunk has been stored
}
   204  
   205  func (tc *TreeChunker) incrementWorkerCount() {
   206  	tc.workerLock.Lock()
   207  	defer tc.workerLock.Unlock()
   208  	tc.workerCount += 1
   209  }
   210  
   211  func (tc *TreeChunker) getWorkerCount() int64 {
   212  	tc.workerLock.RLock()
   213  	defer tc.workerLock.RUnlock()
   214  	return tc.workerCount
   215  }
   216  
   217  func (tc *TreeChunker) decrementWorkerCount() {
   218  	tc.workerLock.Lock()
   219  	defer tc.workerLock.Unlock()
   220  	tc.workerCount -= 1
   221  }
   222  
   223  func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
   224  	if tc.chunkSize <= 0 {
   225  		panic("chunker must be initialised")
   226  	}
   227  
   228  	tc.runWorker(ctx)
   229  
   230  	depth := 0
   231  	treeSize := tc.chunkSize
   232  
   233  //取最小深度，使chunkSize*hashCount^（depth+1）>size
   234  //幂级数，将找出数据大小在基散列计数中的数量级或结果树中分支级别的数量级。
   235  	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
   236  		depth++
   237  	}
   238  
   239  	key := make([]byte, tc.hashSize)
   240  //此waitgroup成员在计算根哈希之后释放
   241  	tc.wg.Add(1)
   242  //启动传递等待组的实际递归函数
   243  	go tc.split(ctx, depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)
   244  
   245  //如果工作组中的所有子进程都已完成，则关闭内部错误通道
   246  	go func() {
   247  //等待所有线程完成
   248  		tc.wg.Wait()
   249  		close(tc.errC)
   250  	}()
   251  
   252  	defer close(tc.quitC)
   253  	defer tc.putter.Close()
   254  	select {
   255  	case err := <-tc.errC:
   256  		if err != nil {
   257  			return nil, nil, err
   258  		}
   259  	case <-ctx.Done():
   260  		return nil, nil, ctx.Err()
   261  	}
   262  
   263  	return key, tc.putter.Wait, nil
   264  }
   265  
   266  func (tc *TreeChunker) split(ctx context.Context, depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {
   267  
   268  //
   269  
   270  	for depth > 0 && size < treeSize {
   271  		treeSize /= tc.branches
   272  		depth--
   273  	}
   274  
   275  	if depth == 0 {
   276  //叶节点->内容块
   277  		chunkData := make([]byte, size+8)
   278  		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
   279  		var readBytes int64
   280  		for readBytes < size {
   281  			n, err := tc.data.Read(chunkData[8+readBytes:])
   282  			readBytes += int64(n)
   283  			if err != nil && !(err == io.EOF && readBytes == size) {
   284  				tc.errC <- err
   285  				return
   286  			}
   287  		}
   288  		select {
   289  		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
   290  		case <-tc.quitC:
   291  		}
   292  		return
   293  	}
   294  //部门> 0
   295  //包含子节点哈希的中间块
   296  	branchCnt := (size + treeSize - 1) / treeSize
   297  
   298  	var chunk = make([]byte, branchCnt*tc.hashSize+8)
   299  	var pos, i int64
   300  
   301  	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))
   302  
   303  	childrenWg := &sync.WaitGroup{}
   304  	var secSize int64
   305  	for i < branchCnt {
   306  //最后一项可以有较短的数据
   307  		if size-pos < treeSize {
   308  			secSize = size - pos
   309  		} else {
   310  			secSize = treeSize
   311  		}
   312  //数据的散列值
   313  		subTreeAddress := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]
   314  
   315  		childrenWg.Add(1)
   316  		tc.split(ctx, depth-1, treeSize/tc.branches, subTreeAddress, secSize, childrenWg)
   317  
   318  		i++
   319  		pos += treeSize
   320  	}
   321  //等待所有子元素完成哈希计算并将其复制到块的各个部分
   322  //添加（1）
   323  //转到函数（）
   324  	childrenWg.Wait()
   325  
   326  	worker := tc.getWorkerCount()
   327  	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
   328  		tc.runWorker(ctx)
   329  
   330  	}
   331  	select {
   332  	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
   333  	case <-tc.quitC:
   334  	}
   335  }
   336  
   337  func (tc *TreeChunker) runWorker(ctx context.Context) {
   338  	tc.incrementWorkerCount()
   339  	go func() {
   340  		defer tc.decrementWorkerCount()
   341  		for {
   342  			select {
   343  
   344  			case job, ok := <-tc.jobC:
   345  				if !ok {
   346  					return
   347  				}
   348  
   349  				h, err := tc.putter.Put(ctx, job.chunk)
   350  				if err != nil {
   351  					tc.errC <- err
   352  					return
   353  				}
   354  				copy(job.key, h)
   355  				job.parentWg.Done()
   356  			case <-tc.quitC:
   357  				return
   358  			}
   359  		}
   360  	}()
   361  }
   362  
   363  //Lazychunkreader实现LazySectionReader
   364  type LazyChunkReader struct {
   365  	ctx       context.Context
   366  addr      Address //根地址
   367  	chunkData ChunkData
   368  off       int64 //抵消
   369  chunkSize int64 //从Chunker继承
   370  branches  int64 //从Chunker继承
   371  hashSize  int64 //从Chunker继承
   372  	depth     int
   373  	getter    Getter
   374  }
   375  
   376  func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
   377  	return &LazyChunkReader{
   378  		addr:      tc.addr,
   379  		chunkSize: tc.chunkSize,
   380  		branches:  tc.branches,
   381  		hashSize:  tc.hashSize,
   382  		depth:     tc.depth,
   383  		getter:    tc.getter,
   384  		ctx:       tc.ctx,
   385  	}
   386  }
   387  
   388  func (r *LazyChunkReader) Context() context.Context {
   389  	return r.ctx
   390  }
   391  
   392  //大小将在LazySectionReader上调用
   393  func (r *LazyChunkReader) Size(ctx context.Context, quitC chan bool) (n int64, err error) {
   394  	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)
   395  
   396  	var sp opentracing.Span
   397  	var cctx context.Context
   398  	cctx, sp = spancontext.StartSpan(
   399  		ctx,
   400  		"lcr.size")
   401  	defer sp.Finish()
   402  
   403  	log.Debug("lazychunkreader.size", "addr", r.addr)
   404  	if r.chunkData == nil {
   405  		startTime := time.Now()
   406  		chunkData, err := r.getter.Get(cctx, Reference(r.addr))
   407  		if err != nil {
   408  			metrics.GetOrRegisterResettingTimer("lcr.getter.get.err", nil).UpdateSince(startTime)
   409  			return 0, err
   410  		}
   411  		metrics.GetOrRegisterResettingTimer("lcr.getter.get", nil).UpdateSince(startTime)
   412  		r.chunkData = chunkData
   413  	}
   414  
   415  	s := r.chunkData.Size()
   416  	log.Debug("lazychunkreader.size", "key", r.addr, "size", s)
   417  
   418  	return int64(s), nil
   419  }
   420  
   421  //读在可以被称为无数次
   422  //允许并发读取
   423  //首先需要在Lazychunkreader上同步调用size（）。
   424  func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
   425  	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)
   426  
   427  	var sp opentracing.Span
   428  	var cctx context.Context
   429  	cctx, sp = spancontext.StartSpan(
   430  		r.ctx,
   431  		"lcr.read")
   432  	defer sp.Finish()
   433  
   434  	defer func() {
   435  		sp.LogFields(
   436  			olog.Int("off", int(off)),
   437  			olog.Int("read", read))
   438  	}()
   439  
   440  //这是正确的，swarm文档不能是零长度，因此不需要EOF
   441  	if len(b) == 0 {
   442  		return 0, nil
   443  	}
   444  	quitC := make(chan bool)
   445  	size, err := r.Size(cctx, quitC)
   446  	if err != nil {
   447  		log.Debug("lazychunkreader.readat.size", "size", size, "err", err)
   448  		return 0, err
   449  	}
   450  
   451  	errC := make(chan error)
   452  
   453  //}
   454  	var treeSize int64
   455  	var depth int
   456  //计算深度和最大树尺寸
   457  	treeSize = r.chunkSize
   458  	for ; treeSize < size; treeSize *= r.branches {
   459  		depth++
   460  	}
   461  	wg := sync.WaitGroup{}
   462  	length := int64(len(b))
   463  	for d := 0; d < r.depth; d++ {
   464  		off *= r.chunkSize
   465  		length *= r.chunkSize
   466  	}
   467  	wg.Add(1)
   468  	go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
   469  	go func() {
   470  		wg.Wait()
   471  		close(errC)
   472  	}()
   473  
   474  	err = <-errC
   475  	if err != nil {
   476  		log.Debug("lazychunkreader.readat.errc", "err", err)
   477  		close(quitC)
   478  		return 0, err
   479  	}
   480  	if off+int64(len(b)) >= size {
   481  		log.Debug("lazychunkreader.readat.return at end", "size", size, "off", off)
   482  		return int(size - off), io.EOF
   483  	}
   484  	log.Debug("lazychunkreader.readat.errc", "buff", len(b))
   485  	return len(b), nil
   486  }
   487  
   488  func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
   489  	defer parentWg.Done()
   490  //查找适当的块级别
   491  	for chunkData.Size() < uint64(treeSize) && depth > r.depth {
   492  		treeSize /= r.branches
   493  		depth--
   494  	}
   495  
   496  //找到叶块
   497  	if depth == r.depth {
   498  		extra := 8 + eoff - int64(len(chunkData))
   499  		if extra > 0 {
   500  			eoff -= extra
   501  		}
   502  		copy(b, chunkData[8+off:8+eoff])
   503  return //只需将内容块返回给块阅读器
   504  	}
   505  
   506  //子树
   507  	start := off / treeSize
   508  	end := (eoff + treeSize - 1) / treeSize
   509  
   510  //最后一个非叶块可以短于默认块大小，我们不要再进一步读取它的结尾
   511  	currentBranches := int64(len(chunkData)-8) / r.hashSize
   512  	if end > currentBranches {
   513  		end = currentBranches
   514  	}
   515  
   516  	wg := &sync.WaitGroup{}
   517  	defer wg.Wait()
   518  	for i := start; i < end; i++ {
   519  		soff := i * treeSize
   520  		roff := soff
   521  		seoff := soff + treeSize
   522  
   523  		if soff < off {
   524  			soff = off
   525  		}
   526  		if seoff > eoff {
   527  			seoff = eoff
   528  		}
   529  		if depth > 1 {
   530  			wg.Wait()
   531  		}
   532  		wg.Add(1)
   533  		go func(j int64) {
   534  			childAddress := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
   535  			startTime := time.Now()
   536  			chunkData, err := r.getter.Get(r.ctx, Reference(childAddress))
   537  			if err != nil {
   538  				metrics.GetOrRegisterResettingTimer("lcr.getter.get.err", nil).UpdateSince(startTime)
   539  				log.Debug("lazychunkreader.join", "key", fmt.Sprintf("%x", childAddress), "err", err)
   540  				select {
   541  				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childAddress)):
   542  				case <-quitC:
   543  				}
   544  				return
   545  			}
   546  			metrics.GetOrRegisterResettingTimer("lcr.getter.get", nil).UpdateSince(startTime)
   547  			if l := len(chunkData); l < 9 {
   548  				select {
   549  				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childAddress), l):
   550  				case <-quitC:
   551  				}
   552  				return
   553  			}
   554  			if soff < off {
   555  				soff = off
   556  			}
   557  			r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
   558  		}(i)
   559  } //对于
   560  }
   561  
   562  //read保留一个光标，因此不能同时调用，请参阅readat
   563  func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
   564  	log.Debug("lazychunkreader.read", "key", r.addr)
   565  	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)
   566  
   567  	read, err = r.ReadAt(b, r.off)
   568  	if err != nil && err != io.EOF {
   569  		log.Debug("lazychunkreader.readat", "read", read, "err", err)
   570  		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
   571  	}
   572  
   573  	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))
   574  
   575  	r.off += int64(read)
   576  	return read, err
   577  }
   578  
   579  //完全类似于标准的sectionreader实现
   580  var errWhence = errors.New("Seek: invalid whence")
   581  var errOffset = errors.New("Seek: invalid offset")
   582  
   583  func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
   584  	log.Debug("lazychunkreader.seek", "key", r.addr, "offset", offset)
   585  	switch whence {
   586  	default:
   587  		return 0, errWhence
   588  	case 0:
   589  		offset += 0
   590  	case 1:
   591  		offset += r.off
   592  	case 2:
   593  if r.chunkData == nil { //从结尾搜索要求rootchunk的大小。先调用大小
   594  			_, err := r.Size(context.TODO(), nil)
   595  			if err != nil {
   596  				return 0, fmt.Errorf("can't get size: %v", err)
   597  			}
   598  		}
   599  		offset += int64(r.chunkData.Size())
   600  	}
   601  
   602  	if offset < 0 {
   603  		return 0, errOffset
   604  	}
   605  	r.off = offset
   606  	return offset, nil
   607  }
   608