github.com/muhammedhassanm/blockchain@v0.0.0-20200120143007-697261defd4d/go-ethereum-master/swarm/storage/chunker.go

     1  // Copyright 2016 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  package storage
    17  
    18  import (
    19  	"context"
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/ethereum/go-ethereum/metrics"
    28  	"github.com/ethereum/go-ethereum/swarm/log"
    29  )
    30  
    31  /*
    32  The distributed storage implemented in this package requires fixed-size chunks of content.
    33  
    34  Chunker is the interface to a component that is responsible for disassembling and assembling larger data.
    35  
    36  TreeChunker implements a Chunker based on a tree structure defined as follows:
    37  
    38  1 each node in the tree, including the root and other branching nodes, is stored as a chunk.
    39  
    40  2 branching nodes encode data that includes the size of the data slice covered by the node's entire subtree as well as the hash keys of all its children:
    41  data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
    42  
    43  3 Leaf nodes encode an actual subslice of the input data.
    44  
    45  4 if the data size is no more than the maximum chunksize, the data is stored in a single chunk
    46    key = hash(int64(size) + data)
    47  
    48  5 if the data size is more than chunksize*branches^l, but no more than
    49    chunksize*branches^(l+1), the data vector is split into slices of length
    50    chunksize*branches^l (except the last one).
    51    key = hash(int64(size) + key(slice0) + key(slice1) + ...)
    52  
    53   The underlying hash function is configurable
    54  */
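
// exampleTreeDepth is an illustrative sketch, not part of the original code: it
// shows how the number of branching levels follows from the data size, as
// described in point 5 above. With the default chunk size of 4096 bytes and a
// 32-byte hash, the branching factor is 4096/32 = 128, so e.g. a 1 MiB document
// needs depth 2 (4096*128 = 512 KiB < 1 MiB <= 4096*128^2 = 64 MiB).
func exampleTreeDepth(dataSize, chunkSize, branches int64) int {
	depth := 0
	// keep multiplying the span covered by one tree level until it covers the data
	for treeSize := chunkSize; treeSize < dataSize; treeSize *= branches {
		depth++
	}
	return depth
}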
    55  
    56  /*
    57  Tree chunker is a concrete implementation of data chunking.
    58  This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract (non-leaf) nodes are transparent: since they encode a subtree, the size they record is strictly greater than the maximum data size of a single chunk.
    59  
    60  If all is well, it is possible to implement this by simply composing readers, so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, the file system and network sockets (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
    61  The hashing itself does use extra copies and allocations, though, since it needs them.
    62  */
    63  
    64  var (
    65  	errAppendOppNotSuported = errors.New("Append operation not supported")
    66  	errOperationTimedOut    = errors.New("operation timed out")
    67  )
    68  
    69  const (
    70  	DefaultChunkSize int64 = 4096
    71  )
    72  
    73  type ChunkerParams struct {
    74  	chunkSize int64
    75  	hashSize  int64
    76  }
    77  
    78  type SplitterParams struct {
    79  	ChunkerParams
    80  	reader io.Reader
    81  	putter Putter
    82  	addr   Address
    83  }
    84  
    85  type TreeSplitterParams struct {
    86  	SplitterParams
    87  	size int64
    88  }
    89  
    90  type JoinerParams struct {
    91  	ChunkerParams
    92  	addr   Address
    93  	getter Getter
    94  	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
    95  	depth int
    96  }
    97  
    98  type TreeChunker struct {
    99  	branches int64
   100  	hashFunc SwarmHasher
   101  	dataSize int64
   102  	data     io.Reader
   103  	// calculated
   104  	addr        Address
   105  	depth       int
   106  	hashSize    int64        // tc.hashFunc.New().Size()
   107  	chunkSize   int64        // hashSize * branches
   108  	workerCount int64        // the number of worker routines used
   109  	workerLock  sync.RWMutex // lock for the worker count
   110  	jobC        chan *hashJob
   111  	wg          *sync.WaitGroup
   112  	putter      Putter
   113  	getter      Getter
   114  	errC        chan error
   115  	quitC       chan bool
   116  }
   117  
   118  /*
   119  	Join reconstructs the original content based on a root key.
   120  	When joining, the caller is returned a lazy SectionReader, which is
   121  	seekable and fetches chunks on demand as and where it is read.
   122  	New chunks to retrieve come from the getter, which the caller provides.
   123  	If an error is encountered during joining, it appears as a reader error.
   124  	As a result, partial reads from a document are possible even if other
   125  	parts are corrupt or lost.
   126  	The chunks are not meant to be validated by the chunker when joining.
   127  	This is because it is left to the DPA to decide which sources are
   128  	trusted.
   129  */
   130  func TreeJoin(ctx context.Context, addr Address, getter Getter, depth int) *LazyChunkReader {
   131  	jp := &JoinerParams{
   132  		ChunkerParams: ChunkerParams{
   133  			chunkSize: DefaultChunkSize,
   134  			hashSize:  int64(len(addr)),
   135  		},
   136  		addr:   addr,
   137  		getter: getter,
   138  		depth:  depth,
   139  	}
   140  
   141  	return NewTreeJoiner(jp).Join(ctx)
   142  }
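
// readAll is an illustrative usage sketch, not part of the original API: it
// joins the content behind addr using the supplied getter and reads the whole
// document into memory. The getter is assumed to come from whatever chunk store
// the caller already has; error handling is deliberately minimal.
func readAll(ctx context.Context, addr Address, getter Getter) ([]byte, error) {
	reader := TreeJoin(ctx, addr, getter, 0)
	// Size fetches the root chunk and reports the total document length
	size, err := reader.Size(nil)
	if err != nil {
		return nil, err
	}
	buf := make([]byte, size)
	// a full read ends exactly at the document boundary, so io.EOF is expected
	if _, err := reader.ReadAt(buf, 0); err != nil && err != io.EOF {
		return nil, err
	}
	return buf, nil
}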
   143  
   144  /*
   145  	When splitting, data is given as a SectionReader and the key is a hashSize-long byte slice (Key); the root hash of the entire content will fill it once processing finishes.
   146  	New chunks to store are stored using the putter, which the caller provides.
   147  */
   148  func TreeSplit(ctx context.Context, data io.Reader, size int64, putter Putter) (k Address, wait func(context.Context) error, err error) {
   149  	tsp := &TreeSplitterParams{
   150  		SplitterParams: SplitterParams{
   151  			ChunkerParams: ChunkerParams{
   152  				chunkSize: DefaultChunkSize,
   153  				hashSize:  putter.RefSize(),
   154  			},
   155  			reader: data,
   156  			putter: putter,
   157  		},
   158  		size: size,
   159  	}
   160  	return NewTreeSplitter(tsp).Split(ctx)
   161  }
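
// splitAndStore is an illustrative usage sketch, not part of the original API:
// it splits the content read from r into a chunk tree via the supplied putter,
// waits until all chunks have been stored, and returns the root address. The
// putter is assumed to be backed by a chunk store the caller already has.
func splitAndStore(ctx context.Context, r io.Reader, size int64, putter Putter) (Address, error) {
	addr, wait, err := TreeSplit(ctx, r, size, putter)
	if err != nil {
		return nil, err
	}
	// wait blocks until the underlying store has finished persisting the chunks
	if err := wait(ctx); err != nil {
		return nil, err
	}
	return addr, nil
}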
   162  
   163  func NewTreeJoiner(params *JoinerParams) *TreeChunker {
   164  	tc := &TreeChunker{}
   165  	tc.hashSize = params.hashSize
   166  	tc.branches = params.chunkSize / params.hashSize
   167  	tc.addr = params.addr
   168  	tc.getter = params.getter
   169  	tc.depth = params.depth
   170  	tc.chunkSize = params.chunkSize
   171  	tc.workerCount = 0
   172  	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
   173  	tc.wg = &sync.WaitGroup{}
   174  	tc.errC = make(chan error)
   175  	tc.quitC = make(chan bool)
   176  
   177  	return tc
   178  }
   179  
   180  func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
   181  	tc := &TreeChunker{}
   182  	tc.data = params.reader
   183  	tc.dataSize = params.size
   184  	tc.hashSize = params.hashSize
   185  	tc.branches = params.chunkSize / params.hashSize
   186  	tc.addr = params.addr
   187  	tc.chunkSize = params.chunkSize
   188  	tc.putter = params.putter
   189  	tc.workerCount = 0
   190  	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
   191  	tc.wg = &sync.WaitGroup{}
   192  	tc.errC = make(chan error)
   193  	tc.quitC = make(chan bool)
   194  
   195  	return tc
   196  }
   197  
   198  // String() for pretty printing
   199  func (c *Chunk) String() string {
   200  	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData))
   201  }
   202  
   203  type hashJob struct {
   204  	key      Address
   205  	chunk    []byte
   206  	size     int64
   207  	parentWg *sync.WaitGroup
   208  }
   209  
   210  func (tc *TreeChunker) incrementWorkerCount() {
   211  	tc.workerLock.Lock()
   212  	defer tc.workerLock.Unlock()
   213  	tc.workerCount += 1
   214  }
   215  
   216  func (tc *TreeChunker) getWorkerCount() int64 {
   217  	tc.workerLock.RLock()
   218  	defer tc.workerLock.RUnlock()
   219  	return tc.workerCount
   220  }
   221  
   222  func (tc *TreeChunker) decrementWorkerCount() {
   223  	tc.workerLock.Lock()
   224  	defer tc.workerLock.Unlock()
   225  	tc.workerCount -= 1
   226  }
   227  
   228  func (tc *TreeChunker) Split(ctx context.Context) (k Address, wait func(context.Context) error, err error) {
   229  	if tc.chunkSize <= 0 {
   230  		panic("chunker must be initialised")
   231  	}
   232  
   233  	tc.runWorker()
   234  
   235  	depth := 0
   236  	treeSize := tc.chunkSize
   237  
   238  	// take the lowest depth such that chunkSize*branches^(depth+1) > size
   239  	// geometric progression: finds the order of magnitude of the data size in base branches, i.e. the number of levels of branching in the resulting tree.
   240  	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
   241  		depth++
   242  	}
   243  
   244  	key := make([]byte, tc.hashSize)
   245  	// this waitgroup member is released after the root hash is calculated
   246  	tc.wg.Add(1)
   247  	// launch the actual recursive split, passing the waitgroup
   248  	go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)
   249  
   250  	// close the internal error channel once all subprocesses in the waitgroup have finished
   251  	go func() {
   252  		// wait for all goroutines to finish
   253  		tc.wg.Wait()
   254  		close(tc.errC)
   255  	}()
   256  
   257  	defer close(tc.quitC)
   258  	defer tc.putter.Close()
   259  	select {
   260  	case err := <-tc.errC:
   261  		if err != nil {
   262  			return nil, nil, err
   263  		}
   264  	case <-time.NewTimer(splitTimeout).C:
   265  		return nil, nil, errOperationTimedOut
   266  	}
   267  
   268  	return key, tc.putter.Wait, nil
   269  }
   270  
   271  func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {
   272  
   275  	for depth > 0 && size < treeSize {
   276  		treeSize /= tc.branches
   277  		depth--
   278  	}
   279  
   280  	if depth == 0 {
   281  		// leaf nodes -> content chunks
   282  		chunkData := make([]byte, size+8)
   283  		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
   284  		var readBytes int64
   285  		for readBytes < size {
   286  			n, err := tc.data.Read(chunkData[8+readBytes:])
   287  			readBytes += int64(n)
   288  			if err != nil && !(err == io.EOF && readBytes == size) {
   289  				tc.errC <- err
   290  				return
   291  			}
   292  		}
   293  		select {
   294  		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
   295  		case <-tc.quitC:
   296  		}
   297  		return
   298  	}
   299  	// depth > 0
   300  	// intermediate chunk containing child nodes hashes
   301  	branchCnt := (size + treeSize - 1) / treeSize
   302  
   303  	var chunk = make([]byte, branchCnt*tc.hashSize+8)
   304  	var pos, i int64
   305  
   306  	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))
   307  
   308  	childrenWg := &sync.WaitGroup{}
   309  	var secSize int64
   310  	for i < branchCnt {
   311  		// the last item can have shorter data
   312  		if size-pos < treeSize {
   313  			secSize = size - pos
   314  		} else {
   315  			secSize = treeSize
   316  		}
   317  		// the hash of that data
   318  		subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]
   319  
   320  		childrenWg.Add(1)
   321  		tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg)
   322  
   323  		i++
   324  		pos += treeSize
   325  	}
   326  	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
   329  	childrenWg.Wait()
   330  
   331  	worker := tc.getWorkerCount()
   332  	// spawn an extra worker if jobs are queuing up and we are still below the ChunkProcessors cap
   333  	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
   334  		tc.runWorker()
   335  	}
   336  	select {
   337  	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
   338  	case <-tc.quitC:
   339  	}
   340  }
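
// newChunkData is an illustrative sketch of the chunk layout built by split
// above; it is not called by the original code. Every chunk starts with the
// subtree size as a little-endian uint64, followed by the payload: raw data
// for leaf chunks, a sequence of child hashes for branching chunks.
func newChunkData(subtreeSize uint64, payload []byte) []byte {
	chunk := make([]byte, 8+len(payload))
	binary.LittleEndian.PutUint64(chunk[:8], subtreeSize)
	copy(chunk[8:], payload)
	return chunk
}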
   341  
   342  func (tc *TreeChunker) runWorker() {
   343  	tc.incrementWorkerCount()
   344  	go func() {
   345  		defer tc.decrementWorkerCount()
   346  		for {
   347  			select {
   348  
   349  			case job, ok := <-tc.jobC:
   350  				if !ok {
   351  					return
   352  				}
   353  
   354  				h, err := tc.putter.Put(job.chunk)
   355  				if err != nil {
   356  					tc.errC <- err
   357  					return
   358  				}
   359  				copy(job.key, h)
   360  				job.parentWg.Done()
   361  			case <-tc.quitC:
   362  				return
   363  			}
   364  		}
   365  	}()
   366  }
   367  
   368  func (tc *TreeChunker) Append() (Address, func(), error) {
   369  	return nil, nil, errAppendOppNotSuported
   370  }
   371  
   372  // LazyChunkReader implements LazySectionReader
   373  type LazyChunkReader struct {
   374  	key       Address // root key
   375  	chunkData ChunkData
   376  	off       int64 // offset
   377  	chunkSize int64 // inherit from chunker
   378  	branches  int64 // inherit from chunker
   379  	hashSize  int64 // inherit from chunker
   380  	depth     int
   381  	getter    Getter
   382  }
   383  
   384  func (tc *TreeChunker) Join(ctx context.Context) *LazyChunkReader {
   385  	return &LazyChunkReader{
   386  		key:       tc.addr,
   387  		chunkSize: tc.chunkSize,
   388  		branches:  tc.branches,
   389  		hashSize:  tc.hashSize,
   390  		depth:     tc.depth,
   391  		getter:    tc.getter,
   392  	}
   393  }
   394  
   395  // Size is meant to be called on the LazySectionReader
   396  func (r *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
   397  	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)
   398  
   399  	log.Debug("lazychunkreader.size", "key", r.key)
   400  	if r.chunkData == nil {
   401  		chunkData, err := r.getter.Get(Reference(r.key))
   402  		if err != nil {
   403  			return 0, err
   404  		}
   405  		if chunkData == nil {
   406  			select {
   407  			case <-quitC:
   408  				return 0, errors.New("aborted")
   409  			default:
   410  				return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex())
   411  			}
   412  		}
   413  		r.chunkData = chunkData
   414  	}
   415  	return r.chunkData.Size(), nil
   416  }
   417  
   418  // ReadAt can be called numerous times
   419  // concurrent reads are allowed
   420  // Size() needs to be called synchronously on the LazyChunkReader first
   421  func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
   422  	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)
   423  
   424  	// returning (0, nil) here is correct: a swarm doc cannot be zero length, so no EOF is expected
   425  	if len(b) == 0 {
   426  		return 0, nil
   427  	}
   428  	quitC := make(chan bool)
   429  	size, err := r.Size(quitC)
   430  	if err != nil {
   431  		log.Error("lazychunkreader.readat.size", "size", size, "err", err)
   432  		return 0, err
   433  	}
   434  
   435  	errC := make(chan error)
   436  
   438  	var treeSize int64
   439  	var depth int
   440  	// calculate depth and max treeSize
   441  	treeSize = r.chunkSize
   442  	for ; treeSize < size; treeSize *= r.branches {
   443  		depth++
   444  	}
   445  	wg := sync.WaitGroup{}
   446  	length := int64(len(b))
   447  	for d := 0; d < r.depth; d++ {
   448  		off *= r.chunkSize
   449  		length *= r.chunkSize
   450  	}
   451  	wg.Add(1)
   452  	go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
   453  	go func() {
   454  		wg.Wait()
   455  		close(errC)
   456  	}()
   457  
   458  	err = <-errC
   459  	if err != nil {
   460  		log.Error("lazychunkreader.readat.errc", "err", err)
   461  		close(quitC)
   462  		return 0, err
   463  	}
   464  	if off+int64(len(b)) >= size {
   465  		return int(size - off), io.EOF
   466  	}
   467  	return len(b), nil
   468  }
   469  
   470  func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
   471  	defer parentWg.Done()
   472  	// find appropriate block level
   473  	for chunkData.Size() < treeSize && depth > r.depth {
   474  		treeSize /= r.branches
   475  		depth--
   476  	}
   477  
   478  	// leaf chunk found
   479  	if depth == r.depth {
   480  		extra := 8 + eoff - int64(len(chunkData))
   481  		if extra > 0 {
   482  			eoff -= extra
   483  		}
   484  		copy(b, chunkData[8+off:8+eoff])
   485  		return // content chunk: the requested section has been copied into b
   486  	}
   487  
   488  	// subtree
   489  	start := off / treeSize
   490  	end := (eoff + treeSize - 1) / treeSize
   491  
   492  	// the last non-leaf chunk can be shorter than the default chunk size; let's not read it further than its end
   493  	currentBranches := int64(len(chunkData)-8) / r.hashSize
   494  	if end > currentBranches {
   495  		end = currentBranches
   496  	}
   497  
   498  	wg := &sync.WaitGroup{}
   499  	defer wg.Wait()
   500  	for i := start; i < end; i++ {
   501  		soff := i * treeSize
   502  		roff := soff
   503  		seoff := soff + treeSize
   504  
   505  		if soff < off {
   506  			soff = off
   507  		}
   508  		if seoff > eoff {
   509  			seoff = eoff
   510  		}
   511  		if depth > 1 {
   512  			wg.Wait()
   513  		}
   514  		wg.Add(1)
   515  		go func(j int64) {
   516  			childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
   517  			chunkData, err := r.getter.Get(Reference(childKey))
   518  			if err != nil {
   519  				log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err)
   520  				select {
   521  				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)):
   522  				case <-quitC:
   523  				}
   524  				return
   525  			}
   526  			if l := len(chunkData); l < 9 {
   527  				select {
   528  				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l):
   529  				case <-quitC:
   530  				}
   531  				return
   532  			}
   533  			if soff < off {
   534  				soff = off
   535  			}
   536  			r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
   537  		}(i)
   538  	} //for
   539  }
   540  
   541  // Read keeps a cursor, so it cannot be called simultaneously; see ReadAt
   542  func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
   543  	log.Debug("lazychunkreader.read", "key", r.key)
   544  	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)
   545  
   546  	read, err = r.ReadAt(b, r.off)
   547  	if err != nil && err != io.EOF {
   548  		log.Error("lazychunkreader.readat", "read", read, "err", err)
   549  		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
   550  	}
   551  
   552  	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))
   553  
   554  	r.off += int64(read)
   555  	return
   556  }
   557  
   558  // completely analogous to standard SectionReader implementation
   559  var errWhence = errors.New("Seek: invalid whence")
   560  var errOffset = errors.New("Seek: invalid offset")
   561  
   562  func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
   563  	log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset)
   564  	switch whence {
   565  	default:
   566  		return 0, errWhence
   567  	case 0:
   568  		offset += 0
   569  	case 1:
   570  		offset += r.off
   571  	case 2:
   572  		if r.chunkData == nil { // seeking from the end requires the root chunk for its size; call Size first
   573  			_, err := r.Size(nil)
   574  			if err != nil {
   575  				return 0, fmt.Errorf("can't get size: %v", err)
   576  			}
   577  		}
   578  		offset += r.chunkData.Size()
   579  	}
   580  
   581  	if offset < 0 {
   582  		return 0, errOffset
   583  	}
   584  	r.off = offset
   585  	return offset, nil
   586  }
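
// readSection is an illustrative sketch, not part of the original API: it shows
// how Seek and Read combine to stream an arbitrary section of a document from a
// LazyChunkReader. The offset is assumed to lie within the document; io.EOF is
// tolerated so that the tail of the document can be read.
func readSection(r *LazyChunkReader, offset, length int64) ([]byte, error) {
	// whence 0 seeks relative to the start of the document
	if _, err := r.Seek(offset, 0); err != nil {
		return nil, err
	}
	buf := make([]byte, length)
	n, err := r.Read(buf)
	if err != nil && err != io.EOF {
		return nil, err
	}
	return buf[:n], nil
}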