github.com/gobitfly/go-ethereum@v1.8.12/swarm/storage/chunker.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/swarm/log"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the node's entire subtree as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunk size, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*branches^(l+1), the data vector is split into slices of chunksize*branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is the concatenation of the hashes of its children. This scheme simultaneously guarantees data integrity and self-addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well, it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, file system and network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocations, though, since it needs them.
*/
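
// A worked example of the scheme above (illustrative only, assuming the
// default 4096-byte chunk size and a 32-byte hash such as Keccak-256):
//
//	branches = chunkSize / hashSize = 4096 / 32 = 128
//	depth 0  covers up to 4096 bytes (a single leaf chunk)
//	depth 1  covers up to 4096 * 128   = 512 KiB
//	depth 2  covers up to 4096 * 128^2 = 64 MiB
//
// So a 1 MB document is split into a depth-2 tree: the root chunk holds the
// hashes of up to 128 depth-1 nodes, each of which holds the hashes of up to
// 128 leaf chunks.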

var (
	errAppendOpNotSupported = errors.New("append operation not supported")
	errOperationTimedOut    = errors.New("operation timed out")
)

const (
	DefaultChunkSize int64 = 4096
)

type ChunkerParams struct {
	chunkSize int64
	hashSize  int64
}

type SplitterParams struct {
	ChunkerParams
	reader io.Reader
	putter Putter
	addr   Address
}

type TreeSplitterParams struct {
	SplitterParams
	size int64
}

type JoinerParams struct {
	ChunkerParams
	addr   Address
	getter Getter
	// TODO: there is a bug, so depth can only be 0 today, see: https://github.com/ethersphere/go-ethereum/issues/344
	depth int
}

type TreeChunker struct {
	branches int64
	hashFunc SwarmHasher
	dataSize int64
	data     io.Reader
	// calculated
	addr        Address
	depth       int
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
	jobC        chan *hashJob
	wg          *sync.WaitGroup
	putter      Putter
	getter      Getter
	errC        chan error
	quitC       chan bool
}

/*
Join reconstructs the original content based on a root key.
When joining, the caller is returned a lazy SectionReader, which is
seekable and implements on-demand fetching of chunks as and where it is read.
New chunks to retrieve come from the getter, which the caller provides.
If an error is encountered during joining, it appears as a reader error.
As a result, partial reads from a document are possible even if other parts
are corrupt or lost.
The chunks are not meant to be validated by the chunker when joining. This
is because it is left to the DPA to decide which sources are trusted.
*/
func TreeJoin(addr Address, getter Getter, depth int) *LazyChunkReader {
	jp := &JoinerParams{
		ChunkerParams: ChunkerParams{
			chunkSize: DefaultChunkSize,
			hashSize:  int64(len(addr)),
		},
		addr:   addr,
		getter: getter,
		depth:  depth,
	}

	return NewTreeJoiner(jp).Join()
}
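
// A minimal usage sketch (illustrative only; getter stands for any Getter
// implementation backed by a chunk store holding the content):
//
//	reader := TreeJoin(addr, getter, 0) // depth must currently be 0, see the TODO on JoinerParams
//	buf := make([]byte, 1024)
//	n, err := reader.ReadAt(buf, 0) // fetches only the chunks covering bytes [0, 1024)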

/*
TreeSplit splits data given as an io.Reader together with its size. The returned key is a hashSize-long byte slice (Address): the root hash of the entire content, filled in once processing finishes.
New chunks to store are stored using the putter, which the caller provides.
*/
func TreeSplit(data io.Reader, size int64, putter Putter) (k Address, wait func(), err error) {
	tsp := &TreeSplitterParams{
		SplitterParams: SplitterParams{
			ChunkerParams: ChunkerParams{
				chunkSize: DefaultChunkSize,
				hashSize:  putter.RefSize(),
			},
			reader: data,
			putter: putter,
		},
		size: size,
	}
	return NewTreeSplitter(tsp).Split()
}
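
// A minimal usage sketch (illustrative only; putter stands for any Putter
// implementation, e.g. one backed by a local chunk store):
//
//	key, wait, err := TreeSplit(bytes.NewReader(doc), int64(len(doc)), putter)
//	if err == nil {
//		wait() // block until the underlying Putter has finished storing the chunks
//	}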

func NewTreeJoiner(params *JoinerParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.getter = params.getter
	tc.depth = params.depth
	tc.chunkSize = params.chunkSize
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

func NewTreeSplitter(params *TreeSplitterParams) *TreeChunker {
	tc := &TreeChunker{}
	tc.data = params.reader
	tc.dataSize = params.size
	tc.hashSize = params.hashSize
	tc.branches = params.chunkSize / params.hashSize
	tc.addr = params.addr
	tc.chunkSize = params.chunkSize
	tc.putter = params.putter
	tc.workerCount = 0
	tc.jobC = make(chan *hashJob, 2*ChunkProcessors)
	tc.wg = &sync.WaitGroup{}
	tc.errC = make(chan error)
	tc.quitC = make(chan bool)

	return tc
}

// String() for pretty printing
func (c *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Addr.Log(), c.Size, len(c.SData))
}

type hashJob struct {
	key      Address
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (tc *TreeChunker) incrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount += 1
}

func (tc *TreeChunker) getWorkerCount() int64 {
	tc.workerLock.RLock()
	defer tc.workerLock.RUnlock()
	return tc.workerCount
}

func (tc *TreeChunker) decrementWorkerCount() {
	tc.workerLock.Lock()
	defer tc.workerLock.Unlock()
	tc.workerCount -= 1
}

func (tc *TreeChunker) Split() (k Address, wait func(), err error) {
	if tc.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	tc.runWorker()

	depth := 0
	treeSize := tc.chunkSize

	// takes the lowest depth such that chunkSize*branches^depth >= size;
	// a power series that finds the order of magnitude of the data size in base branches (the hash count per chunk), i.e. the number of levels of branching in the resulting tree.
	for ; treeSize < tc.dataSize; treeSize *= tc.branches {
		depth++
	}

	key := make([]byte, tc.hashSize)
	// this waitgroup member is released after the root hash is calculated
	tc.wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go tc.split(depth, treeSize/tc.branches, key, tc.dataSize, tc.wg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// waiting for all threads to finish
		tc.wg.Wait()
		close(tc.errC)
	}()

	defer close(tc.quitC)
	defer tc.putter.Close()
	select {
	case err := <-tc.errC:
		if err != nil {
			return nil, nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
		return nil, nil, errOperationTimedOut
	}

	return key, tc.putter.Wait, nil
}

func (tc *TreeChunker) split(depth int, treeSize int64, addr Address, size int64, parentWg *sync.WaitGroup) {

	// descend while the data slice fits into a smaller subtree
	for depth > 0 && size < treeSize {
		treeSize /= tc.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := tc.data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				tc.errC <- err
				return
			}
		}
		select {
		case tc.jobC <- &hashJob{addr, chunkData, size, parentWg}:
		case <-tc.quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing child node hashes
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*tc.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeKey := chunk[8+i*tc.hashSize : 8+(i+1)*tc.hashSize]

		childrenWg.Add(1)
		tc.split(depth-1, treeSize/tc.branches, subTreeKey, secSize, childrenWg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()

	worker := tc.getWorkerCount()
	if int64(len(tc.jobC)) > worker && worker < ChunkProcessors {
		tc.runWorker()
	}
	select {
	case tc.jobC <- &hashJob{addr, chunk, size, parentWg}:
	case <-tc.quitC:
	}
}

func (tc *TreeChunker) runWorker() {
	tc.incrementWorkerCount()
	go func() {
		defer tc.decrementWorkerCount()
		for {
			select {

			case job, ok := <-tc.jobC:
				if !ok {
					return
				}

				h, err := tc.putter.Put(job.chunk)
				if err != nil {
					tc.errC <- err
					return
				}
				copy(job.key, h)
				job.parentWg.Done()
			case <-tc.quitC:
				return
			}
		}
	}()
}

func (tc *TreeChunker) Append() (Address, func(), error) {
	return nil, nil, errAppendOpNotSupported
}
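
// Chunk layout produced by Split, for both leaf and intermediate chunks
// (an illustrative summary of the encoding used above):
//
//	bytes 0..7  little-endian uint64: size of the data span covered by this
//	            chunk's entire subtree
//	bytes 8..   leaf chunk:         up to chunkSize bytes of content data
//	            intermediate chunk: the concatenated hashes of its children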

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Address // root key
	chunkData ChunkData
	off       int64 // offset
	chunkSize int64 // inherited from chunker
	branches  int64 // inherited from chunker
	hashSize  int64 // inherited from chunker
	depth     int
	getter    Getter
}

func (tc *TreeChunker) Join() *LazyChunkReader {
	return &LazyChunkReader{
		key:       tc.addr,
		chunkSize: tc.chunkSize,
		branches:  tc.branches,
		hashSize:  tc.hashSize,
		depth:     tc.depth,
		getter:    tc.getter,
	}
}

// Size is meant to be called on the LazySectionReader
func (r *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.size", nil).Inc(1)

	log.Debug("lazychunkreader.size", "key", r.key)
	if r.chunkData == nil {
		chunkData, err := r.getter.Get(Reference(r.key))
		if err != nil {
			return 0, err
		}
		if chunkData == nil {
			select {
			case <-quitC:
				return 0, errors.New("aborted")
			default:
				return 0, fmt.Errorf("root chunk not found for %v", r.key.Hex())
			}
		}
		r.chunkData = chunkData
	}
	return r.chunkData.Size(), nil
}

// ReadAt can be called numerous times
// concurrent reads are allowed
// Size() needs to be called synchronously on the LazyChunkReader first
func (r *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	metrics.GetOrRegisterCounter("lazychunkreader.readat", nil).Inc(1)

	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := r.Size(quitC)
	if err != nil {
		log.Error("lazychunkreader.readat.size", "size", size, "err", err)
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = r.chunkSize
	for ; treeSize < size; treeSize *= r.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	length := int64(len(b))
	for d := 0; d < r.depth; d++ {
		off *= r.chunkSize
		length *= r.chunkSize
	}
	wg.Add(1)
	go r.join(b, off, off+length, depth, treeSize/r.branches, r.chunkData, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		log.Error("lazychunkreader.readat.errc", "err", err)
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		return int(size - off), io.EOF
	}
	return len(b), nil
}

func (r *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunkData ChunkData, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()
	// find the appropriate block level
	for chunkData.Size() < treeSize && depth > r.depth {
		treeSize /= r.branches
		depth--
	}

	// leaf chunk found
	if depth == r.depth {
		extra := 8 + eoff - int64(len(chunkData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunkData[8+off:8+eoff])
		return // simply give back the chunk's reader for content chunks
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	// the last non-leaf chunk can be shorter than the default chunk size; let's not read it further than its end
	currentBranches := int64(len(chunkData)-8) / r.hashSize
	if end > currentBranches {
		end = currentBranches
	}

	wg := &sync.WaitGroup{}
	defer wg.Wait()
	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunkData[8+j*r.hashSize : 8+(j+1)*r.hashSize]
			chunkData, err := r.getter.Get(Reference(childKey))
			if err != nil {
				log.Error("lazychunkreader.join", "key", fmt.Sprintf("%x", childKey), "err", err)
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found; key: %s", off, off+treeSize, fmt.Sprintf("%x", childKey)):
				case <-quitC:
				}
				return
			}
			if l := len(chunkData); l < 9 {
				select {
				case errC <- fmt.Errorf("chunk %v-%v incomplete; key: %s, data length %v", off, off+treeSize, fmt.Sprintf("%x", childKey), l):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			r.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/r.branches, chunkData, wg, errC, quitC)
		}(i)
	}
}

// Read keeps a cursor so it cannot be called simultaneously, see ReadAt
func (r *LazyChunkReader) Read(b []byte) (read int, err error) {
	log.Debug("lazychunkreader.read", "key", r.key)
	metrics.GetOrRegisterCounter("lazychunkreader.read", nil).Inc(1)

	read, err = r.ReadAt(b, r.off)
	if err != nil && err != io.EOF {
		log.Error("lazychunkreader.readat", "read", read, "err", err)
		metrics.GetOrRegisterCounter("lazychunkreader.read.err", nil).Inc(1)
	}

	metrics.GetOrRegisterCounter("lazychunkreader.read.bytes", nil).Inc(int64(read))

	r.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (r *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	log.Debug("lazychunkreader.seek", "key", r.key, "offset", offset)
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += r.off
	case 2:
		if r.chunkData == nil { // seeking from the end requires the root chunk for the size; call Size first
			_, err := r.Size(nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += r.chunkData.Size()
	}

	if offset < 0 {
		return 0, errOffset
	}
	r.off = offset
	return offset, nil
}
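
// A sketch of sequential access via the io.ReadSeeker surface (illustrative
// only; reader is a *LazyChunkReader obtained from TreeJoin):
//
//	if _, err := reader.Seek(4096, 0); err != nil { ... }
//	buf := make([]byte, 512)
//	n, err := reader.Read(buf) // reads bytes 4096..4607, fetching only the chunks needed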