github.com/n1ghtfa1l/go-vnt@v0.6.4-alpha.6/swarm/storage/chunker.go (about)

     1  // Copyright 2016 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package storage
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/vntchain/go-vnt/metrics"
    28  )
    29  
    30  /*
The distributed storage implemented in this package requires fixed-size chunks of content.
    32  
    33  Chunker is the interface to a component that is responsible for disassembling and assembling larger data.
    34  
    35  TreeChunker implements a Chunker based on a tree structure defined as follows:
    36  
    37  1 each node in the tree including the root and other branching nodes are stored as a chunk.
    38  
    39  2 branching nodes encode data contents that includes the size of the dataslice covered by its entire subtree under the node as well as the hash keys of all its children :
    40  data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
    41  
    42  3 Leaf nodes encode an actual subslice of the input data.
    43  
    44  4 if data size is not more than maximum chunksize, the data is stored in a single chunk
    45    key = hash(int64(size) + data)
    46  
    47  5 if data size is more than chunksize*branches^l, but no more than chunksize*
    48    branches^(l+1), the data vector is split into slices of chunksize*
    49    branches^l length (except the last one).
    50    key = hash(int64(size) + key(slice0) + key(slice1) + ...)
    51  
    52   The underlying hash function is configurable
    53  */
    54  
    55  /*
    56  Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node represents either a chunk of real data or a chunk of data representing a branching non-leaf node of the tree. In particular, each such non-leaf chunk represents a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.
    58  
    59  If all is well it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between : memory, file system, network socket (bzz peers storage request is read from the socket). In practice there may be need for several stages of internal buffering.
    60  The hashing itself does use extra copies and allocation though, since it does need it.
    61  */
    62  
// Sentinel errors of the chunker: errAppendOppNotSuported is what
// TreeChunker.Append returns, errOperationTimedOut is what Split returns
// when splitTimeout elapses before the split has completed.
var (
	errAppendOppNotSuported = errors.New("Append operation not supported")
	errOperationTimedOut    = errors.New("operation timed out")
)
    67  
// Metrics variables.
var (
	// newChunkCounter is incremented by hashChunk for every chunk it sends
	// on the chunk channel.
	newChunkCounter = metrics.NewRegisteredCounter("storage.chunks.new", nil)
)
    72  
// TreeChunker is the tree-based chunker described at the top of this file.
// Split disassembles data into a tree of chunks; Join returns a lazy reader
// for the data stored under a root key.
type TreeChunker struct {
	branches int64       // taken from ChunkerParams.Branches; divisor between tree levels
	hashFunc SwarmHasher // constructor for the hasher used on every chunk
	// calculated
	hashSize    int64        // hashFunc().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
}
    82  
    83  func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
    84  	self = &TreeChunker{}
    85  	self.hashFunc = MakeHashFunc(params.Hash)
    86  	self.branches = params.Branches
    87  	self.hashSize = int64(self.hashFunc().Size())
    88  	self.chunkSize = self.hashSize * self.branches
    89  	self.workerCount = 0
    90  
    91  	return
    92  }
    93  
    94  // func (self *TreeChunker) KeySize() int64 {
    95  // 	return self.hashSize
    96  // }
    97  
    98  // String() for pretty printing
    99  func (self *Chunk) String() string {
   100  	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", self.Key.Log(), self.Size, len(self.SData))
   101  }
   102  
// hashJob is one unit of work for a hashWorker: a single chunk to be hashed
// by hashChunk.
type hashJob struct {
	key      Key             // destination the computed hash is copied into
	chunk    []byte          // 8 bytes of little-endian size followed by the chunk payload
	size     int64           // size stored in the resulting Chunk
	parentWg *sync.WaitGroup // released by hashChunk once the hash has been copied into key
}
   109  
   110  func (self *TreeChunker) incrementWorkerCount() {
   111  	self.workerLock.Lock()
   112  	defer self.workerLock.Unlock()
   113  	self.workerCount += 1
   114  }
   115  
   116  func (self *TreeChunker) getWorkerCount() int64 {
   117  	self.workerLock.RLock()
   118  	defer self.workerLock.RUnlock()
   119  	return self.workerCount
   120  }
   121  
   122  func (self *TreeChunker) decrementWorkerCount() {
   123  	self.workerLock.Lock()
   124  	defer self.workerLock.Unlock()
   125  	self.workerCount -= 1
   126  }
   127  
   128  func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
   129  	if self.chunkSize <= 0 {
   130  		panic("chunker must be initialised")
   131  	}
   132  
   133  	jobC := make(chan *hashJob, 2*ChunkProcessors)
   134  	wg := &sync.WaitGroup{}
   135  	errC := make(chan error)
   136  	quitC := make(chan bool)
   137  
   138  	// wwg = workers waitgroup keeps track of hashworkers spawned by this split call
   139  	if wwg != nil {
   140  		wwg.Add(1)
   141  	}
   142  
   143  	self.incrementWorkerCount()
   144  	go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
   145  
   146  	depth := 0
   147  	treeSize := self.chunkSize
   148  
   149  	// takes lowest depth such that chunksize*HashCount^(depth+1) > size
   150  	// power series, will find the order of magnitude of the data size in base hashCount or numbers of levels of branching in the resulting tree.
   151  	for ; treeSize < size; treeSize *= self.branches {
   152  		depth++
   153  	}
   154  
   155  	key := make([]byte, self.hashFunc().Size())
   156  	// this waitgroup member is released after the root hash is calculated
   157  	wg.Add(1)
   158  	//launch actual recursive function passing the waitgroups
   159  	go self.split(depth, treeSize/self.branches, key, data, size, jobC, chunkC, errC, quitC, wg, swg, wwg)
   160  
   161  	// closes internal error channel if all subprocesses in the workgroup finished
   162  	go func() {
   163  		// waiting for all threads to finish
   164  		wg.Wait()
   165  		// if storage waitgroup is non-nil, we wait for storage to finish too
   166  		if swg != nil {
   167  			swg.Wait()
   168  		}
   169  		close(errC)
   170  	}()
   171  
   172  	defer close(quitC)
   173  	select {
   174  	case err := <-errC:
   175  		if err != nil {
   176  			return nil, err
   177  		}
   178  	case <-time.NewTimer(splitTimeout).C:
   179  		return nil, errOperationTimedOut
   180  	}
   181  
   182  	return key, nil
   183  }
   184  
   185  func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {
   186  
   187  	//
   188  
   189  	for depth > 0 && size < treeSize {
   190  		treeSize /= self.branches
   191  		depth--
   192  	}
   193  
   194  	if depth == 0 {
   195  		// leaf nodes -> content chunks
   196  		chunkData := make([]byte, size+8)
   197  		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
   198  		var readBytes int64
   199  		for readBytes < size {
   200  			n, err := data.Read(chunkData[8+readBytes:])
   201  			readBytes += int64(n)
   202  			if err != nil && !(err == io.EOF && readBytes == size) {
   203  				errC <- err
   204  				return
   205  			}
   206  		}
   207  		select {
   208  		case jobC <- &hashJob{key, chunkData, size, parentWg}:
   209  		case <-quitC:
   210  		}
   211  		return
   212  	}
   213  	// dept > 0
   214  	// intermediate chunk containing child nodes hashes
   215  	branchCnt := (size + treeSize - 1) / treeSize
   216  
   217  	var chunk = make([]byte, branchCnt*self.hashSize+8)
   218  	var pos, i int64
   219  
   220  	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))
   221  
   222  	childrenWg := &sync.WaitGroup{}
   223  	var secSize int64
   224  	for i < branchCnt {
   225  		// the last item can have shorter data
   226  		if size-pos < treeSize {
   227  			secSize = size - pos
   228  		} else {
   229  			secSize = treeSize
   230  		}
   231  		// the hash of that data
   232  		subTreeKey := chunk[8+i*self.hashSize : 8+(i+1)*self.hashSize]
   233  
   234  		childrenWg.Add(1)
   235  		self.split(depth-1, treeSize/self.branches, subTreeKey, data, secSize, jobC, chunkC, errC, quitC, childrenWg, swg, wwg)
   236  
   237  		i++
   238  		pos += treeSize
   239  	}
   240  	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
   241  	// parentWg.Add(1)
   242  	// go func() {
   243  	childrenWg.Wait()
   244  
   245  	worker := self.getWorkerCount()
   246  	if int64(len(jobC)) > worker && worker < ChunkProcessors {
   247  		if wwg != nil {
   248  			wwg.Add(1)
   249  		}
   250  		self.incrementWorkerCount()
   251  		go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
   252  
   253  	}
   254  	select {
   255  	case jobC <- &hashJob{key, chunk, size, parentWg}:
   256  	case <-quitC:
   257  	}
   258  }
   259  
   260  func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
   261  	defer self.decrementWorkerCount()
   262  
   263  	hasher := self.hashFunc()
   264  	if wwg != nil {
   265  		defer wwg.Done()
   266  	}
   267  	for {
   268  		select {
   269  
   270  		case job, ok := <-jobC:
   271  			if !ok {
   272  				return
   273  			}
   274  			// now we got the hashes in the chunk, then hash the chunks
   275  			self.hashChunk(hasher, job, chunkC, swg)
   276  		case <-quitC:
   277  			return
   278  		}
   279  	}
   280  }
   281  
   282  // The treeChunkers own Hash hashes together
   283  // - the size (of the subtree encoded in the Chunk)
   284  // - the Chunk, ie. the contents read from the input reader
   285  func (self *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
   286  	hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
   287  	hasher.Write(job.chunk[8:])           // minus 8 []byte length
   288  	h := hasher.Sum(nil)
   289  
   290  	newChunk := &Chunk{
   291  		Key:   h,
   292  		SData: job.chunk,
   293  		Size:  job.size,
   294  		wg:    swg,
   295  	}
   296  
   297  	// report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk)
   298  	copy(job.key, h)
   299  	// send off new chunk to storage
   300  	if chunkC != nil {
   301  		if swg != nil {
   302  			swg.Add(1)
   303  		}
   304  	}
   305  	job.parentWg.Done()
   306  
   307  	if chunkC != nil {
   308  		//NOTE: this increases the chunk count even if the local node already has this chunk;
   309  		//on file upload the node will increase this counter even if the same file has already been uploaded
   310  		//So it should be evaluated whether it is worth keeping this counter
   311  		//and/or actually better track when the chunk is Put to the local database
   312  		//(which may question the need for disambiguation when a completely new chunk has been created
   313  		//and/or a chunk is being put to the local DB; for chunk tracking it may be worth distinguishing
   314  		newChunkCounter.Inc(1)
   315  		chunkC <- newChunk
   316  	}
   317  }
   318  
   319  func (self *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
   320  	return nil, errAppendOppNotSuported
   321  }
   322  
// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Key         // root key
	chunkC    chan *Chunk // chunk channel to send retrieve requests on
	chunk     *Chunk      // root chunk, cached by Size; its Size field is the size of the entire subtree
	off       int64       // offset, i.e. the cursor used by Read and Seek
	chunkSize int64       // inherit from chunker
	branches  int64       // inherit from chunker
	hashSize  int64       // inherit from chunker
}
   333  
   334  // implements the Joiner interface
   335  func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
   336  	return &LazyChunkReader{
   337  		key:       key,
   338  		chunkC:    chunkC,
   339  		chunkSize: self.chunkSize,
   340  		branches:  self.branches,
   341  		hashSize:  self.hashSize,
   342  	}
   343  }
   344  
   345  // Size is meant to be called on the LazySectionReader
   346  func (self *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
   347  	if self.chunk != nil {
   348  		return self.chunk.Size, nil
   349  	}
   350  	chunk := retrieve(self.key, self.chunkC, quitC)
   351  	if chunk == nil {
   352  		select {
   353  		case <-quitC:
   354  			return 0, errors.New("aborted")
   355  		default:
   356  			return 0, fmt.Errorf("root chunk not found for %v", self.key.Hex())
   357  		}
   358  	}
   359  	self.chunk = chunk
   360  	return chunk.Size, nil
   361  }
   362  
   363  // read at can be called numerous times
   364  // concurrent reads are allowed
   365  // Size() needs to be called synchronously on the LazyChunkReader first
   366  func (self *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
   367  	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
   368  	if len(b) == 0 {
   369  		return 0, nil
   370  	}
   371  	quitC := make(chan bool)
   372  	size, err := self.Size(quitC)
   373  	if err != nil {
   374  		return 0, err
   375  	}
   376  
   377  	errC := make(chan error)
   378  
   379  	// }
   380  	var treeSize int64
   381  	var depth int
   382  	// calculate depth and max treeSize
   383  	treeSize = self.chunkSize
   384  	for ; treeSize < size; treeSize *= self.branches {
   385  		depth++
   386  	}
   387  	wg := sync.WaitGroup{}
   388  	wg.Add(1)
   389  	go self.join(b, off, off+int64(len(b)), depth, treeSize/self.branches, self.chunk, &wg, errC, quitC)
   390  	go func() {
   391  		wg.Wait()
   392  		close(errC)
   393  	}()
   394  
   395  	err = <-errC
   396  	if err != nil {
   397  		close(quitC)
   398  
   399  		return 0, err
   400  	}
   401  	if off+int64(len(b)) >= size {
   402  		return len(b), io.EOF
   403  	}
   404  	return len(b), nil
   405  }
   406  
   407  func (self *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunk *Chunk, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
   408  	defer parentWg.Done()
   409  	// return NewDPA(&LocalStore{})
   410  
   411  	// chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
   412  
   413  	// find appropriate block level
   414  	for chunk.Size < treeSize && depth > 0 {
   415  		treeSize /= self.branches
   416  		depth--
   417  	}
   418  
   419  	// leaf chunk found
   420  	if depth == 0 {
   421  		extra := 8 + eoff - int64(len(chunk.SData))
   422  		if extra > 0 {
   423  			eoff -= extra
   424  		}
   425  		copy(b, chunk.SData[8+off:8+eoff])
   426  		return // simply give back the chunks reader for content chunks
   427  	}
   428  
   429  	// subtree
   430  	start := off / treeSize
   431  	end := (eoff + treeSize - 1) / treeSize
   432  
   433  	wg := &sync.WaitGroup{}
   434  	defer wg.Wait()
   435  
   436  	for i := start; i < end; i++ {
   437  		soff := i * treeSize
   438  		roff := soff
   439  		seoff := soff + treeSize
   440  
   441  		if soff < off {
   442  			soff = off
   443  		}
   444  		if seoff > eoff {
   445  			seoff = eoff
   446  		}
   447  		if depth > 1 {
   448  			wg.Wait()
   449  		}
   450  		wg.Add(1)
   451  		go func(j int64) {
   452  			childKey := chunk.SData[8+j*self.hashSize : 8+(j+1)*self.hashSize]
   453  			chunk := retrieve(childKey, self.chunkC, quitC)
   454  			if chunk == nil {
   455  				select {
   456  				case errC <- fmt.Errorf("chunk %v-%v not found", off, off+treeSize):
   457  				case <-quitC:
   458  				}
   459  				return
   460  			}
   461  			if soff < off {
   462  				soff = off
   463  			}
   464  			self.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/self.branches, chunk, wg, errC, quitC)
   465  		}(i)
   466  	} //for
   467  }
   468  
   469  // the helper method submits chunks for a key to a oueue (DPA) and
   470  // block until they time out or arrive
   471  // abort if quitC is readable
   472  func retrieve(key Key, chunkC chan *Chunk, quitC chan bool) *Chunk {
   473  	chunk := &Chunk{
   474  		Key: key,
   475  		C:   make(chan bool), // close channel to signal data delivery
   476  	}
   477  	// submit chunk for retrieval
   478  	select {
   479  	case chunkC <- chunk: // submit retrieval request, someone should be listening on the other side (or we will time out globally)
   480  	case <-quitC:
   481  		return nil
   482  	}
   483  	// waiting for the chunk retrieval
   484  	select { // chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
   485  
   486  	case <-quitC:
   487  		// this is how we control process leakage (quitC is closed once join is finished (after timeout))
   488  		return nil
   489  	case <-chunk.C: // bells are ringing, data have been delivered
   490  	}
   491  	if len(chunk.SData) == 0 {
   492  		return nil // chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
   493  
   494  	}
   495  	return chunk
   496  }
   497  
   498  // Read keeps a cursor so cannot be called simulateously, see ReadAt
   499  func (self *LazyChunkReader) Read(b []byte) (read int, err error) {
   500  	read, err = self.ReadAt(b, self.off)
   501  
   502  	self.off += int64(read)
   503  	return
   504  }
   505  
// Errors returned by Seek, completely analogous to the standard SectionReader
// implementation: errWhence for an unknown whence value, errOffset for a
// resulting offset that is negative.
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")
   509  
   510  func (s *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
   511  	switch whence {
   512  	default:
   513  		return 0, errWhence
   514  	case 0:
   515  		offset += 0
   516  	case 1:
   517  		offset += s.off
   518  	case 2:
   519  		if s.chunk == nil { //seek from the end requires rootchunk for size. call Size first
   520  			_, err := s.Size(nil)
   521  			if err != nil {
   522  				return 0, fmt.Errorf("can't get size: %v", err)
   523  			}
   524  		}
   525  		offset += s.chunk.Size
   526  	}
   527  
   528  	if offset < 0 {
   529  		return 0, errOffset
   530  	}
   531  	s.off = offset
   532  	return offset, nil
   533  }