github.com/Elemental-core/elementalcore@v0.0.0-20191206075037-63891242267a/swarm/storage/chunker.go (about)

// Copyright 2016 The elementalcore Authors
// This file is part of the elementalcore library.
//
// The elementalcore library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The elementalcore library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the elementalcore library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the node's entire subtree as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunksize, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*
  branches^(l+1), the data vector is split into slices of chunksize*
  branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/
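
// A worked example of the scheme above (illustrative figures, assuming a
// 32-byte hash and 128 branches): chunkSize = 32*128 = 4096 bytes, so any
// document of up to 4096 bytes is stored as a single leaf chunk (rule 4).
// A 1 MiB document exceeds chunkSize*branches = 524,288 bytes but not
// chunkSize*branches^2 = 67,108,864, so by rule 5 it is split into two
// slices of 524,288 bytes, each of which splits into 128 leaf chunks of
// 4096 bytes, giving a tree two levels of branching deep above the leaves.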

/*
Tree chunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a chunk of data representing a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.

If all is well, it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between: memory, file system, network socket (bzz peer storage requests are read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocation, though, since it requires them.
*/

var (
	errAppendOpNotSupported = errors.New("append operation not supported")
	errOperationTimedOut    = errors.New("operation timed out")
)

type TreeChunker struct {
	branches int64
	hashFunc SwarmHasher
	// calculated
	hashSize    int64        // self.hashFunc.New().Size()
	chunkSize   int64        // hashSize * branches
	workerCount int64        // the number of worker routines used
	workerLock  sync.RWMutex // lock for the worker count
}

func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
	self = &TreeChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 0

	return
}
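
// A minimal usage sketch (illustrative only; the hash name and channel
// wiring are assumptions, not part of this file):
//
//	chunker := NewTreeChunker(&ChunkerParams{Hash: "SHA3", Branches: 128})
//	key, err := chunker.Split(reader, size, chunkC, nil, nil)
//
// Split returns the root key once the whole tree has been hashed. Chunks
// are delivered on chunkC for storage as they are produced, so chunkC must
// be drained by a storage component (or passed as nil to discard chunks).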
// String() for pretty printing
func (self *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", self.Key.Log(), self.Size, len(self.SData))
}

type hashJob struct {
	key      Key
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (self *TreeChunker) incrementWorkerCount() {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	self.workerCount++
}

func (self *TreeChunker) getWorkerCount() int64 {
	self.workerLock.RLock()
	defer self.workerLock.RUnlock()
	return self.workerCount
}

func (self *TreeChunker) decrementWorkerCount() {
	self.workerLock.Lock()
	defer self.workerLock.Unlock()
	self.workerCount--
}

func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
	if self.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	jobC := make(chan *hashJob, 2*ChunkProcessors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)
	quitC := make(chan bool)

	// wwg = workers waitgroup, keeps track of hash workers spawned by this split call
	if wwg != nil {
		wwg.Add(1)
	}

	self.incrementWorkerCount()
	go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)

	depth := 0
	treeSize := self.chunkSize

	// find the lowest depth such that chunksize*branches^depth >= size,
	// i.e. the order of magnitude of the data size in base branches: the number of levels of branching in the resulting tree
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}

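	// e.g. with chunkSize 4096 and branches 128 (illustrative values), a
	// 1 MiB input gives depth 2, since 4096*128 < 2^20 <= 4096*128^2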
	key := make([]byte, self.hashFunc().Size())
	// this waitgroup member is released after the root hash is calculated
	wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go self.split(depth, treeSize/self.branches, key, data, size, jobC, chunkC, errC, quitC, wg, swg, wwg)

	// closes the internal error channel once all subprocesses in the waitgroup have finished
	go func() {
		// waiting for all threads to finish
		wg.Wait()
		// if the storage waitgroup is non-nil, we wait for storage to finish too
		if swg != nil {
			swg.Wait()
		}
		close(errC)
	}()

	defer close(quitC)
	select {
	case err := <-errC:
		if err != nil {
			return nil, err
		}
	case <-time.NewTimer(splitTimeout).C:
		return nil, errOperationTimedOut
	}

	return key, nil
}

func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {

	for depth > 0 && size < treeSize {
		treeSize /= self.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				errC <- err
				return
			}
		}
		select {
		case jobC <- &hashJob{key, chunkData, size, parentWg}:
		case <-quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing child node hashes
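	// branchCnt is the ceiling of size/treeSize: the number of children
	// needed so that their subtrees (treeSize bytes each) cover size bytes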
	branchCnt := (size + treeSize - 1) / treeSize

	var chunk = make([]byte, branchCnt*self.hashSize+8)
	var pos, i int64

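	// the first 8 bytes of every chunk hold the size of the subtree it
	// encodes, little-endian, matching the leaf encoding above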
	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeKey := chunk[8+i*self.hashSize : 8+(i+1)*self.hashSize]

		childrenWg.Add(1)
		self.split(depth-1, treeSize/self.branches, subTreeKey, data, secSize, jobC, chunkC, errC, quitC, childrenWg, swg, wwg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()

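	// if hash jobs are queuing up faster than the current workers drain them,
	// spawn another hash worker, up to a maximum of ChunkProcessors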
	worker := self.getWorkerCount()
	if int64(len(jobC)) > worker && worker < ChunkProcessors {
		if wwg != nil {
			wwg.Add(1)
		}
		self.incrementWorkerCount()
		go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
	}
	select {
	case jobC <- &hashJob{key, chunk, size, parentWg}:
	case <-quitC:
	}
}

func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
	defer self.decrementWorkerCount()

	hasher := self.hashFunc()
	if wwg != nil {
		defer wwg.Done()
	}
	for {
		select {
		case job, ok := <-jobC:
			if !ok {
				return
			}
			// now that the chunk contains its children's hashes, hash the chunk itself
			self.hashChunk(hasher, job, chunkC, swg)
		case <-quitC:
			return
		}
	}
}

// The TreeChunker's own hash hashes together
// - the size (of the subtree encoded in the Chunk)
// - the Chunk, i.e. the contents read from the input reader
func (self *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
	hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
	hasher.Write(job.chunk[8:])           // minus 8 []byte length
	h := hasher.Sum(nil)

	newChunk := &Chunk{
		Key:   h,
		SData: job.chunk,
		Size:  job.size,
		wg:    swg,
	}

	// report the hash of this chunk one level up (the key corresponds to the proper subslice of the parent chunk)
	copy(job.key, h)
	// send off the new chunk to storage
	if chunkC != nil && swg != nil {
		swg.Add(1)
	}
	job.parentWg.Done()

	if chunkC != nil {
		chunkC <- newChunk
	}
}

func (self *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
	return nil, errAppendOpNotSupported
}

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Key         // root key
	chunkC    chan *Chunk // chunk channel to send retrieve requests on
	chunk     *Chunk      // root chunk; its Size field gives the size of the entire subtree
	off       int64       // offset
	chunkSize int64       // inherit from chunker
	branches  int64       // inherit from chunker
	hashSize  int64       // inherit from chunker
}

// Join implements the Joiner interface
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
	return &LazyChunkReader{
		key:       key,
		chunkC:    chunkC,
		chunkSize: self.chunkSize,
		branches:  self.branches,
		hashSize:  self.hashSize,
	}
}
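
// A minimal read sketch (illustrative only; the channel wiring and the
// component serving retrieval requests on chunkC are assumptions):
//
//	reader := chunker.Join(key, chunkC)
//	size, err := reader.Size(nil)   // fetches the root chunk
//	buf := make([]byte, size)
//	n, err := reader.ReadAt(buf, 0) // walks the tree, fetching chunks on demand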

// Size is meant to be called on the LazySectionReader
func (self *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
	if self.chunk != nil {
		return self.chunk.Size, nil
	}
	chunk := retrieve(self.key, self.chunkC, quitC)
	if chunk == nil {
		select {
		case <-quitC:
			return 0, errors.New("aborted")
		default:
			return 0, fmt.Errorf("root chunk not found for %v", self.key.Hex())
		}
	}
	self.chunk = chunk
	return chunk.Size, nil
}

// ReadAt can be called numerous times;
// concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first
func (self *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := self.Size(quitC)
	if err != nil {
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = self.chunkSize
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	wg.Add(1)
	go self.join(b, off, off+int64(len(b)), depth, treeSize/self.branches, self.chunk, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		return len(b), io.EOF
	}
	return len(b), nil
}

func (self *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunk *Chunk, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()

	// find the appropriate block level
	for chunk.Size < treeSize && depth > 0 {
		treeSize /= self.branches
		depth--
	}

	// leaf chunk found
	if depth == 0 {
		extra := 8 + eoff - int64(len(chunk.SData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunk.SData[8+off:8+eoff])
		return // simply give back the chunk's data for content chunks
	}

	// subtree
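	// start and end index the child subtrees that overlap the requested
	// range [off, eoff); each child covers treeSize bytes of the document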
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	wg := &sync.WaitGroup{}
	defer wg.Wait()

	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunk.SData[8+j*self.hashSize : 8+(j+1)*self.hashSize]
			chunk := retrieve(childKey, self.chunkC, quitC)
			if chunk == nil {
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found", off, off+treeSize):
				case <-quitC:
				}
				return
			}
			self.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/self.branches, chunk, wg, errC, quitC)
		}(i)
	}
}

// this helper method submits chunks for a key to a queue (DPA) and
// blocks until they time out or arrive;
// it aborts if quitC is readable
func retrieve(key Key, chunkC chan *Chunk, quitC chan bool) *Chunk {
	chunk := &Chunk{
		Key: key,
		C:   make(chan bool), // close channel to signal data delivery
	}
	// submit chunk for retrieval
	select {
	case chunkC <- chunk: // submit retrieval request, someone should be listening on the other side (or we will time out globally)
	case <-quitC:
		return nil
	}
	// waiting for the chunk retrieval
	select {
	case <-quitC:
		// this is how we control process leakage (quitC is closed once join is finished (after timeout))
		return nil
	case <-chunk.C: // bells are ringing, data has been delivered
	}
	if len(chunk.SData) == 0 {
		return nil
	}
	return chunk
}

// Read keeps a cursor so it cannot be called simultaneously, see ReadAt
func (self *LazyChunkReader) Read(b []byte) (read int, err error) {
	read, err = self.ReadAt(b, self.off)

	self.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (s *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += s.off
	case 2:
		if s.chunk == nil { // seeking from the end requires the root chunk for size; call Size first
			_, err := s.Size(nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += s.chunk.Size
	}

	if offset < 0 {
		return 0, errOffset
	}
	s.off = offset
	return offset, nil
}