github.com/gochain-io/gochain@v2.2.26+incompatible/swarm/storage/chunker.go (about)

     1  // Copyright 2016 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package storage
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"sync"
    25  	"time"
    26  )
    27  
    28  /*
    29  The distributed storage implemented in this package requires fixed-size chunks of content.
    30  
    31  Chunker is the interface to a component that is responsible for disassembling and assembling larger data.
    32  
    33  TreeChunker implements a Chunker based on a tree structure defined as follows:
    34  
    35  1 each node in the tree including the root and other branching nodes are stored as a chunk.
    36  
    37  2 branching nodes encode data contents that includes the size of the dataslice covered by its entire subtree under the node as well as the hash keys of all its children :
    38  data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
    39  
    40  3 Leaf nodes encode an actual subslice of the input data.
    41  
    42  4 if data size is not more than maximum chunksize, the data is stored in a single chunk
    43    key = hash(int64(size) + data)
    44  
    45  5 if data size is more than chunksize*branches^l, but no more than chunksize*
    46    branches^(l+1), the data vector is split into slices of chunksize*
    47    branches^l length (except the last one).
    48    key = hash(int64(size) + key(slice0) + key(slice1) + ...)
    49  
    50   The underlying hash function is configurable
    51  */
    52  
    53  /*
    54  Tree chunker is a concrete implementation of data chunking.
    55  This chunker works in a simple way, it builds a tree out of the document so that each node either represents a chunk of real data or a chunk of data representing a branching non-leaf node of the tree. In particular each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, since they encode a subtree.
    56  
    57  If all is well it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between : memory, file system, network socket (bzz peers storage request is read from the socket). In practice there may be need for several stages of internal buffering.
    58  The hashing itself does use extra copies and allocation though, since it does need it.
    59  */
    60  
    61  var (
    62  	errAppendOppNotSupported = errors.New("Append operation not supported")
    63  )
    64  
// TreeChunker implements the tree-based chunking scheme described at the top
// of this file: Split turns a data stream into leaf chunks plus intermediate
// chunks holding the hashes of their children, and Join returns a lazy reader
// over content addressed by a root key.
type TreeChunker struct {
	branches int64       // number of child hashes per intermediate chunk (from ChunkerParams.Branches)
	hashFunc SwarmHasher // factory producing a fresh hasher on each call
	// calculated
	hashSize    int64        // size in bytes of one hash, i.e. hashFunc().Size()
	chunkSize   int64        // hashSize * branches; maximum payload of a single chunk
	workerCount int64        // the number of hash worker routines currently running
	workerLock  sync.RWMutex // lock guarding workerCount
}
    74  
    75  func NewTreeChunker(params *ChunkerParams) *TreeChunker {
    76  	t := &TreeChunker{}
    77  	t.hashFunc = MakeHashFunc(params.Hash)
    78  	t.branches = params.Branches
    79  	t.hashSize = int64(t.hashFunc().Size())
    80  	t.chunkSize = t.hashSize * t.branches
    81  	t.workerCount = 0
    82  	return t
    83  }
    84  
    85  // String() for pretty printing
    86  func (c *Chunk) String() string {
    87  	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", c.Key.Log(), c.Size, len(c.SData))
    88  }
    89  
// hashJob is one unit of work handed to a hash worker: a fully assembled
// chunk that still needs to be hashed.
type hashJob struct {
	key   Key    // destination the computed hash is copied into (a slot in the parent chunk, or the root key)
	chunk []byte // serialised chunk: 8-byte little-endian size followed by the payload
	size  int64  // size of the data covered by the subtree rooted at this chunk
	done  func() // called once the job has been hashed (and the chunk sent off, if applicable)
}
    96  
    97  func (t *TreeChunker) incrementWorkerCount() {
    98  	t.workerLock.Lock()
    99  	defer t.workerLock.Unlock()
   100  	t.workerCount += 1
   101  }
   102  
   103  func (t *TreeChunker) getWorkerCount() int64 {
   104  	t.workerLock.RLock()
   105  	defer t.workerLock.RUnlock()
   106  	return t.workerCount
   107  }
   108  
   109  func (t *TreeChunker) decrementWorkerCount() {
   110  	t.workerLock.Lock()
   111  	defer t.workerLock.Unlock()
   112  	t.workerCount -= 1
   113  }
   114  
   115  func (t *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg *sync.WaitGroup) (Key, error) {
   116  	if t.chunkSize <= 0 {
   117  		panic("chunker must be initialised")
   118  	}
   119  
   120  	jobC := make(chan *hashJob, 2*ChunkProcessors)
   121  	var rootHash sync.WaitGroup
   122  	quitC := make(chan bool)
   123  	defer close(quitC)
   124  
   125  	t.incrementWorkerCount()
   126  	if swg != nil {
   127  		swg.Add(1)
   128  	}
   129  	go t.hashWorker(jobC, chunkC, quitC, swg)
   130  
   131  	depth := 0
   132  	treeSize := t.chunkSize
   133  
   134  	// takes lowest depth such that chunksize*HashCount^(depth+1) > size
   135  	// power series, will find the order of magnitude of the data size in base hashCount or numbers of levels of branching in the resulting tree.
   136  	for ; treeSize < size; treeSize *= t.branches {
   137  		depth++
   138  	}
   139  
   140  	key := make([]byte, t.hashFunc().Size())
   141  	rootHash.Add(1)
   142  	err := t.split(depth, treeSize/t.branches, key, data, size, jobC, chunkC, quitC, rootHash.Done, swg)
   143  	close(jobC)
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  
   148  	done := make(chan struct{})
   149  	go func() {
   150  		rootHash.Wait()
   151  		if swg != nil {
   152  			swg.Wait()
   153  		}
   154  		close(done)
   155  	}()
   156  
   157  	select {
   158  	case <-done:
   159  	case <-time.After(splitTimeout):
   160  		return nil, timeoutErr
   161  	}
   162  
   163  	return key, nil
   164  }
   165  
// split recursively builds the chunk (sub)tree for the next size bytes of
// data and queues every resulting chunk on jobC for hashing.
//
// depth and treeSize describe the level being built: treeSize is the amount
// of data covered by one child at this level. key is the slice the hash of
// this node's chunk will be written into (by the hash worker), and hashDone
// is invoked by the worker once that has happened. A read error from data is
// returned immediately. If quitC is closed while a job is being queued, the
// job is dropped and nil is returned.
func (t *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, quitC chan bool, hashDone func(), swg *sync.WaitGroup) error {
	// descend to the level that actually matches the amount of data left
	for depth > 0 && size < treeSize {
		treeSize /= t.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		// layout: 8-byte little-endian size followed by the raw data
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			// io.EOF is only acceptable when it arrives together with the last byte
			if err != nil && !(err == io.EOF && readBytes == size) {
				return err
			}
		}
		select {
		case jobC <- &hashJob{key, chunkData, size, hashDone}:
		case <-quitC:
		}
		return nil
	}
	// depth > 0
	// intermediate chunk containing child nodes hashes
	branchCnt := (size + treeSize - 1) / treeSize

	// layout: 8-byte little-endian size followed by one hash slot per child
	var chunk = make([]byte, branchCnt*t.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	var childHashes sync.WaitGroup
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		// (the child's hash is written straight into this slot of the parent chunk)
		subTreeKey := chunk[8+i*t.hashSize : 8+(i+1)*t.hashSize]

		childHashes.Add(1)
		err := t.split(depth-1, treeSize/t.branches, subTreeKey, data, secSize, jobC, chunkC, quitC, childHashes.Done, swg)
		if err != nil {
			return err
		}

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childHashes.Wait()

	// start an additional worker when the queue is longer than the number of
	// workers and the ChunkProcessors limit has not been reached
	worker := t.getWorkerCount()
	if int64(len(jobC)) > worker && worker < ChunkProcessors {
		t.incrementWorkerCount()
		if swg != nil {
			swg.Add(1)
		}
		go t.hashWorker(jobC, chunkC, quitC, swg)
	}
	select {
	case jobC <- &hashJob{key, chunk, size, hashDone}:
	case <-quitC:
	}
	return nil
}
   236  
   237  func (t *TreeChunker) hashWorker(jobC <-chan *hashJob, chunkC chan *Chunk, quitC chan bool, swg *sync.WaitGroup) {
   238  	defer t.decrementWorkerCount()
   239  	if swg != nil {
   240  		swg.Done()
   241  	}
   242  
   243  	hasher := t.hashFunc()
   244  	for {
   245  		select {
   246  		case job, ok := <-jobC:
   247  			if !ok {
   248  				return
   249  			}
   250  			// now we got the hashes in the chunk, then hash the chunks
   251  			t.hashChunk(hasher, job, chunkC, swg)
   252  		case <-quitC:
   253  			return
   254  		}
   255  	}
   256  }
   257  
   258  // The treeChunkers own Hash hashes together
   259  // - the size (of the subtree encoded in the Chunk)
   260  // - the Chunk, ie. the contents read from the input reader
   261  func (t *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
   262  	defer job.done()
   263  	hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
   264  	hasher.Write(job.chunk[8:])           // minus 8 []byte length
   265  	h := hasher.Sum(nil)
   266  
   267  	// report hash of this chunk one level up (keys corresponds to the proper subslice of the parent chunk)
   268  	copy(job.key, h)
   269  
   270  	// send off new chunk to storage
   271  	if chunkC != nil {
   272  		if swg != nil {
   273  			swg.Add(1)
   274  		}
   275  		chunkC <- &Chunk{Key: h, SData: job.chunk, Size: job.size, wg: swg}
   276  	}
   277  }
   278  
// Append is not implemented by the tree chunker: it ignores its arguments and
// always returns a nil key together with errAppendOppNotSupported.
func (t *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg *sync.WaitGroup) (Key, error) {
	return nil, errAppendOppNotSupported
}
   282  
// LazyChunkReader implements LazySectionReader
// It reads content addressed by a root key, fetching chunks on demand by
// sending retrieval requests on chunkC.
type LazyChunkReader struct {
	key       Key         // root key
	chunkC    chan *Chunk // chunk channel to send retrieve requests on
	chunk     *Chunk      // root chunk, cached by Size; its Size field is the size of the entire subtree
	off       int64       // offset (cursor used by Read and Seek)
	chunkSize int64       // inherit from chunker
	branches  int64       // inherit from chunker
	hashSize  int64       // inherit from chunker
}
   293  
   294  // implements the Joiner interface
   295  func (t *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
   296  	return &LazyChunkReader{
   297  		key:       key,
   298  		chunkC:    chunkC,
   299  		chunkSize: t.chunkSize,
   300  		branches:  t.branches,
   301  		hashSize:  t.hashSize,
   302  	}
   303  }
   304  
   305  // Size is meant to be called on the LazySectionReader
   306  func (l *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
   307  	if l.chunk != nil {
   308  		return l.chunk.Size, nil
   309  	}
   310  	chunk := retrieve(l.key, l.chunkC, quitC)
   311  	if chunk == nil {
   312  		select {
   313  		case <-quitC:
   314  			return 0, errors.New("aborted")
   315  		default:
   316  			return 0, fmt.Errorf("root chunk not found for %v", l.key.Hex())
   317  		}
   318  	}
   319  	l.chunk = chunk
   320  	return chunk.Size, nil
   321  }
   322  
   323  // read at can be called numerous times
   324  // concurrent reads are allowed
   325  // Size() needs to be called synchronously on the LazyChunkReader first
   326  func (l *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
   327  	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
   328  	if len(b) == 0 {
   329  		return 0, nil
   330  	}
   331  	quitC := make(chan bool)
   332  	size, err := l.Size(quitC)
   333  	if err != nil {
   334  		return 0, err
   335  	}
   336  
   337  	errC := make(chan error)
   338  
   339  	// }
   340  	var treeSize int64
   341  	var depth int
   342  	// calculate depth and max treeSize
   343  	treeSize = l.chunkSize
   344  	for ; treeSize < size; treeSize *= l.branches {
   345  		depth++
   346  	}
   347  	wg := sync.WaitGroup{}
   348  	wg.Add(1)
   349  	go l.join(b, off, off+int64(len(b)), depth, treeSize/l.branches, l.chunk, &wg, errC, quitC)
   350  	go func() {
   351  		wg.Wait()
   352  		close(errC)
   353  	}()
   354  
   355  	err = <-errC
   356  	if err != nil {
   357  		close(quitC)
   358  
   359  		return 0, err
   360  	}
   361  	if off+int64(len(b)) >= size {
   362  		return len(b), io.EOF
   363  	}
   364  	return len(b), nil
   365  }
   366  
   367  func (l *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunk *Chunk, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
   368  	defer parentWg.Done()
   369  
   370  	// find appropriate block level
   371  	for chunk.Size < treeSize && depth > 0 {
   372  		treeSize /= l.branches
   373  		depth--
   374  	}
   375  
   376  	// leaf chunk found
   377  	if depth == 0 {
   378  		extra := 8 + eoff - int64(len(chunk.SData))
   379  		if extra > 0 {
   380  			eoff -= extra
   381  		}
   382  		copy(b, chunk.SData[8+off:8+eoff])
   383  		return // simply give back the chunks reader for content chunks
   384  	}
   385  
   386  	// subtree
   387  	start := off / treeSize
   388  	end := (eoff + treeSize - 1) / treeSize
   389  
   390  	wg := &sync.WaitGroup{}
   391  	defer wg.Wait()
   392  
   393  	for i := start; i < end; i++ {
   394  		soff := i * treeSize
   395  		roff := soff
   396  		seoff := soff + treeSize
   397  
   398  		if soff < off {
   399  			soff = off
   400  		}
   401  		if seoff > eoff {
   402  			seoff = eoff
   403  		}
   404  		if depth > 1 {
   405  			wg.Wait()
   406  		}
   407  		wg.Add(1)
   408  		go func(j int64) {
   409  			childKey := chunk.SData[8+j*l.hashSize : 8+(j+1)*l.hashSize]
   410  			chunk := retrieve(childKey, l.chunkC, quitC)
   411  			if chunk == nil {
   412  				select {
   413  				case errC <- fmt.Errorf("chunk %v-%v not found", off, off+treeSize):
   414  				case <-quitC:
   415  				}
   416  				return
   417  			}
   418  			if soff < off {
   419  				soff = off
   420  			}
   421  			l.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/l.branches, chunk, wg, errC, quitC)
   422  		}(i)
   423  	} //for
   424  }
   425  
   426  // the helper method submits chunks for a key to a oueue (DPA) and
   427  // block until they time out or arrive
   428  // abort if quitC is readable
   429  func retrieve(key Key, chunkC chan *Chunk, quitC chan bool) *Chunk {
   430  	chunk := &Chunk{
   431  		Key: key,
   432  		C:   make(chan bool), // close channel to signal data delivery
   433  	}
   434  	// submit chunk for retrieval
   435  	select {
   436  	case chunkC <- chunk: // submit retrieval request, someone should be listening on the other side (or we will time out globally)
   437  	case <-quitC:
   438  		return nil
   439  	}
   440  	// waiting for the chunk retrieval
   441  	select { // chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
   442  
   443  	case <-quitC:
   444  		// this is how we control process leakage (quitC is closed once join is finished (after timeout))
   445  		return nil
   446  	case <-chunk.C: // bells are ringing, data have been delivered
   447  	}
   448  	if len(chunk.SData) == 0 {
   449  		return nil // chunk.Size = int64(binary.LittleEndian.Uint64(chunk.SData[0:8]))
   450  
   451  	}
   452  	return chunk
   453  }
   454  
   455  // Read keeps a cursor so cannot be called simulateously, see ReadAt
   456  func (l *LazyChunkReader) Read(b []byte) (read int, err error) {
   457  	read, err = l.ReadAt(b, l.off)
   458  
   459  	l.off += int64(read)
   460  	return
   461  }
   462  
   463  // completely analogous to standard SectionReader implementation
   464  var errWhence = errors.New("Seek: invalid whence")
   465  var errOffset = errors.New("Seek: invalid offset")
   466  
   467  func (l *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
   468  	switch whence {
   469  	default:
   470  		return 0, errWhence
   471  	case 0:
   472  		offset += 0
   473  	case 1:
   474  		offset += l.off
   475  	case 2:
   476  		if l.chunk == nil { //seek from the end requires rootchunk for size. call Size first
   477  			_, err := l.Size(nil)
   478  			if err != nil {
   479  				return 0, fmt.Errorf("can't get size: %v", err)
   480  			}
   481  		}
   482  		offset += l.chunk.Size
   483  	}
   484  
   485  	if offset < 0 {
   486  		return 0, errOffset
   487  	}
   488  	l.off = offset
   489  	return offset, nil
   490  }