github.com/ylsgit/go-ethereum@v1.6.5/swarm/storage/chunker.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash"
	"io"
	"sync"
)
/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data that includes the size of the data slice covered by the node's entire subtree, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunksize, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*
  branches^(l+1), the data vector is split into slices of chunksize*
  branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/
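
// For illustration only (a sketch, not part of the original API): every chunk
// is serialised as an 8-byte little-endian length prefix followed by its
// payload, and its key is the hash of that serialised form. A hypothetical
// helper computing a leaf chunk's key would look like:
//
//	func leafKey(hasher hash.Hash, data []byte) []byte {
//		chunk := make([]byte, 8+len(data))
//		binary.LittleEndian.PutUint64(chunk[0:8], uint64(len(data)))
//		copy(chunk[8:], data)
//		hasher.Reset()
//		hasher.Write(chunk)
//		return hasher.Sum(nil)
//	}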

const (
	defaultHash = "SHA3" // http://golang.org/pkg/hash/#Hash
	// defaultHash           = "SHA256" // http://golang.org/pkg/hash/#Hash
	defaultBranches int64 = 128
	// hashSize     int64 = hasherfunc.New().Size() // hasher knows about its own length in bytes
	// chunksize    int64 = branches * hashSize     // chunk is defined as this
)

/*
TreeChunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are transparent since their represented size component is strictly greater than their maximum data size, because they encode a subtree.

If all is well, it is possible to implement this by simply composing readers so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between: memory, file system, network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocations, though, since it requires them.
*/

type ChunkerParams struct {
	Branches int64
	Hash     string
}

func NewChunkerParams() *ChunkerParams {
	return &ChunkerParams{
		Branches: defaultBranches,
		Hash:     defaultHash,
	}
}

type TreeChunker struct {
	branches int64
	hashFunc Hasher
	// calculated
	hashSize    int64 // self.hashFunc.New().Size()
	chunkSize   int64 // hashSize * branches
	workerCount int
}

func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
	self = &TreeChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 1
	return
}
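
// Note: with the default parameters above and assuming a 32-byte hash
// (e.g. Keccak-256), the derived values are hashSize = 32 and
// chunkSize = 32 * 128 = 4096 bytes.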

// func (self *TreeChunker) KeySize() int64 {
// 	return self.hashSize
// }

// String() for pretty printing
func (self *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", self.Key.Log(), self.Size, len(self.SData))
}

type hashJob struct {
	key      Key
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

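// Split reads the data from the reader, chops it into chunks, sends the chunks
// on chunkC and returns the root key. Illustrative use (consumeAndStore is a
// hypothetical consumer, not part of this file):
//
//	chunker := NewTreeChunker(NewChunkerParams())
//	chunkC := make(chan *Chunk)
//	go consumeAndStore(chunkC)
//	key, err := chunker.Split(bytes.NewReader(doc), int64(len(doc)), chunkC, nil, nil)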
func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {

	if self.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	jobC := make(chan *hashJob, 2*processors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)
	quitC := make(chan bool)

	// wwg = workers waitgroup, keeps track of hash workers spawned by this split call
	if wwg != nil {
		wwg.Add(1)
	}
	go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)

	depth := 0
	treeSize := self.chunkSize

	// takes the lowest depth such that chunksize*branches^(depth+1) > size;
	// this power series finds the order of magnitude of the data size in base branches, i.e. the number of levels of branching in the resulting tree
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}
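	// e.g. with the default chunkSize of 4096 and 128 branches, a 1 MB
	// (1048576-byte) input gives depth 2, since 4096*128 = 524288 < 1048576
	// but 4096*128^2 = 67108864 >= 1048576 (figures assume a 32-byte hash)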

	key := make([]byte, self.hashFunc().Size())
	// this waitgroup member is released after the root hash is calculated
	wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go self.split(depth, treeSize/self.branches, key, data, size, jobC, chunkC, errC, quitC, wg, swg, wwg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// waiting for all threads to finish
		wg.Wait()
		// if the storage waitgroup is non-nil, we wait for storage to finish too
		if swg != nil {
			swg.Wait()
		}
		close(errC)
	}()

	// TODO: add a timeout
	if err := <-errC; err != nil {
		close(quitC)
		return nil, err
	}
	return key, nil
}

func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {

	for depth > 0 && size < treeSize {
		treeSize /= self.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
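		// io.Reader may deliver fewer bytes than requested, so keep reading
		// until the whole section is in; EOF is only acceptable once exactly
		// size bytes have been read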
		for readBytes < size {
			n, err := data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				errC <- err
				return
			}
		}
		select {
		case jobC <- &hashJob{key, chunkData, size, parentWg}:
		case <-quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing the hashes of its child nodes
	branchCnt := int64((size + treeSize - 1) / treeSize)
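	// (ceiling division: e.g. size = 10000 and treeSize = 4096 give branchCnt = 3)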

	chunk := make([]byte, branchCnt*self.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeKey := chunk[8+i*self.hashSize : 8+(i+1)*self.hashSize]

		childrenWg.Add(1)
		self.split(depth-1, treeSize/self.branches, subTreeKey, data, secSize, jobC, chunkC, errC, quitC, childrenWg, swg, wwg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()
	if len(jobC) > self.workerCount && self.workerCount < processors {
		if wwg != nil {
			wwg.Add(1)
		}
		self.workerCount++
		go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
	}
	select {
	case jobC <- &hashJob{key, chunk, size, parentWg}:
	case <-quitC:
	}
}

func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
	hasher := self.hashFunc()
	if wwg != nil {
		defer wwg.Done()
	}
	for {
		select {
		case job, ok := <-jobC:
			if !ok {
				return
			}
			// now that the chunk contains its children's hashes (or the raw data), hash the chunk itself
			hasher.Reset()
			self.hashChunk(hasher, job, chunkC, swg)
		case <-quitC:
			return
		}
	}
}

// The TreeChunker's hash function hashes together
// - the size (of the subtree encoded in the Chunk)
// - the Chunk, i.e. the contents read from the input reader
func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
	hasher.Write(job.chunk)
	h := hasher.Sum(nil)
	newChunk := &Chunk{
		Key:   h,
		SData: job.chunk,
		Size:  job.size,
		wg:    swg,
	}

	// report the hash of this chunk one level up (the key corresponds to the proper subslice of the parent chunk)
	copy(job.key, h)
	// send off the new chunk to storage
	if chunkC != nil {
		if swg != nil {
			swg.Add(1)
		}
	}
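	// note: the storage waitgroup is incremented before parentWg.Done so that
	// a caller waiting on swg cannot observe completion before this chunk has
	// been accounted for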
	job.parentWg.Done()

	if chunkC != nil {
		chunkC <- newChunk
	}
}

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Key         // root key
	chunkC    chan *Chunk // chunk channel to send retrieve requests on
	chunk     *Chunk      // root chunk, cached after the first retrieval
	off       int64       // offset
	chunkSize int64       // inherit from chunker
	branches  int64       // inherit from chunker
	hashSize  int64       // inherit from chunker
}

// Join implements the Joiner interface: it returns a lazy reader over the
// document rooted at key; chunks are fetched on demand via chunkC.
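// Illustrative use (assumes a store goroutine is serving requests on chunkC):
//
//	reader := chunker.Join(key, chunkC)
//	size, _ := reader.Size(nil)
//	buf := make([]byte, size)
//	_, err := reader.ReadAt(buf, 0)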
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {

	return &LazyChunkReader{
		key:       key,
		chunkC:    chunkC,
		chunkSize: self.chunkSize,
		branches:  self.branches,
		hashSize:  self.hashSize,
	}
}

// Size is meant to be called on the LazySectionReader
func (self *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
	if self.chunk != nil {
		return self.chunk.Size, nil
	}
	chunk := retrieve(self.key, self.chunkC, quitC)
	if chunk == nil {
		select {
		case <-quitC:
			return 0, errors.New("aborted")
		default:
			return 0, fmt.Errorf("root chunk not found for %v", self.key.Hex())
		}
	}
	self.chunk = chunk
	return chunk.Size, nil
}

// ReadAt can be called numerous times; concurrent reads are allowed.
// Size() needs to be called synchronously on the LazyChunkReader first.
func (self *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	// this is correct: a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := self.Size(quitC)
	if err != nil {
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = self.chunkSize
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	wg.Add(1)
	go self.join(b, off, off+int64(len(b)), depth, treeSize/self.branches, self.chunk, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		close(quitC)
		return 0, err
	}
	if off+int64(len(b)) >= size {
		return len(b), io.EOF
	}
	return len(b), nil
}

func (self *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunk *Chunk, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()

	// find the appropriate block level
	for chunk.Size < treeSize && depth > 0 {
		treeSize /= self.branches
		depth--
	}

	// leaf chunk found
	if depth == 0 {
		extra := 8 + eoff - int64(len(chunk.SData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunk.SData[8+off:8+eoff])
		return // simply give back the chunk's contents for content chunks
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize
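	// e.g. with treeSize = 4096, off = 5000 and eoff = 9000, the read spans
	// children start = 1 up to (but excluding) end = 3 (ceiling division)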

	wg := &sync.WaitGroup{}
	defer wg.Wait()

	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunk.SData[8+j*self.hashSize : 8+(j+1)*self.hashSize]
			chunk := retrieve(childKey, self.chunkC, quitC)
			if chunk == nil {
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found", off, off+treeSize):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			self.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/self.branches, chunk, wg, errC, quitC)
		}(i)
	}
}

// retrieve is a helper that submits a chunk retrieval request for a key to a
// queue (the DPA) and blocks until the chunk arrives or times out;
// it aborts if quitC is readable
func retrieve(key Key, chunkC chan *Chunk, quitC chan bool) *Chunk {
	chunk := &Chunk{
		Key: key,
		C:   make(chan bool), // close channel to signal data delivery
	}
	// submit chunk for retrieval
	select {
	case chunkC <- chunk: // submit retrieval request, someone should be listening on the other side (or we will time out globally)
	case <-quitC:
		return nil
	}
	// waiting for the chunk retrieval
	select {
	case <-quitC:
		// this is how we control process leakage (quitC is closed once join is finished (after timeout))
		return nil
	case <-chunk.C: // bells are ringing, data has been delivered
	}
	if len(chunk.SData) == 0 {
		return nil
	}
	return chunk
}

// Read keeps a cursor so it cannot be called simultaneously, see ReadAt
func (self *LazyChunkReader) Read(b []byte) (read int, err error) {
	read, err = self.ReadAt(b, self.off)

	self.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

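// Seek implements io.Seeker; whence follows the standard convention
// (0 = relative to the start, 1 = to the current offset, 2 = to the end).
// For example, to position the cursor 100 bytes before the end of the document:
//
//	_, err := reader.Seek(-100, 2)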
func (s *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	default:
		return 0, errWhence
	case 0:
		// offset is already absolute
	case 1:
		offset += s.off
	case 2:
		if s.chunk == nil { // seeking from the end requires the root chunk for the size; call Size first
			_, err := s.Size(nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += s.chunk.Size
	}

	if offset < 0 {
		return 0, errOffset
	}
	s.off = offset
	return offset, nil
}