github.com/m3shine/gochain@v2.2.26+incompatible/swarm/storage/pyramid.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"
)
/*
   The main idea of a pyramid chunker is to process the input data without knowing the entire size a priori.
   To achieve this, the chunker tree is built from the ground up until the data is exhausted.
   This opens up new avenues, such as easy appends and other modifications to the tree, thereby avoiding
   duplication of data chunks.


   Below is an example of a two-level chunk tree. The leaf chunks are called data chunks and all the chunks
   above them are called tree chunks. The tree chunks directly above the data chunks are level 0, and the
   levels count upwards until the root tree chunk.



                                            T10                                        <- Tree chunk lvl1
                                            |
                  __________________________|_____________________________
                 /                  |                   |                \
                /                   |                   \                 \
            __T00__             ___T01__           ___T02__           ___T03__         <- Tree chunks lvl 0
           / /     \           / /      \         / /      \         / /      \
          / /       \         / /        \       / /       \        / /        \
         D1 D2 ... D128     D1 D2 ... D128     D1 D2 ... D128     D1 D2 ... D128      <-  Data Chunks


    The Split function continuously reads the data, creates data chunks, and sends them to storage.
    When a certain number of data chunks have been created (DefaultBranches), a signal is sent to create
    a tree entry. When the number of level 0 tree entries reaches a certain threshold (DefaultBranches),
    another signal is sent to create a tree entry one level up, and so on, until the data is exhausted AND
    only one tree entry is present at some level. The key of that tree entry is given out as the rootKey
    of the file.

*/
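
// A minimal usage sketch (illustrative only, not part of this file): the
// chunk-consuming goroutine below is assumed glue; a real caller would hand
// chunkC to a chunk store that persists each chunk before releasing it.
//
//	pyramid := NewPyramidChunker(NewChunkerParams())
//	chunkC := make(chan *Chunk)
//	swg := &sync.WaitGroup{}
//	go func() {
//		for chunk := range chunkC {
//			// persist chunk.SData under chunk.Key, then release the waiter
//			if chunk.wg != nil {
//				chunk.wg.Done()
//			}
//		}
//	}()
//	rootKey, err := pyramid.Split(dataReader, dataSize, chunkC, swg)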

var (
	errLoadingTreeRootChunk = errors.New("LoadTree Error: Could not load root chunk")
	errLoadingTreeChunk     = errors.New("LoadTree Error: Could not load chunk")
)

const (
	ChunkProcessors       = 8
	DefaultBranches int64 = 128
	splitTimeout          = time.Minute * 5
)

var errTimeout = fmt.Errorf("timed out after %s", splitTimeout)

const (
	DataChunk = 0
	TreeChunk = 1
)

type ChunkerParams struct {
	Branches int64
	Hash     string
}

func NewChunkerParams() *ChunkerParams {
	return &ChunkerParams{
		Branches: DefaultBranches,
		Hash:     SHA3Hash,
	}
}

// TreeEntry is an entry used to create a tree node.
type TreeEntry struct {
	level         int
	branchCount   int64
	subtreeSize   uint64
	chunk         []byte
	key           []byte
	index         int  // used in append to indicate the index of an existing tree entry
	updatePending bool // indicates if the entry is loaded from an existing tree
}

func NewTreeEntry(pyramid *PyramidChunker) *TreeEntry {
	return &TreeEntry{
		level:         0,
		branchCount:   0,
		subtreeSize:   0,
		chunk:         make([]byte, pyramid.chunkSize+8),
		key:           make([]byte, pyramid.hashSize),
		index:         0,
		updatePending: false,
	}
}

// chunkJob is used by the hash processor to create a data/tree chunk and send it to storage.
type chunkJob struct {
	key       Key
	chunk     []byte
	size      int64
	done      func()
	chunkType int // used to identify the tree-related chunks for debugging
	chunkLvl  int // leaf-1 is level 0 and goes upwards until it reaches the root
}

type PyramidChunker struct {
	hashFunc    SwarmHasher
	chunkSize   int64
	hashSize    int64
	branches    int64
	workerCount int64
	workerLock  sync.RWMutex
}

func NewPyramidChunker(params *ChunkerParams) *PyramidChunker {
	p := &PyramidChunker{}
	p.hashFunc = MakeHashFunc(params.Hash)
	p.branches = params.Branches
	p.hashSize = int64(p.hashFunc().Size())
	p.chunkSize = p.hashSize * p.branches
	p.workerCount = 0
	return p
}

func (p *PyramidChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
	return &LazyChunkReader{
		key:       key,
		chunkC:    chunkC,
		chunkSize: p.chunkSize,
		branches:  p.branches,
		hashSize:  p.hashSize,
	}
}

func (p *PyramidChunker) incrementWorkerCount() {
	p.workerLock.Lock()
	defer p.workerLock.Unlock()
	p.workerCount++
}

func (p *PyramidChunker) getWorkerCount() int64 {
	p.workerLock.RLock()
	defer p.workerLock.RUnlock()
	return p.workerCount
}

func (p *PyramidChunker) decrementWorkerCount() {
	p.workerLock.Lock()
	defer p.workerLock.Unlock()
	p.workerCount--
}

func (p *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg *sync.WaitGroup) (Key, error) {
	rootKey := make([]byte, p.hashSize)
	chunkLevel := make([][]*TreeEntry, p.branches)
	quitC := make(chan bool)
	defer close(quitC)

	var wg sync.WaitGroup
	wg.Add(1)
	go p.prepareChunks(false, chunkLevel, data, rootKey, quitC, wg.Done, chunkC, swg)

	done := make(chan struct{})
	go func() {
		wg.Wait()
		if swg != nil {
			swg.Wait()
		}
		close(done)
	}()

	select {
	case <-done:
	case <-time.After(splitTimeout):
		return nil, errTimeout
	}
	return rootKey, nil
}

func (p *PyramidChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg *sync.WaitGroup) (Key, error) {
	rootKey := make([]byte, p.hashSize)
	chunkLevel := make([][]*TreeEntry, p.branches)
	quitC := make(chan bool)
	defer close(quitC)

	// Load the right-most unfinished tree chunks on every level
	if err := p.loadTree(chunkLevel, key, chunkC, quitC); err != nil {
		return nil, err
	}

	var wg sync.WaitGroup
	wg.Add(1)
	go p.prepareChunks(true, chunkLevel, data, rootKey, quitC, wg.Done, chunkC, swg)

	done := make(chan struct{})
	go func() {
		wg.Wait()
		if swg != nil {
			swg.Wait()
		}
		close(done)
	}()

	select {
	case <-done:
	case <-time.After(splitTimeout):
		return nil, errTimeout
	}
	return rootKey, nil
}

func (p *PyramidChunker) processor(id int64, jobC <-chan *chunkJob, chunkC chan *Chunk, quitC chan bool, swg *sync.WaitGroup) {
	defer p.decrementWorkerCount()
	if swg != nil {
		defer swg.Done()
	}

	hasher := p.hashFunc()
	for {
		select {
		case job, ok := <-jobC:
			if !ok {
				return
			}
			p.processChunk(id, hasher, job, chunkC, swg)
		case <-quitC:
			return
		}
	}
}

func (p *PyramidChunker) processChunk(id int64, hasher SwarmHash, job *chunkJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
	defer job.done()
	hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
	hasher.Write(job.chunk[8:])           // minus 8 []byte length
	h := hasher.Sum(nil)

	// report the hash of this chunk one level up (the key corresponds to the proper subslice of the parent chunk)
	copy(job.key, h)

	// send off the new chunk to storage
	if chunkC != nil {
		if swg != nil {
			swg.Add(1)
		}
		chunkC <- &Chunk{Key: h, SData: job.chunk, Size: job.size, wg: swg}
	}
}
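
// The chunk layout implied above: an 8-byte little-endian size prefix followed
// by the payload. Illustrative example: a full 4096-byte data chunk (the
// default with 128 branches and a 32-byte hash) carries the prefix
// 00 10 00 00 00 00 00 00 (uint64 4096, little-endian) followed by the data;
// a tree chunk's payload is the concatenation of its children's keys, and its
// prefix holds the total subtree size rather than the payload length.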

func (p *PyramidChunker) loadTree(chunkLevel [][]*TreeEntry, key Key, chunkC chan *Chunk, quitC chan bool) error {
	// Get the root chunk to find the total size
	chunk := retrieve(key, chunkC, quitC)
	if chunk == nil {
		return errLoadingTreeRootChunk
	}

	// If the data size is less than a chunk, add a parent with the update pending
	if chunk.Size <= p.chunkSize {
		newEntry := &TreeEntry{
			level:         0,
			branchCount:   1,
			subtreeSize:   uint64(chunk.Size),
			chunk:         make([]byte, p.chunkSize+8),
			key:           make([]byte, p.hashSize),
			index:         0,
			updatePending: true,
		}
		copy(newEntry.chunk[8:], chunk.Key)
		chunkLevel[0] = append(chunkLevel[0], newEntry)
		return nil
	}

	var treeSize int64
	var depth int
	treeSize = p.chunkSize
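	// Illustrative numbers: with the default branches = 128 and a 32-byte
	// hash, chunkSize is 4096. For a 1 MiB file, treeSize grows
	// 4096 -> 524288 -> 67108864, so the loop below runs twice and depth = 2:
	// one level of tree chunks above the data chunks, plus the root.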
	for ; treeSize < chunk.Size; treeSize *= p.branches {
		depth++
	}

	// Add the root chunk entry
	branchCount := int64(len(chunk.SData)-8) / p.hashSize
	newEntry := &TreeEntry{
		level:         depth - 1,
		branchCount:   branchCount,
		subtreeSize:   uint64(chunk.Size),
		chunk:         chunk.SData,
		key:           key,
		index:         0,
		updatePending: true,
	}
	chunkLevel[depth-1] = append(chunkLevel[depth-1], newEntry)

	// Add the rest of the tree
	for lvl := depth - 1; lvl >= 1; lvl-- {

		//TODO(jmozah): instead of loading finished branches and then trimming at the end,
		//avoid loading them in the first place
		for _, ent := range chunkLevel[lvl] {
			branchCount = int64(len(ent.chunk)-8) / p.hashSize
			for i := int64(0); i < branchCount; i++ {
				key := ent.chunk[8+(i*p.hashSize) : 8+((i+1)*p.hashSize)]
				newChunk := retrieve(key, chunkC, quitC)
				if newChunk == nil {
					return errLoadingTreeChunk
				}
				newBranchCount := int64(len(newChunk.SData)-8) / p.hashSize
				newEntry := &TreeEntry{
					level:         lvl - 1,
					branchCount:   newBranchCount,
					subtreeSize:   uint64(newChunk.Size),
					chunk:         newChunk.SData,
					key:           key,
					index:         0,
					updatePending: true,
				}
				chunkLevel[lvl-1] = append(chunkLevel[lvl-1], newEntry)
			}

			// We only need the right-most unfinished branch, so trim all finished branches
			if int64(len(chunkLevel[lvl-1])) >= p.branches {
				chunkLevel[lvl-1] = nil
			}
		}
	}

	return nil
}

func (p *PyramidChunker) prepareChunks(isAppend bool, chunkLevel [][]*TreeEntry, data io.Reader, rootKey []byte, quitC chan bool, done func(), chunkC chan *Chunk, swg *sync.WaitGroup) {
	defer done()
	jobC := make(chan *chunkJob, 2*ChunkProcessors)
	defer close(jobC)

	chunkWG := &sync.WaitGroup{}
	totalDataSize := 0

	p.incrementWorkerCount()
	if swg != nil {
		swg.Add(1)
	}
	go p.processor(p.workerCount, jobC, chunkC, quitC, swg)

	parent := NewTreeEntry(p)
	var unFinishedChunk *Chunk

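	// When appending, resume from the right-most unfinished level-0 tree
	// entry: reopen it as the working parent and, if its last data chunk is
	// not full, reload that chunk so the incoming data continues filling it.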
	if isAppend && len(chunkLevel[0]) != 0 {
		lastIndex := len(chunkLevel[0]) - 1
		ent := chunkLevel[0][lastIndex]

		if ent.branchCount < p.branches {
			parent = &TreeEntry{
				level:         0,
				branchCount:   ent.branchCount,
				subtreeSize:   ent.subtreeSize,
				chunk:         ent.chunk,
				key:           ent.key,
				index:         lastIndex,
				updatePending: true,
			}

			lastBranch := parent.branchCount - 1
			lastKey := parent.chunk[8+lastBranch*p.hashSize : 8+(lastBranch+1)*p.hashSize]

			unFinishedChunk = retrieve(lastKey, chunkC, quitC)
			if unFinishedChunk != nil && unFinishedChunk.Size < p.chunkSize {
				parent.subtreeSize = parent.subtreeSize - uint64(unFinishedChunk.Size)
				parent.branchCount = parent.branchCount - 1
			} else {
				unFinishedChunk = nil
			}
		}
	}

	for index := 0; ; index++ {
		var n int
		var err error
		chunkData := make([]byte, p.chunkSize+8)
		if unFinishedChunk != nil {
			copy(chunkData, unFinishedChunk.SData)
			n, err = data.Read(chunkData[8+unFinishedChunk.Size:])
			n += int(unFinishedChunk.Size)
			unFinishedChunk = nil
		} else {
			n, err = data.Read(chunkData[8:])
		}

		totalDataSize += n
		if err != nil {
			if err == io.EOF || err == io.ErrUnexpectedEOF {
				if parent.branchCount == 1 {
					// Data is exactly one chunk: pick the last chunk key as root
					chunkWG.Wait()
					lastChunksKey := parent.chunk[8 : 8+p.hashSize]
					copy(rootKey, lastChunksKey)
					break
				}
			} else {
				close(quitC)
				break
			}
		}

		// Data ended on a chunk boundary: just signal to start building the tree
		if n == 0 {
			p.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey)
			break
		} else {
			pkey := p.enqueueDataChunk(chunkData, uint64(n), parent, chunkWG, jobC, quitC)

			// update tree-related parent data structures
			parent.subtreeSize += uint64(n)
			parent.branchCount++

			// Data got exhausted: signal to send any parent tree-related chunks
			if int64(n) < p.chunkSize {

				// only one data chunk, so don't add any parent chunk
				if parent.branchCount <= 1 {
					chunkWG.Wait()
					copy(rootKey, pkey)
					break
				}

				p.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, true, rootKey)
				break
			}

			if parent.branchCount == p.branches {
				p.buildTree(isAppend, chunkLevel, parent, chunkWG, jobC, quitC, false, rootKey)
				parent = NewTreeEntry(p)
			}
		}

		workers := p.getWorkerCount()
		if int64(len(jobC)) > workers && workers < ChunkProcessors {
			p.incrementWorkerCount()
			if swg != nil {
				swg.Add(1)
			}
			go p.processor(p.workerCount, jobC, chunkC, quitC, swg)
		}
	}
}

func (p *PyramidChunker) buildTree(isAppend bool, chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool, rootKey []byte) {
	chunkWG.Wait()
	p.enqueueTreeChunk(chunkLevel, ent, chunkWG, jobC, quitC, last)

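	// Scan the levels bottom-up: the first level that has accumulated
	// p.branches entries must be compressed into a parent entry one level up
	// (chunkLevel has p.branches slots, so p.branches also bounds the depth).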
	compress := false
	endLvl := p.branches
	for lvl := int64(0); lvl < p.branches; lvl++ {
		lvlCount := int64(len(chunkLevel[lvl]))
		if lvlCount >= p.branches {
			endLvl = lvl + 1
			compress = true
			break
		}
	}

	if !compress && !last {
		return
	}

	// Wait for all the keys to be processed before compressing the tree
	chunkWG.Wait()

	for lvl := int64(ent.level); lvl < endLvl; lvl++ {

		lvlCount := int64(len(chunkLevel[lvl]))
		if lvlCount == 1 && last {
			copy(rootKey, chunkLevel[lvl][0].key)
			return
		}

		for startCount := int64(0); startCount < lvlCount; startCount += p.branches {

			endCount := startCount + p.branches
			if endCount > lvlCount {
				endCount = lvlCount
			}

			var nextLvlCount int64
			var tempEntry *TreeEntry
			if len(chunkLevel[lvl+1]) > 0 {
				nextLvlCount = int64(len(chunkLevel[lvl+1]) - 1)
				tempEntry = chunkLevel[lvl+1][nextLvlCount]
			}
			if isAppend && tempEntry != nil && tempEntry.updatePending {
				updateEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   0,
					subtreeSize:   0,
					chunk:         make([]byte, p.chunkSize+8),
					key:           make([]byte, p.hashSize),
					index:         int(nextLvlCount),
					updatePending: true,
				}
				for index := int64(0); index < lvlCount; index++ {
					updateEntry.branchCount++
					updateEntry.subtreeSize += chunkLevel[lvl][index].subtreeSize
					copy(updateEntry.chunk[8+(index*p.hashSize):8+((index+1)*p.hashSize)], chunkLevel[lvl][index].key[:p.hashSize])
				}

				p.enqueueTreeChunk(chunkLevel, updateEntry, chunkWG, jobC, quitC, last)

			} else {

				noOfBranches := endCount - startCount
				newEntry := &TreeEntry{
					level:         int(lvl + 1),
					branchCount:   noOfBranches,
					subtreeSize:   0,
					chunk:         make([]byte, (noOfBranches*p.hashSize)+8),
					key:           make([]byte, p.hashSize),
					index:         int(nextLvlCount),
					updatePending: false,
				}

				index := int64(0)
				for i := startCount; i < endCount; i++ {
					entry := chunkLevel[lvl][i]
					newEntry.subtreeSize += entry.subtreeSize
					copy(newEntry.chunk[8+(index*p.hashSize):8+((index+1)*p.hashSize)], entry.key[:p.hashSize])
					index++
				}

				p.enqueueTreeChunk(chunkLevel, newEntry, chunkWG, jobC, quitC, last)
			}
		}

		if !isAppend {
			chunkWG.Wait()
			if compress {
				chunkLevel[lvl] = nil
			}
		}
	}
}

func (p *PyramidChunker) enqueueTreeChunk(chunkLevel [][]*TreeEntry, ent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool, last bool) {
	if ent != nil {

		// wait for the data chunks to be done before processing the tree chunk
		if last {
			chunkWG.Wait()
		}

		binary.LittleEndian.PutUint64(ent.chunk[:8], ent.subtreeSize)
		ent.key = make([]byte, p.hashSize)
		chunkWG.Add(1)
		select {
		case jobC <- &chunkJob{ent.key, ent.chunk[:ent.branchCount*p.hashSize+8], int64(ent.subtreeSize), chunkWG.Done, TreeChunk, 0}:
		case <-quitC:
		}

		// Update or append based on whether it is a new entry or is being reused
		if ent.updatePending {
			chunkWG.Wait()
			chunkLevel[ent.level][ent.index] = ent
		} else {
			chunkLevel[ent.level] = append(chunkLevel[ent.level], ent)
		}
	}
}

func (p *PyramidChunker) enqueueDataChunk(chunkData []byte, size uint64, parent *TreeEntry, chunkWG *sync.WaitGroup, jobC chan *chunkJob, quitC chan bool) Key {
	binary.LittleEndian.PutUint64(chunkData[:8], size)
	pkey := parent.chunk[8+parent.branchCount*p.hashSize : 8+(parent.branchCount+1)*p.hashSize]
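	// pkey aliases the parent's next free hash slot (for example, with a
	// 32-byte hash and branchCount == 2, bytes [72:104] of parent.chunk), so
	// when the processor copies the computed hash into job.key, it lands
	// directly inside the parent tree chunk.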

	chunkWG.Add(1)
	select {
	case jobC <- &chunkJob{pkey, chunkData[:size+8], int64(size), chunkWG.Done, DataChunk, -1}:
	case <-quitC:
	}

	return pkey
}