github.com/alanchchen/go-ethereum@v1.6.6-0.20170601190819-6171d01b1195/swarm/storage/pyramid.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/ethereum/go-ethereum/common"
)

// processors is the number of concurrent workers hashing chunks and assembling the tree.
const (
	processors = 8
)

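// Tree is the shared memoization structure the workers build while chunking.
// Each level maps a node index to its partially filled Node; the last level
// holds the nodes directly above the leaf chunks, and Levels[0][0] ends up
// with a single child, the root hash returned by Split.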
type Tree struct {
	Chunks int64
	Levels []map[int64]*Node
	Lock   sync.RWMutex
}

type Node struct {
	Pending  int64
	Size     uint64
	Children []common.Hash
	Last     bool
}

func (self *Node) String() string {
	var children []string
	for _, node := range self.Children {
		children = append(children, node.Hex())
	}
	return fmt.Sprintf("pending: %v, size: %v, last: %v, children: %v", self.Pending, self.Size, self.Last, strings.Join(children, ", "))
}

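// Task describes one leaf chunk handed to a processor: its position in the
// stream, its payload size, the size-prefixed chunk data and whether it is
// the final chunk of the input.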
type Task struct {
	Index int64 // Index of the chunk being processed
	Size  uint64
	Data  []byte // Binary blob of the chunk
	Last  bool
}

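// PyramidChunker splits input data into a tree of content-addressed chunks.
// It consumes the input strictly left to right, hashing leaf chunks as they
// are read and promoting completed nodes upwards, so the tree is built in a
// single streaming pass without buffering the whole input.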
type PyramidChunker struct {
	hashFunc    Hasher
	chunkSize   int64
	hashSize    int64
	branches    int64
	workerCount int
}

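// NewPyramidChunker creates a chunker from the given parameters; the chunk
// size is derived as hash size times branching factor.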
func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) {
	self = &PyramidChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 1
	return
}

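// Split reads size bytes from data, cuts them into chunks of chunkSize bytes
// and has a pool of processor goroutines hash them into a tree. Every produced
// chunk (leaf and internal) is sent on chunkC; swg is incremented for each
// such chunk so the caller can wait for storage, while wwg is accepted for
// interface compatibility but unused here. The returned Key is the root hash.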
func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {

	// Calculate the number of leaf chunks and the tree depth needed to cover them
	chunks := (size + self.chunkSize - 1) / self.chunkSize
	depth := int(math.Ceil(math.Log(float64(chunks))/math.Log(float64(self.branches)))) + 1

	results := Tree{
		Chunks: chunks,
		Levels: make([]map[int64]*Node, depth),
	}
	for i := 0; i < depth; i++ {
		results.Levels[i] = make(map[int64]*Node)
	}
	// Create a pool of workers to crunch through the file
	tasks := make(chan *Task, 2*processors)
	pend := new(sync.WaitGroup)
	abortC := make(chan bool)
	for i := 0; i < processors; i++ {
		pend.Add(1)
		go self.processor(pend, swg, tasks, chunkC, &results)
	}
	// Feed the chunks into the task pool
	read := 0
	for index := 0; ; index++ {
		buffer := make([]byte, self.chunkSize+8)
		n, err := data.Read(buffer[8:])
		read += n
		last := int64(read) == size || err == io.ErrUnexpectedEOF || err == io.EOF
		if err != nil && !last {
			close(abortC)
			break
		}
		// Prefix the payload with its length as a little-endian uint64
		binary.LittleEndian.PutUint64(buffer[:8], uint64(n))
		pend.Add(1)
		select {
		case tasks <- &Task{Index: int64(index), Size: uint64(n), Data: buffer[:n+8], Last: last}:
		case <-abortC:
			return nil, err
		}
		if last {
			break
		}
	}
	// Wait for the workers and return
	close(tasks)
	pend.Wait()

	key := results.Levels[0][0].Children[0][:]
	return key, nil
}

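// processor consumes tasks, hashes each leaf chunk and walks up the tree:
// whenever its hash completes the parent node (Pending reaches zero) it hashes
// that node too and continues one level higher, otherwise it moves on to the
// next task.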
func (self *PyramidChunker) processor(pend, swg *sync.WaitGroup, tasks chan *Task, chunkC chan *Chunk, results *Tree) {
	defer pend.Done()

	// Start processing leaf chunks ad infinitum
	hasher := self.hashFunc()
	for task := range tasks {
		depth, pow := len(results.Levels)-1, self.branches
		size := task.Size
		data := task.Data
		var node *Node
		for depth >= 0 {
			// New chunk received, reset the hasher and start processing
			hasher.Reset()
			if node == nil { // Leaf node, hash the data chunk
				hasher.Write(task.Data)
			} else { // Internal node, hash the children
				size = node.Size
				data = make([]byte, hasher.Size()*len(node.Children)+8)
				binary.LittleEndian.PutUint64(data[:8], size)

				hasher.Write(data[:8])
				for i, hash := range node.Children {
					copy(data[i*hasher.Size()+8:], hash[:])
					hasher.Write(hash[:])
				}
			}
			hash := hasher.Sum(nil)
			last := task.Last || (node != nil) && node.Last
			// Insert the subresult into the memoization tree
			results.Lock.Lock()
			if node = results.Levels[depth][task.Index/pow]; node == nil {
				// Figure out the pending tasks
				pending := self.branches
				if task.Index/pow == results.Chunks/pow {
					pending = (results.Chunks + pow/self.branches - 1) / (pow / self.branches) % self.branches
				}
				node = &Node{pending, 0, make([]common.Hash, pending), last}
				results.Levels[depth][task.Index/pow] = node
			}
			node.Pending--
			// Slot of this child hash within the parent node
			i := task.Index / (pow / self.branches) % self.branches
			if last {
				node.Last = true
			}
			copy(node.Children[i][:], hash)
			node.Size += size
			left := node.Pending
			if chunkC != nil {
				if swg != nil {
					swg.Add(1)
				}
				select {
				case chunkC <- &Chunk{Key: hash, SData: data, wg: swg}:
					// case <- self.quitC
				}
			}
			// Drop the just-hashed child node from the level below, it is no longer needed
			if depth+1 < len(results.Levels) {
				delete(results.Levels[depth+1], task.Index/(pow/self.branches))
			}

			results.Lock.Unlock()
			// If there's more work to be done, leave for others
			if left > 0 {
				break
			}
			// We're the last ones in this batch, merge the children together
			depth--
			pow *= self.branches
		}
		pend.Done()
	}
}
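
// examplePyramidSplit is an illustrative sketch of how the chunker above can
// be driven, assuming the ChunkerParams, Chunk and Key types declared
// elsewhere in this package. The branching factor and hash name are
// placeholder values; a real caller would take them from its configuration
// and persist each chunk received on chunkC instead of discarding it.
func examplePyramidSplit(data io.Reader, size int64) (Key, error) {
	chunker := NewPyramidChunker(&ChunkerParams{Branches: 128, Hash: "SHA3"})

	// Drain the chunk channel in the background; Split blocks on unbuffered
	// sends, so a consumer must be running before it is called.
	chunkC := make(chan *Chunk)
	done := make(chan struct{})
	go func() {
		defer close(done)
		for range chunkC {
			// a real consumer would store the chunk here instead of dropping it
		}
	}()

	// Split only returns after all workers have finished, so no further sends
	// on chunkC can happen and the channel may be closed safely.
	key, err := chunker.Split(data, size, chunkC, nil, nil)
	close(chunkC)
	<-done
	return key, err
}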