github.com/memikequinn/go-ethereum@v1.6.6-0.20170621145815-58a1e13e6dd7/swarm/storage/pyramid.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/ethereum/go-ethereum/common"
)

const (
	processors = 8
)

type Tree struct {
	Chunks int64
	Levels []map[int64]*Node
	Lock   sync.RWMutex
}

type Node struct {
	Pending  int64
	Size     uint64
	Children []common.Hash
	Last     bool
}

func (self *Node) String() string {
	var children []string
	for _, node := range self.Children {
		children = append(children, node.Hex())
	}
	return fmt.Sprintf("pending: %v, size: %v, last: %v, children: %v", self.Pending, self.Size, self.Last, strings.Join(children, ", "))
}

type Task struct {
	Index int64 // Index of the chunk being processed
	Size  uint64
	Data  []byte // Binary blob of the chunk
	Last  bool
}

type PyramidChunker struct {
	hashFunc    Hasher
	chunkSize   int64
	hashSize    int64
	branches    int64
	workerCount int
}

func NewPyramidChunker(params *ChunkerParams) (self *PyramidChunker) {
	self = &PyramidChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 1
	return
}

func (self *PyramidChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {

	chunks := (size + self.chunkSize - 1) / self.chunkSize
	depth := int(math.Ceil(math.Log(float64(chunks))/math.Log(float64(self.branches)))) + 1

	results := Tree{
		Chunks: chunks,
		Levels: make([]map[int64]*Node, depth),
	}
	for i := 0; i < depth; i++ {
		results.Levels[i] = make(map[int64]*Node)
	}
	// Create a pool of workers to crunch through the file
	tasks := make(chan *Task, 2*processors)
	pend := new(sync.WaitGroup)
	abortC := make(chan bool)
	for i := 0; i < processors; i++ {
		pend.Add(1)
		go self.processor(pend, swg, tasks, chunkC, &results)
	}
	// Feed the chunks into the task pool
	read := 0
	for index := 0; ; index++ {
		buffer := make([]byte, self.chunkSize+8) // 8-byte length prefix followed by up to chunkSize bytes of payload
		n, err := data.Read(buffer[8:])
		read += n
		last := int64(read) == size || err == io.ErrUnexpectedEOF || err == io.EOF
		if err != nil && !last {
			close(abortC)
			break
		}
		binary.LittleEndian.PutUint64(buffer[:8], uint64(n))
		pend.Add(1)
		select {
		case tasks <- &Task{Index: int64(index), Size: uint64(n), Data: buffer[:n+8], Last: last}:
		case <-abortC:
			return nil, err
		}
		if last {
			break
		}
	}
	// Wait for the workers and return
	close(tasks)
	pend.Wait()

	// The root hash is the single child of the top-level node
	key := results.Levels[0][0].Children[0][:]
	return key, nil
}

func (self *PyramidChunker) processor(pend, swg *sync.WaitGroup, tasks chan *Task, chunkC chan *Chunk, results *Tree) {
	defer pend.Done()

	// Start processing leaf chunks ad infinitum
	hasher := self.hashFunc()
	for task := range tasks {
		depth, pow := len(results.Levels)-1, self.branches
		size := task.Size
		data := task.Data
		var node *Node
		for depth >= 0 {
			// New chunk received, reset the hasher and start processing
			hasher.Reset()
			if node == nil { // Leaf node, hash the data chunk
				hasher.Write(task.Data)
			} else { // Internal node, hash the children
				size = node.Size
				data = make([]byte, hasher.Size()*len(node.Children)+8)
				binary.LittleEndian.PutUint64(data[:8], size)

				hasher.Write(data[:8])
				for i, hash := range node.Children {
					copy(data[i*hasher.Size()+8:], hash[:])
					hasher.Write(hash[:])
				}
			}
			hash := hasher.Sum(nil)
			last := task.Last || (node != nil) && node.Last
			// Insert the subresult into the memoization tree
			results.Lock.Lock()
			if node = results.Levels[depth][task.Index/pow]; node == nil {
				// Figure out the pending tasks
				pending := self.branches
				if task.Index/pow == results.Chunks/pow {
					pending = (results.Chunks + pow/self.branches - 1) / (pow / self.branches) % self.branches
				}
				node = &Node{pending, 0, make([]common.Hash, pending), last}
				results.Levels[depth][task.Index/pow] = node
			}
			node.Pending--
			i := task.Index / (pow / self.branches) % self.branches
			if last {
				node.Last = true
			}
			copy(node.Children[i][:], hash)
			node.Size += size
			left := node.Pending
			if chunkC != nil {
				if swg != nil {
					swg.Add(1)
				}
				select {
				case chunkC <- &Chunk{Key: hash, SData: data, wg: swg}:
					// case <- self.quitC
				}
			}
			if depth+1 < len(results.Levels) {
				delete(results.Levels[depth+1], task.Index/(pow/self.branches))
			}

			results.Lock.Unlock()
			// If there's more work to be done, leave for others
			if left > 0 {
				break
			}
			// We're the last ones in this batch, merge the children together
			depth--
			pow *= self.branches
		}
		pend.Done() // one task fully processed; matches the per-task pend.Add in Split
	}
}
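
For orientation, the following is a minimal usage sketch, not part of the original file. It assumes NewChunkerParams and the Chunk type defined in this package's chunker.go, and that the caller owns chunkC (Split never closes it, so it must be drained for the whole duration of the call). The splitExample function and the payload argument are hypothetical names introduced only for illustration.

// Hypothetical usage sketch (not part of pyramid.go); assumes it lives in the
// same storage package and that NewChunkerParams comes from chunker.go.
package storage

import (
	"bytes"
	"fmt"
)

func splitExample(payload []byte) error {
	chunker := NewPyramidChunker(NewChunkerParams())

	// The caller owns chunkC: the workers spawned by Split block on sends to
	// it, so it has to be drained while Split runs, and Split never closes it.
	chunkC := make(chan *Chunk)
	done := make(chan struct{})
	go func() {
		defer close(done)
		for chunk := range chunkC {
			_ = chunk // a real caller would hand each chunk to a chunk store here
		}
	}()

	// Passing nil for swg/wwg keeps the sketch free of WaitGroup bookkeeping.
	key, err := chunker.Split(bytes.NewReader(payload), int64(len(payload)), chunkC, nil, nil)

	// Split waits for its workers before returning, so closing chunkC here
	// only unblocks the draining goroutine.
	close(chunkC)
	<-done
	if err != nil {
		return err
	}

	fmt.Printf("root key: %x\n", key)
	return nil
}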