github.com/alanchchen/go-ethereum@v1.6.6-0.20170601190819-6171d01b1195/swarm/storage/chunker.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash"
	"io"
	"sync"
)

/*
The distributed storage implemented in this package requires fixed-size chunks of content.

Chunker is the interface to a component that is responsible for disassembling and assembling larger data.

TreeChunker implements a Chunker based on a tree structure defined as follows:

1 each node in the tree, including the root and other branching nodes, is stored as a chunk.

2 branching nodes encode data contents that include the size of the data slice covered by the entire subtree under the node, as well as the hash keys of all its children:
data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}

3 leaf nodes encode an actual subslice of the input data.

4 if the data size is not more than the maximum chunk size, the data is stored in a single chunk
  key = hash(int64(size) + data)

5 if the data size is more than chunksize*branches^l, but no more than chunksize*branches^(l+1), the data vector is split into slices of chunksize*branches^l length (except the last one).
  key = hash(int64(size) + key(slice0) + key(slice1) + ...)

The underlying hash function is configurable.
*/

const (
	defaultHash = "SHA3" // http://golang.org/pkg/hash/#Hash
	// defaultHash = "SHA256" // http://golang.org/pkg/hash/#Hash
	defaultBranches int64 = 128
	// hashSize  int64 = hasherfunc.New().Size() // hasher knows its own length in bytes
	// chunkSize int64 = branches * hashSize     // chunk is defined as this
)

/*
TreeChunker is a concrete implementation of data chunking.
This chunker works in a simple way: it builds a tree out of the document so that each node either represents a chunk of real data or a branching non-leaf node of the tree. In particular, each such non-leaf chunk is a concatenation of the hashes of its respective children. This scheme simultaneously guarantees data integrity as well as self-addressing. Abstract nodes are transparent, since their represented size component is strictly greater than their maximum data size, as they encode a subtree.

If all is well, it is possible to implement this by simply composing readers, so that no extra allocation or buffering is necessary for the data splitting and joining. This means that in principle there can be direct IO between memory, file system and network socket (a bzz peer's storage request is read from the socket). In practice there may be a need for several stages of internal buffering.
The hashing itself does use extra copies and allocation, though, since it needs to.
*/
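// The sketch below is illustrative and not part of the original file:
// treeDepth is a hypothetical helper that mirrors the depth calculation
// used by Split and ReadAt further down, to show how the tree dimensions
// fall out of the parameters described above. With the defaults (a 32-byte
// SHA3 hash and 128 branches, i.e. a 4096-byte chunk), a 1 MB document
// yields depth 2, since 4096*128 = 524288 < 1048576 <= 4096*128^2.
func treeDepth(size, chunkSize, branches int64) (depth int, treeSize int64) {
	// lowest depth such that chunkSize*branches^depth >= size
	for treeSize = chunkSize; treeSize < size; treeSize *= branches {
		depth++
	}
	return depth, treeSize
}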
type ChunkerParams struct {
	Branches int64
	Hash     string
}

func NewChunkerParams() *ChunkerParams {
	return &ChunkerParams{
		Branches: defaultBranches,
		Hash:     defaultHash,
	}
}

type TreeChunker struct {
	branches int64
	hashFunc Hasher
	// calculated
	hashSize    int64 // self.hashFunc.New().Size()
	chunkSize   int64 // hashSize * branches
	workerCount int
}

func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
	self = &TreeChunker{}
	self.hashFunc = MakeHashFunc(params.Hash)
	self.branches = params.Branches
	self.hashSize = int64(self.hashFunc().Size())
	self.chunkSize = self.hashSize * self.branches
	self.workerCount = 1
	return
}

// func (self *TreeChunker) KeySize() int64 {
// 	return self.hashSize
// }

// String() for pretty printing
func (self *Chunk) String() string {
	return fmt.Sprintf("Key: %v TreeSize: %v Chunksize: %v", self.Key.Log(), self.Size, len(self.SData))
}

type hashJob struct {
	key      Key
	chunk    []byte
	size     int64
	parentWg *sync.WaitGroup
}

func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {

	if self.chunkSize <= 0 {
		panic("chunker must be initialised")
	}

	jobC := make(chan *hashJob, 2*processors)
	wg := &sync.WaitGroup{}
	errC := make(chan error)
	quitC := make(chan bool)

	// wwg = workers waitgroup, keeps track of hash workers spawned by this split call
	if wwg != nil {
		wwg.Add(1)
	}
	go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)

	depth := 0
	treeSize := self.chunkSize

	// takes the lowest depth such that chunksize*HashCount^(depth+1) > size
	// a power series; finds the order of magnitude of the data size in base hashCount, i.e. the number of levels of branching in the resulting tree.
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}

	key := make([]byte, self.hashFunc().Size())
	// this waitgroup member is released after the root hash is calculated
	wg.Add(1)
	// launch the actual recursive function, passing the waitgroups
	go self.split(depth, treeSize/self.branches, key, data, size, jobC, chunkC, errC, quitC, wg, swg, wwg)

	// closes the internal error channel once all subprocesses in the workgroup have finished
	go func() {
		// wait for all threads to finish
		wg.Wait()
		// if the storage waitgroup is non-nil, we wait for storage to finish too
		if swg != nil {
			swg.Wait()
		}
		close(errC)
	}()

	select {
	case err := <-errC:
		if err != nil {
			close(quitC)
			return nil, err
		}
		// TODO: add a timeout
	}
	return key, nil
}
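// exampleSplit is a minimal usage sketch, not part of the original file: it
// drives Split with a toy in-memory responder draining the chunk channel.
// The store map and the wg.Done() call after "persisting" each chunk are
// assumptions modelled on how a store loop would consume these chunks;
// real callers should go through the DPA rather than this helper.
func exampleSplit(data io.Reader, size int64) (Key, map[string]*Chunk, error) {
	chunker := NewTreeChunker(NewChunkerParams())
	store := make(map[string]*Chunk) // hypothetical stand-in for a ChunkStore
	chunkC := make(chan *Chunk)
	swg := &sync.WaitGroup{}
	go func() {
		for chunk := range chunkC {
			store[string(chunk.Key)] = chunk
			if chunk.wg != nil {
				chunk.wg.Done() // release the storage waitgroup entry added in hashChunk
			}
		}
	}()
	// Split blocks until the whole tree is hashed and, because swg is
	// non-nil, until every chunk has been handed to the responder above.
	key, err := chunker.Split(data, size, chunkC, swg, nil)
	if err != nil {
		return nil, nil, err
	}
	close(chunkC) // safe on the success path: all chunks have been delivered
	return key, store, nil
}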
func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {

	for depth > 0 && size < treeSize {
		treeSize /= self.branches
		depth--
	}

	if depth == 0 {
		// leaf nodes -> content chunks
		chunkData := make([]byte, size+8)
		binary.LittleEndian.PutUint64(chunkData[0:8], uint64(size))
		var readBytes int64
		for readBytes < size {
			n, err := data.Read(chunkData[8+readBytes:])
			readBytes += int64(n)
			if err != nil && !(err == io.EOF && readBytes == size) {
				errC <- err
				return
			}
		}
		select {
		case jobC <- &hashJob{key, chunkData, size, parentWg}:
		case <-quitC:
		}
		return
	}
	// depth > 0
	// intermediate chunk containing the hashes of its child nodes
	branchCnt := int64((size + treeSize - 1) / treeSize)

	var chunk []byte = make([]byte, branchCnt*self.hashSize+8)
	var pos, i int64

	binary.LittleEndian.PutUint64(chunk[0:8], uint64(size))

	childrenWg := &sync.WaitGroup{}
	var secSize int64
	for i < branchCnt {
		// the last item can have shorter data
		if size-pos < treeSize {
			secSize = size - pos
		} else {
			secSize = treeSize
		}
		// the hash of that data
		subTreeKey := chunk[8+i*self.hashSize : 8+(i+1)*self.hashSize]

		childrenWg.Add(1)
		self.split(depth-1, treeSize/self.branches, subTreeKey, data, secSize, jobC, chunkC, errC, quitC, childrenWg, swg, wwg)

		i++
		pos += treeSize
	}
	// wait for all the children to complete calculating their hashes and copying them onto sections of the chunk
	childrenWg.Wait()
	if len(jobC) > self.workerCount && self.workerCount < processors {
		if wwg != nil {
			wwg.Add(1)
		}
		self.workerCount++
		go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
	}
	select {
	case jobC <- &hashJob{key, chunk, size, parentWg}:
	case <-quitC:
	}
}

func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
	hasher := self.hashFunc()
	if wwg != nil {
		defer wwg.Done()
	}
	for {
		select {

		case job, ok := <-jobC:
			if !ok {
				return
			}
			// the chunk's child hashes are in place; now hash the chunk itself
			hasher.Reset()
			self.hashChunk(hasher, job, chunkC, swg)
		case <-quitC:
			return
		}
	}
}
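// childKeys is an illustrative sketch, not part of the original file: it
// extracts the child keys packed into a branching chunk, mirroring the
// subTreeKey slicing done by split above (and walked by join below). Every
// chunk starts with an 8-byte little-endian uint64 holding the size of the
// data covered by its entire subtree; in a branching chunk the rest is a
// packed run of hashSize-wide child keys.
func childKeys(sdata []byte, hashSize int64) (size uint64, keys []Key) {
	size = binary.LittleEndian.Uint64(sdata[0:8])
	for off := int64(8); off+hashSize <= int64(len(sdata)); off += hashSize {
		keys = append(keys, Key(sdata[off:off+hashSize]))
	}
	return size, keys
}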
// The TreeChunker's own Hash hashes together
// - the size (of the subtree encoded in the Chunk)
// - the Chunk, i.e. the contents read from the input reader
func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
	hasher.Write(job.chunk)
	h := hasher.Sum(nil)
	newChunk := &Chunk{
		Key:   h,
		SData: job.chunk,
		Size:  job.size,
		wg:    swg,
	}

	// report the hash of this chunk one level up (the key corresponds to the proper subslice of the parent chunk)
	copy(job.key, h)
	// send off the new chunk to storage
	if chunkC != nil {
		if swg != nil {
			swg.Add(1)
		}
	}
	job.parentWg.Done()

	if chunkC != nil {
		chunkC <- newChunk
	}
}

// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
	key       Key         // root key
	chunkC    chan *Chunk // chunk channel to send retrieve requests on
	chunk     *Chunk      // root chunk, carries the size of the entire subtree
	off       int64       // offset
	chunkSize int64       // inherited from chunker
	branches  int64       // inherited from chunker
	hashSize  int64       // inherited from chunker
}

// Join implements the Joiner interface
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {

	return &LazyChunkReader{
		key:       key,
		chunkC:    chunkC,
		chunkSize: self.chunkSize,
		branches:  self.branches,
		hashSize:  self.hashSize,
	}
}

// Size is meant to be called on the LazySectionReader
func (self *LazyChunkReader) Size(quitC chan bool) (n int64, err error) {
	if self.chunk != nil {
		return self.chunk.Size, nil
	}
	chunk := retrieve(self.key, self.chunkC, quitC)
	if chunk == nil {
		select {
		case <-quitC:
			return 0, errors.New("aborted")
		default:
			return 0, fmt.Errorf("root chunk not found for %v", self.key.Hex())
		}
	}
	self.chunk = chunk
	return chunk.Size, nil
}

// ReadAt can be called numerous times
// concurrent reads are allowed
// Size() needs to be called synchronously on the LazyChunkReader first
func (self *LazyChunkReader) ReadAt(b []byte, off int64) (read int, err error) {
	// this is correct, a swarm doc cannot be zero length, so no EOF is expected
	if len(b) == 0 {
		return 0, nil
	}
	quitC := make(chan bool)
	size, err := self.Size(quitC)
	if err != nil {
		return 0, err
	}

	errC := make(chan error)

	// calculate depth and max treeSize
	var treeSize int64
	var depth int
	treeSize = self.chunkSize
	for ; treeSize < size; treeSize *= self.branches {
		depth++
	}
	wg := sync.WaitGroup{}
	wg.Add(1)
	go self.join(b, off, off+int64(len(b)), depth, treeSize/self.branches, self.chunk, &wg, errC, quitC)
	go func() {
		wg.Wait()
		close(errC)
	}()

	err = <-errC
	if err != nil {
		close(quitC)

		return 0, err
	}
	if off+int64(len(b)) >= size {
		return len(b), io.EOF
	}
	return len(b), nil
}
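// exampleJoin is a minimal usage sketch, not part of the original file: it
// pairs Join with a toy responder that answers retrieval requests from an
// in-memory map (for instance the one filled by exampleSplit above). The
// delivery protocol is the one retrieve below expects: copy the stored
// payload and size into the request chunk, then close its C channel. The
// map-based store is an assumption; real lookups go through the DPA.
func exampleJoin(key Key, store map[string]*Chunk) ([]byte, error) {
	chunker := NewTreeChunker(NewChunkerParams())
	chunkC := make(chan *Chunk)
	go func() {
		for req := range chunkC {
			if stored, ok := store[string(req.Key)]; ok {
				req.SData = stored.SData
				req.Size = stored.Size
			}
			close(req.C) // signal delivery (empty SData means not found)
		}
	}()
	reader := chunker.Join(key, chunkC)
	size, err := reader.Size(nil)
	if err != nil {
		return nil, err
	}
	b := make([]byte, size)
	if _, err := reader.ReadAt(b, 0); err != nil && err != io.EOF {
		return nil, err
	}
	close(chunkC) // safe on the success path: every retrieval has completed
	return b, nil
}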
func (self *LazyChunkReader) join(b []byte, off int64, eoff int64, depth int, treeSize int64, chunk *Chunk, parentWg *sync.WaitGroup, errC chan error, quitC chan bool) {
	defer parentWg.Done()

	// find the appropriate block level
	for chunk.Size < treeSize && depth > 0 {
		treeSize /= self.branches
		depth--
	}

	// leaf chunk found
	if depth == 0 {
		extra := 8 + eoff - int64(len(chunk.SData))
		if extra > 0 {
			eoff -= extra
		}
		copy(b, chunk.SData[8+off:8+eoff])
		return // simply give back the chunk's data for content chunks
	}

	// subtree
	start := off / treeSize
	end := (eoff + treeSize - 1) / treeSize

	wg := &sync.WaitGroup{}
	defer wg.Wait()

	for i := start; i < end; i++ {
		soff := i * treeSize
		roff := soff
		seoff := soff + treeSize

		if soff < off {
			soff = off
		}
		if seoff > eoff {
			seoff = eoff
		}
		if depth > 1 {
			wg.Wait()
		}
		wg.Add(1)
		go func(j int64) {
			childKey := chunk.SData[8+j*self.hashSize : 8+(j+1)*self.hashSize]
			chunk := retrieve(childKey, self.chunkC, quitC)
			if chunk == nil {
				select {
				case errC <- fmt.Errorf("chunk %v-%v not found", off, off+treeSize):
				case <-quitC:
				}
				return
			}
			if soff < off {
				soff = off
			}
			self.join(b[soff-off:seoff-off], soff-roff, seoff-roff, depth-1, treeSize/self.branches, chunk, wg, errC, quitC)
		}(i)
	}
}

// retrieve is a helper that submits a retrieval request for a key to a queue (the DPA) and
// blocks until the chunk times out or arrives;
// it aborts if quitC is readable
func retrieve(key Key, chunkC chan *Chunk, quitC chan bool) *Chunk {
	chunk := &Chunk{
		Key: key,
		C:   make(chan bool), // close channel to signal data delivery
	}
	// submit chunk for retrieval
	select {
	case chunkC <- chunk: // submit retrieval request; someone should be listening on the other side (or we will time out globally)
	case <-quitC:
		return nil
	}
	// wait for the chunk retrieval
	select {
	case <-quitC:
		// this is how we control process leakage (quitC is closed once join is finished (after timeout))
		return nil
	case <-chunk.C: // bells are ringing, data has been delivered
	}
	if len(chunk.SData) == 0 {
		return nil
	}
	return chunk
}

// Read keeps a cursor so it cannot be called simultaneously; see ReadAt
func (self *LazyChunkReader) Read(b []byte) (read int, err error) {
	read, err = self.ReadAt(b, self.off)

	self.off += int64(read)
	return
}

// completely analogous to the standard SectionReader implementation
var errWhence = errors.New("Seek: invalid whence")
var errOffset = errors.New("Seek: invalid offset")

func (s *LazyChunkReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	default:
		return 0, errWhence
	case 0:
		offset += 0
	case 1:
		offset += s.off
	case 2:
		if s.chunk == nil { // seeking from the end requires the root chunk for the size; call Size first
			_, err := s.Size(nil)
			if err != nil {
				return 0, fmt.Errorf("can't get size: %v", err)
			}
		}
		offset += s.chunk.Size
	}

	if offset < 0 {
		return 0, errOffset
	}
	s.off = offset
	return offset, nil
}
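// readLast is an illustrative sketch, not part of the original file: it
// shows Seek's whence semantics on a LazyChunkReader (0 = absolute,
// 1 = relative to the cursor, 2 = relative to the end). Seeking from the
// end fetches the root chunk via Size when necessary, so the chunk channel
// must already be served. readLast is a hypothetical helper returning the
// final n bytes of the document.
func readLast(r *LazyChunkReader, n int64) ([]byte, error) {
	if _, err := r.Seek(-n, 2); err != nil {
		return nil, err
	}
	b := make([]byte, n)
	read, err := r.Read(b)
	if err != nil && err != io.EOF {
		return nil, err
	}
	return b[:read], nil
}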