// This file is part of the go-sberex library. The go-sberex library is
// free software: you can redistribute it and/or modify it under the terms
// of the GNU Lesser General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// The go-sberex library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License <http://www.gnu.org/licenses/> for more details.

// Package bmt provides a binary Merkle tree implementation.
package bmt

import (
	"fmt"
	"hash"
	"io"
	"strings"
	"sync"
	"sync/atomic"
)

/*
The Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary Merkle tree built over fixed-size segments
of the underlying chunk using any base hash function (e.g. Keccak-256 SHA3).

It is used as the chunk hash function in swarm, which in turn is the basis for the
128-branching swarm hash: http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e. proving that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed at 32 bytes (called the resolution
of the BMT hash), the EVM word size, to optimize for on-chain BMT verification
as well as the hash size optimal for inclusion proofs in the Merkle tree of the swarm hash.

Two implementations are provided:

* RefHasher is optimized for code simplicity and meant as a reference implementation
* Hasher is optimized for speed, taking advantage of concurrency with a minimalistic
  control structure to coordinate the concurrent routines.
  It implements the ChunkHash interface as well as the Go standard hash.Hash interface

*/

const (
	// DefaultSegmentCount is the maximum number of segments of the underlying chunk
	DefaultSegmentCount = 128 // Should be equal to storage.DefaultBranches
	// DefaultPoolSize is the maximum number of BMT trees used by the hashers, i.e.,
	// the maximum number of concurrent BMT hashing operations performed by the same hasher
	DefaultPoolSize = 8
)
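
// The sketch below is an illustrative usage note added for clarity; it is not
// part of the original file. It assumes a Keccak-256 constructor such as
// sha3.NewKeccak256 from the repository's crypto/sha3 package as the
// BaseHasher; lengthBytes and chunkData are placeholder names.
//
//	pool := NewTreePool(sha3.NewKeccak256, DefaultSegmentCount, DefaultPoolSize)
//	h := New(pool)
//	h.ResetWithLength(lengthBytes) // e.g. an 8-byte encoding of the data span length
//	h.Write(chunkData)             // at most SegmentSize*SegmentCount bytes
//	digest := h.Sum(nil)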

// BaseHasher is a hash.Hash constructor function used for the base hash of the BMT.
type BaseHasher func() hash.Hash

// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT.
// It implements the hash.Hash interface and
// reuses a pool of Trees for amortised memory allocation and resource control.
// It supports order-agnostic concurrent segment writes
// as well as sequential read and write;
// it cannot be used concurrently on more than one chunk and
// can be further appended to after Sum.
// Reset gives back the Tree to the pool and is guaranteed to leave
// the tree and itself in a state reusable for hashing a new chunk.
type Hasher struct {
	pool        *TreePool   // BMT resource pool
	bmt         *Tree       // prebuilt BMT resource for flow control and proofs
	blocksize   int         // segment size (size of hash) also for hash.Hash
	count       int         // segment count
	size        int         // for hash.Hash same as hashsize
	cur         int         // cursor position for rightmost currently open chunk
	segment     []byte      // the rightmost open segment (not complete)
	depth       int         // index of last level
	result      chan []byte // result channel
	hash        []byte      // to record the result
	max         int32       // max segments for SegmentWriter interface
	blockLength []byte      // the block length that needs to be added in Sum
}

// New creates a reusable Hasher,
// implementing the hash.Hash interface;
// it pulls a new Tree from a resource pool for hashing each chunk
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool:      p,
		depth:     depth(p.SegmentCount),
		size:      p.SegmentSize,
		blocksize: p.SegmentSize,
		count:     p.SegmentCount,
		result:    make(chan []byte),
	}
}

// Node is a reusable segment hasher representing a node in a BMT;
// it allows for continued writes after a Sum
// and is left in a completely reusable state after Reset
type Node struct {
	level, index int   // position of node for information/logging only
	initial      bool  // whether it is the first node on its level (index 0)
	root         bool  // whether the node is root to a smaller BMT
	isLeft       bool  // whether it is the left side of the parent double segment
	unbalanced   bool  // indicates if a node has only the left segment
	parent       *Node // BMT connections
	state        int32 // atomic increment implements a concurrent boolean toggle
	left, right  []byte
}

// NewNode is the constructor for segment hasher nodes in the BMT
func NewNode(level, index int, parent *Node) *Node {
	return &Node{
		parent:  parent,
		level:   level,
		index:   index,
		initial: index == 0,
		isLeft:  index%2 == 0,
	}
}

// TreePool provides a pool of Trees used as resources by Hasher;
// a Tree popped from the pool is guaranteed to have a clean state
// for hashing a new chunk.
// Hasher Reset releases the Tree to the pool
type TreePool struct {
	lock         sync.Mutex
	c            chan *Tree
	hasher       BaseHasher
	SegmentSize  int
	SegmentCount int
	Capacity     int
	count        int
}

// NewTreePool creates a Tree pool with hasher, segment size, segment count and capacity;
// on Reserve it reuses free Trees or creates a new one if capacity is not reached
func NewTreePool(hasher BaseHasher, segmentCount, capacity int) *TreePool {
	return &TreePool{
		c:            make(chan *Tree, capacity),
		hasher:       hasher,
		SegmentSize:  hasher().Size(),
		SegmentCount: segmentCount,
		Capacity:     capacity,
	}
}

// Drain drains the pool until it has no more than n resources
func (self *TreePool) Drain(n int) {
	self.lock.Lock()
	defer self.lock.Unlock()
	for len(self.c) > n {
		<-self.c
		self.count--
	}
}
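
// Illustrative sketch (added note, not in the original source) of sharing a
// single TreePool across goroutines: each goroutine uses its own Hasher over
// the same pool, the pool capacity bounds how many trees are in flight, and
// Sum returns the tree to the pool via releaseTree. baseHasher and chunks
// are placeholder names.
//
//	pool := NewTreePool(baseHasher, DefaultSegmentCount, DefaultPoolSize)
//	var wg sync.WaitGroup
//	for _, chunk := range chunks {
//		wg.Add(1)
//		go func(data []byte) {
//			defer wg.Done()
//			h := New(pool)
//			h.Reset()
//			h.Write(data)
//			_ = h.Sum(nil)
//		}(chunk)
//	}
//	wg.Wait()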

// Reserve blocks until it returns an available Tree;
// it reuses free Trees or creates a new one if capacity is not reached
func (self *TreePool) Reserve() *Tree {
	self.lock.Lock()
	defer self.lock.Unlock()
	var t *Tree
	if self.count == self.Capacity {
		return <-self.c
	}
	select {
	case t = <-self.c:
	default:
		t = NewTree(self.hasher, self.SegmentSize, self.SegmentCount)
		self.count++
	}
	return t
}

// Release gives back a Tree to the pool.
// This Tree is guaranteed to be in a reusable state
// and does not need locking
func (self *TreePool) Release(t *Tree) {
	self.c <- t // can never fail but...
}

// Tree is a reusable control structure representing a BMT
// organised in a binary tree.
// Hasher uses a TreePool to pick one for each chunk hash;
// the Tree is 'locked' while not in the pool
type Tree struct {
	leaves []*Node
}

// Draw draws the BMT (badly)
func (self *Tree) Draw(hash []byte, d int) string {
	var left, right []string
	var anc []*Node
	for i, n := range self.leaves {
		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
		if i%2 == 0 {
			anc = append(anc, n.parent)
		}
		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
	}
	anc = self.leaves
	var hashes [][]string
	for l := 0; len(anc) > 0; l++ {
		var nodes []*Node
		hash := []string{""}
		for i, n := range anc {
			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				nodes = append(nodes, n.parent)
			}
		}
		hash = append(hash, "")
		hashes = append(hashes, hash)
		anc = nodes
	}
	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
	total := 60
	del := " "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		var textlen int
		hash := hashes[i]
		for _, s := range hash {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(hash)
		}
		delsize := (total - textlen) / (len(hash) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
		rows = append(rows, row)

	}
	rows = append(rows, strings.Join(left, " "))
	rows = append(rows, strings.Join(right, " "))
	return strings.Join(rows, "\n") + "\n"
}

// NewTree initialises the Tree by building up the nodes of a BMT.
// Segment size is stipulated to be the size of the hash;
// segmentCount needs to be a positive integer, does not need to be
// a power of two and can even be an odd number.
// segmentSize * segmentCount determines the maximum chunk size
// hashed using the tree
func NewTree(hasher BaseHasher, segmentSize, segmentCount int) *Tree {
	n := NewNode(0, 0, nil)
	n.root = true
	prevlevel := []*Node{n}
	// iterate over levels and create 2^level nodes on each level
	level := 1
	count := 2
	for d := 1; d <= depth(segmentCount); d++ {
		nodes := make([]*Node, count)
		for i := 0; i < len(nodes); i++ {
			parent := prevlevel[i/2]
			t := NewNode(level, i, parent)
			nodes[i] = t
		}
		prevlevel = nodes
		level++
		count *= 2
	}
	// the datanode level is the nodes on the last level, where data segments are written
	return &Tree{
		leaves: prevlevel,
	}
}
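
// Worked example (added note, not in the original source): with
// segmentSize = 32 and segmentCount = DefaultSegmentCount = 128,
// depth(128) = 6, so NewTree builds levels 1..6 below the root and keeps the
// 2^6 = 64 nodes of the last level as leaves. Each leaf covers a double
// segment (left and right), so the tree spans 64*2 = 128 segments, i.e. a
// maximum chunk size of 128*32 = 4096 bytes.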

// methods needed by hash.Hash

// Size returns the size
func (self *Hasher) Size() int {
	return self.size
}

// BlockSize returns the block size
func (self *Hasher) BlockSize() int {
	return self.blocksize
}

// Sum returns the hash of the buffer;
// the hash.Hash interface Sum method appends the byte slice to the underlying
// data before it calculates and returns the hash of the chunk
func (self *Hasher) Sum(b []byte) (r []byte) {
	t := self.bmt
	i := self.cur
	n := t.leaves[i]
	j := i
	// must run strictly before all nodes calculate
	// datanodes are guaranteed to have a parent
	if len(self.segment) > self.size && i > 0 && n.parent != nil {
		n = n.parent
	} else {
		i *= 2
	}
	d := self.finalise(n, i)
	self.writeSegment(j, self.segment, d)
	c := <-self.result
	self.releaseTree()

	// sha3(length + BMT(pure_chunk))
	if self.blockLength == nil {
		return c
	}
	res := self.pool.hasher()
	res.Reset()
	res.Write(self.blockLength)
	res.Write(c)
	return res.Sum(nil)
}

// Hasher implements the SwarmHash interface

// Hash waits for the hasher result and returns it;
// caller must call this on a BMT Hasher being written to
func (self *Hasher) Hash() []byte {
	return <-self.result
}
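
// Illustrative note (added for clarity, mirroring the code in Sum above): when
// a length prefix has been set via ResetWithLength, the final digest is the
// base hash of the length bytes followed by the BMT root, i.e.
// baseHash(blockLength || BMT(chunk)); with a nil blockLength the BMT root is
// returned directly.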

// Hasher implements the io.Writer interface

// Write fills the buffer to hash;
// with every full segment it launches a hasher goroutine
// that shoots up the BMT
func (self *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	s := self.segment
	i := self.cur
	count := (self.count + 1) / 2
	need := self.count*self.size - self.cur*2*self.size
	size := self.size
	if need > size {
		size *= 2
	}
	if l < need {
		need = l
	}
	// calculate the missing bytes to complete the current open segment
	rest := size - len(s)
	if need < rest {
		rest = need
	}
	s = append(s, b[:rest]...)
	need -= rest
	// read full segments and the last possibly partial segment
	for need > 0 && i < count-1 {
		// push all finished segments we read
		self.writeSegment(i, s, self.depth)
		need -= size
		if need < 0 {
			size += need
		}
		s = b[rest : rest+size]
		rest += size
		i++
	}
	self.segment = s
	self.cur = i
	// otherwise, we can assume len(s) == 0, so all of the buffer is read and the chunk is not yet full
	return l, nil
}

// Hasher implements the io.ReaderFrom interface

// ReadFrom reads from io.Reader and appends to the data to hash using Write;
// it reads until the chunk to hash is of maximum length or the reader reaches EOF.
// Caller must Reset the hasher prior to the call
func (self *Hasher) ReadFrom(r io.Reader) (m int64, err error) {
	bufsize := self.size*self.count - self.size*self.cur - len(self.segment)
	buf := make([]byte, bufsize)
	var read int
	for {
		var n int
		n, err = r.Read(buf)
		read += n
		if err == io.EOF || read == len(buf) {
			hash := self.Sum(buf[:n])
			if read == len(buf) {
				err = NewEOC(hash)
			}
			break
		}
		if err != nil {
			break
		}
		n, err = self.Write(buf[:n])
		if err != nil {
			break
		}
	}
	return int64(read), err
}

// Reset needs to be called before writing to the hasher
func (self *Hasher) Reset() {
	self.getTree()
	self.blockLength = nil
}

// Hasher implements the SwarmHash interface

// ResetWithLength needs to be called before writing to the hasher;
// the argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash
func (self *Hasher) ResetWithLength(l []byte) {
	self.Reset()
	self.blockLength = l
}

// releaseTree gives back the Tree to the pool whereby it unlocks;
// it resets the tree, segment and index
func (self *Hasher) releaseTree() {
	if self.bmt != nil {
		n := self.bmt.leaves[self.cur]
		for ; n != nil; n = n.parent {
			n.unbalanced = false
			if n.parent != nil {
				n.root = false
			}
		}
		self.pool.Release(self.bmt)
		self.bmt = nil
	}
	self.cur = 0
	self.segment = nil
}

// writeSegment hashes the i-th (possibly double) segment and launches the
// goroutine that propagates the result up the tree
func (self *Hasher) writeSegment(i int, s []byte, d int) {
	h := self.pool.hasher()
	n := self.bmt.leaves[i]

	if len(s) > self.size && n.parent != nil {
		go func() {
			h.Reset()
			h.Write(s)
			s = h.Sum(nil)

			if n.root {
				self.result <- s
				return
			}
			self.run(n.parent, h, d, n.index, s)
		}()
		return
	}
	go self.run(n, h, d, i*2, s)
}
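
// run climbs from a node towards the root carrying the hash computed on the
// level below (descriptive note added for clarity; behaviour is unchanged).
// At a balanced node the first sibling to arrive just records its hash and
// returns (toggle); the second one hashes left||right with the base hasher
// and continues upward. On the unbalanced rightmost path marked by finalise
// there is no right sibling to wait for, so the left hash is generally
// forwarded without re-hashing. Once the root node is reached, the final
// hash is sent on the result channel.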
func (self *Hasher) run(n *Node, h hash.Hash, d int, i int, s []byte) {
	isLeft := i%2 == 0
	for {
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		if !n.unbalanced && n.toggle() {
			return
		}
		if !n.unbalanced || !isLeft || i == 0 && d == 0 {
			h.Reset()
			h.Write(n.left)
			h.Write(n.right)
			s = h.Sum(nil)
		} else {
			s = append(n.left, n.right...)
		}

		self.hash = s
		if n.root {
			self.result <- s
			return
		}

		isLeft = n.isLeft
		n = n.parent
		i++
	}
}

// getTree obtains a BMT resource by reserving one from the pool
func (self *Hasher) getTree() *Tree {
	if self.bmt != nil {
		return self.bmt
	}
	t := self.pool.Reserve()
	self.bmt = t
	return t
}

// toggle is an atomic bool toggle implementing a concurrent reusable 2-state object;
// an atomic increment taken modulo 2 implements the toggle.
// It returns true if the toggler just put it in the active/waiting state
func (self *Node) toggle() bool {
	return atomic.AddInt32(&self.state, 1)%2 == 1
}

// hashstr returns a short hex prefix of b, used for logging and drawing
func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

// depth returns the number of levels below the root needed for a BMT whose
// leaves each cover a double segment of an n-segment chunk
func depth(n int) (d int) {
	for l := (n - 1) / 2; l > 0; l /= 2 {
		d++
	}
	return d
}

// finalise follows the zigzag path on the tree belonging
// to the final data segment
func (self *Hasher) finalise(n *Node, i int) (d int) {
	isLeft := i%2 == 0
	for {
		// when the final segment's path is going via left segments,
		// the incoming data is pushed to the parent upon pulling the left;
		// we do not need to toggle the state since this condition is
		// detectable
		n.unbalanced = isLeft
		n.right = nil
		if n.initial {
			n.root = true
			return d
		}
		isLeft = n.isLeft
		n = n.parent
		d++
	}
}

// EOC (end of chunk) implements the error interface
type EOC struct {
	Hash []byte // read the hash of the chunk off the error
}

// Error returns the error string
func (self *EOC) Error() string {
	return fmt.Sprintf("hasher limit reached, chunk hash: %x", self.Hash)
}

// NewEOC creates a new end-of-chunk error with the hash
func NewEOC(hash []byte) *EOC {
	return &EOC{hash}
}
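
// A hedged usage sketch for ReadFrom and the EOC sentinel (added note, not
// part of the original file; pool and r are placeholder names): ReadFrom
// keeps writing until the chunk is full or the reader is drained, and it
// signals a full chunk by returning an *EOC error that already carries the
// chunk hash.
//
//	h := New(pool)
//	h.Reset()
//	n, err := h.ReadFrom(r)
//	if eoc, ok := err.(*EOC); ok {
//		_ = eoc.Hash // the BMT hash of the full chunk; n bytes were consumed
//	}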