github.com/MaynardMiner/ethereumprogpow@v1.8.23/swarm/bmt/bmt.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package bmt provides a binary merkle tree implementation used for swarm chunk hashing.
package bmt

import (
	"fmt"
	"hash"
	"strings"
	"sync"
	"sync/atomic"
)

/*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g., keccak 256 SHA3).
Chunks with data shorter than the fixed size are hashed as if they had zero padding.

BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e., proving that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed to the size of the base hash (called the
resolution of the BMT hash); using the Keccak256 SHA3 hash this is 32 bytes, the EVM
word size, which is optimal both for on-chain BMT verification and for inclusion proofs
in the merkle tree of the swarm hash.

Two implementations are provided:

* RefHasher is optimized for code simplicity and meant as a reference implementation
  that is simple to understand
* Hasher is optimized for speed taking advantage of concurrency with minimalistic
  control structure to coordinate the concurrent routines

BMT Hasher implements the following interfaces:
* standard golang hash.Hash - synchronous, reusable
* SwarmHash - SumWithSpan provided
* io.Writer - synchronous left-to-right data writer
* AsyncWriter - concurrent section writes and asynchronous Sum call
*/

const (
	// PoolSize is the maximum number of bmt trees used by the hashers, i.e.,
	// the maximum number of concurrent BMT hashing operations performed by the same hasher
	PoolSize = 8
)
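// Usage sketch (an illustrative addition, not part of the upstream docs):
// hashing a single chunk through the synchronous hash.Hash interface, assuming
// golang.org/x/crypto/sha3 provides the Keccak256 base hash; `span` and
// `chunkData` are hypothetical caller-supplied values. With 128 segments of
// 32 bytes each, the pool covers swarm's 4096-byte chunks:
//
//	pool := NewTreePool(sha3.NewLegacyKeccak256, 128, PoolSize)
//	h := New(pool)
//	h.ResetWithLength(span) // span: encoded length of the data subsumed under the chunk
//	h.Write(chunkData)      // chunkData: at most pool.Size (= 4096) bytes
//	root := h.Sum(nil)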
// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
// It is implemented by, e.g., Keccak256 SHA3 via sha3.NewLegacyKeccak256.
type BaseHasherFunc func() hash.Hash

// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT
// - implements the hash.Hash interface
// - reuses a pool of trees for amortised memory allocation and resource control
// - supports order-agnostic concurrent segment writes and section (double segment) writes
//   as well as sequential read and write
// - the same hasher instance must not be called concurrently on more than one chunk
// - the same hasher instance is synchronously reusable
// - Sum gives back the tree to the pool and is guaranteed to leave
//   the tree and itself in a state reusable for hashing a new chunk
// - generates and verifies segment inclusion proofs (TODO:)
type Hasher struct {
	pool *TreePool // BMT resource pool
	bmt  *tree     // prebuilt BMT resource for flowcontrol and proofs
}

// New creates a reusable BMT Hasher that
// pulls a new tree from a resource pool for hashing each chunk
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool: p,
	}
}

// TreePool provides a pool of trees used as resources by the BMT Hasher.
// A tree popped from the pool is guaranteed to have a clean state ready
// for hashing a new chunk.
type TreePool struct {
	lock         sync.Mutex
	c            chan *tree     // the channel to obtain a resource from the pool
	hasher       BaseHasherFunc // base hasher to use for the BMT levels
	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
	SegmentCount int            // the number of segments on the base level of the BMT
	Capacity     int            // pool capacity, controls concurrency
	Depth        int            // depth of the bmt trees = int(log2(segmentCount))+1
	Size         int            // the total length of the data (count * size)
	count        int            // current count of (ever) allocated resources
	zerohashes   [][]byte       // lookup table for predictable padding subtrees for all levels
}

// NewTreePool creates a tree pool with hasher, segment size, segment count and capacity;
// on Hasher.getTree it reuses free trees or creates a new one if capacity is not reached
func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
	// initialise the zerohashes lookup table
	depth := calculateDepthFor(segmentCount)
	segmentSize := hasher().Size()
	zerohashes := make([][]byte, depth+1)
	zeros := make([]byte, segmentSize)
	zerohashes[0] = zeros
	h := hasher()
	for i := 1; i < depth+1; i++ {
		zeros = doSum(h, nil, zeros, zeros)
		zerohashes[i] = zeros
	}
	return &TreePool{
		c:            make(chan *tree, capacity),
		hasher:       hasher,
		SegmentSize:  segmentSize,
		SegmentCount: segmentCount,
		Capacity:     capacity,
		Size:         segmentCount * segmentSize,
		Depth:        depth,
		zerohashes:   zerohashes,
	}
}

// Drain drains the pool until it has no more than n resources
func (p *TreePool) Drain(n int) {
	p.lock.Lock()
	defer p.lock.Unlock()
	for len(p.c) > n {
		<-p.c
		p.count--
	}
}

// reserve blocks until it returns an available tree;
// it reuses free trees or creates a new one if capacity is not reached
// TODO: should use a context here
func (p *TreePool) reserve() *tree {
	p.lock.Lock()
	defer p.lock.Unlock()
	var t *tree
	if p.count == p.Capacity {
		return <-p.c
	}
	select {
	case t = <-p.c:
	default:
		t = newTree(p.SegmentSize, p.Depth, p.hasher)
		p.count++
	}
	return t
}
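// Concurrency sketch (illustrative): reserve and release pair up around each
// chunk hash, so at most Capacity trees are ever checked out of a pool and
// the channel send in release below can never block:
//
//	t := p.reserve() // blocks while all Capacity trees are in use
//	// ... hash one chunk using t ...
//	p.release(t)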
// release gives back a tree to the pool.
// this tree is guaranteed to be in reusable state
func (p *TreePool) release(t *tree) {
	p.c <- t // can never fail ...
}

// tree is a reusable control structure representing a BMT
// organised in a binary tree;
// Hasher uses a TreePool to obtain a tree for each chunk hash;
// the tree is 'locked' while not in the pool
type tree struct {
	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
	cursor  int         // index of rightmost currently open segment
	offset  int         // offset (cursor position) within currently open segment
	section []byte      // the rightmost open section (double segment)
	result  chan []byte // result channel
	span    []byte      // the span of the data subsumed under the chunk
}

// node is a reusable segment hasher representing a node in a BMT
type node struct {
	isLeft      bool      // whether it is the left side of the parent double segment
	parent      *node     // pointer to parent node in the BMT
	state       int32     // atomic increment impl concurrent boolean toggle
	left, right []byte    // this is where the two children sections are written
	hasher      hash.Hash // preconstructed hasher on nodes
}

// newNode constructs a segment hasher node in the BMT (used by newTree)
func newNode(index int, parent *node, hasher hash.Hash) *node {
	return &node{
		parent: parent,
		isLeft: index%2 == 0,
		hasher: hasher,
	}
}

// draw draws the BMT (badly)
func (t *tree) draw(hash []byte) string {
	var left, right []string
	var anc []*node
	for i, n := range t.leaves {
		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
		if i%2 == 0 {
			anc = append(anc, n.parent)
		}
		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
	}
	anc = t.leaves
	var hashes [][]string
	for l := 0; len(anc) > 0; l++ {
		var nodes []*node
		hash := []string{""}
		for i, n := range anc {
			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				nodes = append(nodes, n.parent)
			}
		}
		hash = append(hash, "")
		hashes = append(hashes, hash)
		anc = nodes
	}
	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
	total := 60
	// wide run of spaces used as the source of column separators below
	del := "                                                                      "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		var textlen int
		hash := hashes[i]
		for _, s := range hash {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(hash)
		}
		delsize := (total - textlen) / (len(hash) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
		rows = append(rows, row)

	}
	rows = append(rows, strings.Join(left, "  "))
	rows = append(rows, strings.Join(right, "  "))
	return strings.Join(rows, "\n") + "\n"
}
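// Shape sketch (illustrative): for segmentCount = 8 the pool computes depth 3,
// so newTree below builds one root, two intermediate nodes and four leaves;
// each leaf hashes a double segment, together covering the 8 base segments:
//
//	          root
//	        /      \
//	      n          n
//	     / \        / \
//	    L   L      L   L    <- leaves, one per double segment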
// newTree initialises a tree by building up the nodes of a BMT
// - segment size is stipulated to be the size of the hash
func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree {
	n := newNode(0, nil, hashfunc())
	prevlevel := []*node{n}
	// iterate over levels and create 2^(depth-level) nodes;
	// the 0 level is on double segment sections so we start at depth - 2
	count := 2
	for level := depth - 2; level >= 0; level-- {
		nodes := make([]*node, count)
		for i := 0; i < count; i++ {
			parent := prevlevel[i/2]
			var hasher hash.Hash
			if level == 0 {
				hasher = hashfunc()
			}
			nodes[i] = newNode(i, parent, hasher)
		}
		prevlevel = nodes
		count *= 2
	}
	// the datanode level is the nodes on the last level
	return &tree{
		leaves:  prevlevel,
		result:  make(chan []byte),
		section: make([]byte, 2*segmentSize),
	}
}

// methods needed to implement hash.Hash

// Size returns the digest size of the hash
func (h *Hasher) Size() int {
	return h.pool.SegmentSize
}

// BlockSize returns the block size (a section, i.e., a double segment)
func (h *Hasher) BlockSize() int {
	return 2 * h.pool.SegmentSize
}

// Sum returns the BMT root hash of the buffer;
// using Sum presupposes sequential synchronous writes (io.Writer interface).
// the hash.Hash interface Sum method appends the byte slice to the underlying
// data before it calculates and returns the hash of the chunk.
// the caller must make sure Sum is not called concurrently with Write or writeSection
func (h *Hasher) Sum(b []byte) (s []byte) {
	t := h.getTree()
	// write the last section with final flag set to true
	go h.writeSection(t.cursor, t.section, true, true)
	// wait for the result
	s = <-t.result
	span := t.span
	// release the tree resource back to the pool
	h.releaseTree()
	// b + sha3(span + BMT(pure_chunk))
	if len(span) == 0 {
		return append(b, s...)
	}
	return doSum(h.pool.hasher(), b, span, s)
}

// methods needed to implement the SwarmHash and the io.Writer interfaces

// Write sequentially adds to the buffer to be hashed;
// with every full section it calls writeSection in a goroutine
func (h *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l == 0 || l > h.pool.Size {
		return 0, nil
	}
	t := h.getTree()
	secsize := 2 * h.pool.SegmentSize
	// calculate the length missing to complete the current open section
	smax := secsize - t.offset
	// if at the beginning of the chunk or in the middle of a section
	if t.offset < secsize {
		// fill up the current section from the buffer
		copy(t.section[t.offset:], b)
		// if the input buffer is consumed and the open section is not complete,
		// then advance the offset and return
		if smax == 0 {
			smax = secsize
		}
		if l <= smax {
			t.offset += l
			return l, nil
		}
	} else {
		// if at the end of a section
		if t.cursor == h.pool.SegmentCount*2 {
			return 0, nil
		}
	}
	// read full sections and the last possibly partial section from the input buffer
	for smax < l {
		// section complete; push to tree asynchronously
		go h.writeSection(t.cursor, t.section, true, false)
		// reset section
		t.section = make([]byte, secsize)
		// copy from the input buffer at smax into the fresh section
		copy(t.section, b[smax:])
		// advance cursor
		t.cursor++
		// smax here represents successive offsets in the input buffer
		smax += secsize
	}
	t.offset = l - smax + secsize
	return l, nil
}

// Reset needs to be called before writing to the hasher
func (h *Hasher) Reset() {
	h.releaseTree()
}

// methods needed to implement the SwarmHash interface

// ResetWithLength needs to be called before writing to the hasher;
// the argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash, i.e., the span
func (h *Hasher) ResetWithLength(span []byte) {
	h.Reset()
	h.getTree().span = span
}
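// Span sketch (illustrative; swarm conventionally encodes the span as the
// 8-byte little-endian length of the data subsumed under the chunk, which
// would need the encoding/binary package; `dataLength` is hypothetical):
//
//	span := make([]byte, 8)
//	binary.LittleEndian.PutUint64(span, uint64(dataLength))
//	h.ResetWithLength(span)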
// releaseTree gives back the tree to the pool whereby it unlocks;
// it resets the tree, segment and index
func (h *Hasher) releaseTree() {
	t := h.bmt
	if t == nil {
		return
	}
	h.bmt = nil
	go func() {
		t.cursor = 0
		t.offset = 0
		t.span = nil
		t.section = make([]byte, h.pool.SegmentSize*2)
		select {
		case <-t.result:
		default:
		}
		h.pool.release(t)
	}()
}

// NewAsyncWriter extends Hasher with an interface for concurrent segment/section writes
func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher {
	secsize := h.pool.SegmentSize
	if double {
		secsize *= 2
	}
	write := func(i int, section []byte, final bool) {
		h.writeSection(i, section, double, final)
	}
	return &AsyncHasher{
		Hasher:  h,
		double:  double,
		secsize: secsize,
		write:   write,
	}
}

// SectionWriter is an asynchronous segment/section writer interface
type SectionWriter interface {
	Reset()                                       // standard init to be called before reuse
	Write(index int, data []byte)                 // write into section of index
	Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer
	SectionSize() int                             // size of the async section unit to use
}

// AsyncHasher extends BMT Hasher with an asynchronous segment/section writer interface.
// AsyncHasher is unsafe and does not check indexes and section data lengths;
// it must be used with the right indexes and lengths and the right number of sections.
//
// behaviour is undefined if
// * non-final sections are shorter or longer than secsize
// * the final section does not match the length
// * a section is written with an index higher than length/secsize
// * the length set in the Sum call satisfies length/secsize < maxsec
//
// * if Sum() is not called on a Hasher that is fully written,
//   a process will block; it can be terminated with Reset
// * it will not leak processes if not all sections are written, but it blocks
//   and keeps the resource, which can be released by calling Reset()
type AsyncHasher struct {
	*Hasher            // extends the Hasher
	mtx     sync.Mutex // to lock the cursor access
	double  bool       // whether to use double segments (call Hasher.writeSection)
	secsize int        // size of base section (size of hash or double)
	write   func(i int, section []byte, final bool)
}

// methods needed to implement AsyncWriter

// SectionSize returns the size of the async section unit to use
func (sw *AsyncHasher) SectionSize() int {
	return sw.secsize
}
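// Usage sketch (illustrative; `sections`, `length` and `span` are hypothetical
// caller-supplied values): sections can be written concurrently and in any
// order, and Sum blocks until every section up to length has arrived:
//
//	aw := h.NewAsyncWriter(true) // double-segment sections
//	for i, sec := range sections {
//		go aw.Write(i, sec)
//	}
//	root := aw.Sum(nil, length, span)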
// Write writes the i-th section of the BMT base;
// this function can be and is meant to be called concurrently;
// it tracks the rightmost section written in a thread-safe way
func (sw *AsyncHasher) Write(i int, section []byte) {
	sw.mtx.Lock()
	defer sw.mtx.Unlock()
	t := sw.getTree()
	// cursor keeps track of the rightmost section written so far;
	// if the index is lower than the cursor then just write the non-final section as is
	if i < t.cursor {
		// if the index is not the rightmost, it is safe to write the section
		go sw.write(i, section, false)
		return
	}
	// if there is a previous rightmost section it is safe to write the section
	if t.offset > 0 {
		if i == t.cursor {
			// i==cursor implies the cursor was set by a Sum call, so we can write the section as the final one;
			// since it can be shorter, first we copy it to the padded buffer
			t.section = make([]byte, sw.secsize)
			copy(t.section, section)
			go sw.write(i, t.section, true)
			return
		}
		// the rightmost section just changed, so we write the previous one as non-final
		go sw.write(t.cursor, t.section, false)
	}
	// set i as the index of the rightmost section written so far;
	// set t.offset to cursor*secsize+1
	t.cursor = i
	t.offset = i*sw.secsize + 1
	t.section = make([]byte, sw.secsize)
	copy(t.section, section)
}

// Sum can be called any time once the length and the span are known,
// potentially even before all segments have been written;
// in such cases Sum will block until all segments are present and
// the hash for the length can be calculated.
//
// b: the digest is appended to b
// length: the known length of the input (unsafe; undefined if out of range)
// meta: metadata to hash together with the BMT root for the final digest,
//   e.g., the span for protection against existential forgery
func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) {
	sw.mtx.Lock()
	t := sw.getTree()
	if length == 0 {
		sw.mtx.Unlock()
		s = sw.pool.zerohashes[sw.pool.Depth]
	} else {
		// for non-zero input the rightmost section is written to the tree asynchronously
		// if the actual last section has been written (t.cursor == length/t.secsize)
		maxsec := (length - 1) / sw.secsize
		if t.offset > 0 {
			go sw.write(t.cursor, t.section, maxsec == t.cursor)
		}
		// set the cursor to maxsec so the final section is written when it arrives
		t.cursor = maxsec
		t.offset = length
		result := t.result
		sw.mtx.Unlock()
		// wait for the result or reset
		s = <-result
	}
	// release the tree back to the pool
	sw.releaseTree()
	// if no meta is given just append the digest to b
	if len(meta) == 0 {
		return append(b, s...)
	}
	// hash together meta and the BMT root hash using the pool's base hasher
	return doSum(sw.pool.hasher(), b, meta, s)
}

// writeSection writes the hash of the i-th section into the level 1 node of the BMT tree
func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) {
	// select the leaf node for the section
	var n *node
	var isLeft bool
	var hasher hash.Hash
	var level int
	t := h.getTree()
	if double {
		level++
		n = t.leaves[i]
		hasher = n.hasher
		isLeft = n.isLeft
		n = n.parent
		// hash the section
		section = doSum(hasher, nil, section)
	} else {
		n = t.leaves[i/2]
		hasher = n.hasher
		isLeft = i%2 == 0
	}
	// write the hash into the parent node
	if final {
		// for the last segment use writeFinalNode
		h.writeFinalNode(level, n, hasher, isLeft, section)
	} else {
		h.writeNode(n, hasher, isLeft, section)
	}
}
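// Synchronisation sketch (illustrative): two sister sections race up the tree;
// at each node the goroutine that arrives first flips the toggle and terminates,
// while the second one finds both children present, hashes them and carries the
// digest one level up, so exactly one goroutine reaches the root. The core of
// writeNode below reduces to:
//
//	if n.toggle() { // first arrival: sister not yet written
//		return
//	}
//	s = doSum(bh, nil, n.left, n.right) // second arrival: both children ready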
// writeNode pushes the data to the node;
// if it is the first of the 2 sisters written, the routine terminates;
// if it is the second, it calculates the hash and writes it
// to the parent node recursively;
// since hashing the parent is synchronous the same hasher can be used
func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
	level := 1
	for {
		// at the root of the bmt just write the result to the result channel
		if n == nil {
			h.getTree().result <- s
			return
		}
		// otherwise assign child hash to left or right segment
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		// the child-thread first arriving will terminate
		if n.toggle() {
			return
		}
		// the thread coming second now can be sure both left and right children are written,
		// so it calculates the hash of left|right and pushes it to the parent
		s = doSum(bh, nil, n.left, n.right)
		isLeft = n.isLeft
		n = n.parent
		level++
	}
}

// writeFinalNode follows the path from the final datasegment up to the
// BMT root via parents;
// for unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table of BMT subtree root hashes for all-zero sections;
// otherwise it behaves like `writeNode`
func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) {

	for {
		// at the root of the bmt just write the result to the result channel
		if n == nil {
			if s != nil {
				h.getTree().result <- s
			}
			return
		}
		var noHash bool
		if isLeft {
			// coming from the left sister branch:
			// when the final section's path goes via the left child node,
			// we include the all-zero subtree hash for the right sister at this level and toggle the node.
			n.right = h.pool.zerohashes[level]
			if s != nil {
				n.left = s
				// if a left final node carries a hash, it must be the first (and only) thread,
				// so the toggle is already in passive state and needs no call,
				// yet the thread needs to carry on pushing the hash to the parent
				noHash = false
			} else {
				// if it is again the first thread, then propagate nil and calculate no hash
				noHash = n.toggle()
			}
		} else {
			// right sister branch
			if s != nil {
				// if a hash was pushed from the right child node, write the right segment and change state
				n.right = s
				// if toggle is true, we arrived first so there is no hashing, just push nil to the parent
				noHash = n.toggle()

			} else {
				// if s is nil, then this thread arrived first at the previous node and here there will be two,
				// so no need to do anything; keep s = nil for the parent
				noHash = true
			}
		}
		// the child-thread arriving first will just continue resetting s to nil;
		// the second thread now can be sure both left and right children are written,
		// so it calculates the hash of left|right and pushes it to the parent
		if noHash {
			s = nil
		} else {
			s = doSum(bh, nil, n.left, n.right)
		}
		// iterate to the parent
		isLeft = n.isLeft
		n = n.parent
		level++
	}
}

// getTree obtains a BMT resource by reserving one from the pool and assigns it to the bmt field
func (h *Hasher) getTree() *tree {
	if h.bmt != nil {
		return h.bmt
	}
	t := h.pool.reserve()
	h.bmt = t
	return t
}

// toggle is an atomic bool toggle implementing a concurrent reusable 2-state object;
// atomic AddInt32 with %2 implements the atomic bool toggle;
// it returns true if the toggler just put it in the active/waiting state
func (n *node) toggle() bool {
	return atomic.AddInt32(&n.state, 1)%2 == 1
}

// doSum calculates the hash of the data using hash.Hash
func doSum(h hash.Hash, b []byte, data ...[]byte) []byte {
	h.Reset()
	for _, v := range data {
		h.Write(v)
	}
	return h.Sum(b)
}

// hashstr is a pretty printer for bytes used in tree.draw
func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

// calculateDepthFor calculates the depth (number of levels) in the BMT tree
func calculateDepthFor(n int) (d int) {
	c := 2
	for ; c < n; c *= 2 {
		d++
	}
	return d + 1
}
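// Worked values (illustrative, derivable by tracing the loop above):
//
//	calculateDepthFor(2)   == 1
//	calculateDepthFor(4)   == 2
//	calculateDepthFor(8)   == 3
//	calculateDepthFor(128) == 7 // swarm's 128 segments: 7 levels, 64 leaves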