// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package bmt provides a binary merkle tree implementation
package bmt

import (
	"fmt"
	"hash"
	"strings"
	"sync"
	"sync/atomic"
)

/*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. Keccak256 SHA3).
Chunks with data shorter than the fixed size are hashed as if they had zero padding.

BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e. proving that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed to the size of the base hash (called the resolution
of the BMT hash). Using Keccak256 SHA3 the hash is 32 bytes, the EVM word size, to optimize for on-chain BMT verification
as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.

Two implementations are provided:

* RefHasher is optimized for code simplicity and meant as a reference implementation
  that is simple to understand
* Hasher is optimized for speed taking advantage of concurrency with minimalistic
  control structure to coordinate the concurrent routines
  It implements the following interfaces
	* standard golang hash.Hash
	* SwarmHash
	* io.Writer
	* TODO: SegmentWriter
*/

const (
	// SegmentCount is the maximum number of segments of the underlying chunk
	// Should be equal to max-chunk-data-size / hash-size
	SegmentCount = 128
	// PoolSize is the maximum number of bmt trees used by the hashers, i.e.,
	// the maximum number of concurrent BMT hashing operations performed by the same hasher
	PoolSize = 8
)

// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
// implemented by Keccak256 SHA3 sha3.NewKeccak256
type BaseHasherFunc func() hash.Hash

// Hasher a reusable hasher for fixed maximum size chunks representing a BMT
// - implements the hash.Hash interface
// - reuses a pool of trees for amortised memory allocation and resource control
// - supports order-agnostic concurrent segment writes (TODO:)
//   as well as sequential read and write
// - the same hasher instance must not be called concurrently on more than one chunk
// - the same hasher instance is synchronously reuseable
// - Write and Sum reserve a tree from the pool on demand, so calling Reset
//   first is optional
// - Sum gives back the tree to the pool and guaranteed to leave
//   the tree and itself in a state reusable for hashing a new chunk
// - generates and verifies segment inclusion proofs (TODO:)
type Hasher struct {
	pool *TreePool // BMT resource pool
	bmt  *tree     // prebuilt BMT resource for flowcontrol and proofs; nil while no tree is reserved
}

// New creates a reusable Hasher
// implements the hash.Hash interface
// pulls a new tree from a resource pool for hashing each chunk
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool: p,
	}
}

// TreePool provides a pool of trees used as resources by Hasher
// a tree popped from the pool is guaranteed to have clean state
// for hashing a new chunk
type TreePool struct {
	lock         sync.Mutex
	c            chan *tree     // the channel to obtain a resource from the pool
	hasher       BaseHasherFunc // base hasher to use for the BMT levels
	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
	SegmentCount int            // the number of segments on the base level of the BMT
	Capacity     int            // pool capacity, controls concurrency
	Depth        int            // depth of the bmt trees = int(log2(segmentCount))+1
	Datalength   int            // the total length of the data (count * size)
	count        int            // current count of (ever) allocated resources
	zerohashes   [][]byte       // lookup table for predictable padding subtrees for all levels
}

// NewTreePool creates a tree pool with hasher, segment count and capacity
// the segment size is taken from the Size() of the base hasher
// on Hasher.getTree it reuses free trees or creates a new one if capacity is not reached
func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
	// initialises the zerohashes lookup table:
	// zerohashes[0] is an all-zero segment, zerohashes[i] is the hash of
	// two concatenated copies of zerohashes[i-1]
	depth := calculateDepthFor(segmentCount)
	segmentSize := hasher().Size()
	zerohashes := make([][]byte, depth)
	zeros := make([]byte, segmentSize)
	zerohashes[0] = zeros
	h := hasher()
	for i := 1; i < depth; i++ {
		zeros = doHash(h, nil, zeros, zeros)
		zerohashes[i] = zeros
	}
	return &TreePool{
		c:            make(chan *tree, capacity),
		hasher:       hasher,
		SegmentSize:  segmentSize,
		SegmentCount: segmentCount,
		Capacity:     capacity,
		Datalength:   segmentCount * segmentSize,
		Depth:        depth,
		zerohashes:   zerohashes,
	}
}

// Drain drains the pool until it has no more than n resources
func (p *TreePool) Drain(n int) {
	p.lock.Lock()
	defer p.lock.Unlock()
	for len(p.c) > n {
		<-p.c
		p.count--
	}
}

// reserve is blocking until it returns an available tree
// it reuses free trees or creates a new one if capacity is not reached
// note that the pool lock is held while waiting for a free tree
// TODO: should use a context here
func (p *TreePool) reserve() *tree {
	p.lock.Lock()
	defer p.lock.Unlock()
	var t *tree
	if p.count == p.Capacity {
		// all trees are allocated: wait until one is released
		return <-p.c
	}
	select {
	case t = <-p.c:
	default:
		t = newTree(p.SegmentSize, p.Depth)
		p.count++
	}
	return t
}

// release gives back a tree to the pool.
// this tree is guaranteed to be in reusable state
func (p *TreePool) release(t *tree) {
	p.c <- t // can never fail ...
}

// tree is a reusable control structure representing a BMT
// organised in a binary tree
// Hasher uses a TreePool to obtain a tree for each chunk hash
// the tree is 'locked' while not in the pool
type tree struct {
	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
	cur     int         // index of rightmost currently open segment
	offset  int         // offset (cursor position) within currently open segment
	segment []byte      // the rightmost open segment (not complete)
	section []byte      // the rightmost open section (double segment)
	depth   int         // number of levels
	result  chan []byte // result channel
	hash    []byte      // to record the result
	span    []byte      // The span of the data subsumed under the chunk
}

// node is a reuseable segment hasher representing a node in a BMT
type node struct {
	isLeft      bool   // whether it is left side of the parent double segment
	parent      *node  // pointer to parent node in the BMT
	state       int32  // atomic increment impl concurrent boolean toggle
	left, right []byte // this is where the content segment is set
}

// newNode constructs a segment hasher node in the BMT (used by newTree)
// nodes with an even index are the left children of their parent
func newNode(index int, parent *node) *node {
	return &node{
		parent: parent,
		isLeft: index%2 == 0,
	}
}

// draw draws the BMT (badly)
// it renders one row per level from the given root hash down to the leaves,
// followed by one row of all left and one row of all right leaf contents
func (t *tree) draw(hash []byte) string {
	left := make([]string, 0, len(t.leaves))
	right := make([]string, 0, len(t.leaves))
	for _, n := range t.leaves {
		left = append(left, hashstr(n.left))
		right = append(right, hashstr(n.right))
	}
	// collect the rows bottom-up: every second node contributes its parent
	// to the next level until the root (which has no parent) is passed
	var hashes [][]string
	for anc := t.leaves; len(anc) > 0; {
		var parents []*node
		row := []string{""}
		for i, n := range anc {
			row = append(row, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				parents = append(parents, n.parent)
			}
		}
		row = append(row, "")
		hashes = append(hashes, row)
		anc = parents
	}
	hashes = append(hashes, []string{"", hashstr(hash), ""})
	total := 60
	// NOTE(review): the delimiter source is a single space here, which caps the
	// padding between cells at one character - confirm this is intended
	del := " "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		var textlen int
		row := hashes[i]
		for _, s := range row {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(row)
		}
		// every row has at least 2 cells, so the divisor is never zero
		delsize := (total - textlen) / (len(row) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		rows = append(rows, fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(row, del[:delsize])))
	}
	rows = append(rows, strings.Join(left, " "))
	rows = append(rows, strings.Join(right, " "))
	return strings.Join(rows, "\n") + "\n"
}

// newTree initialises a tree by building up the nodes of a BMT
// - segment size is stipulated to be the size of the hash
// - the tree has depth levels, the last of which holds the 2^(depth-1) leaves
func newTree(segmentSize, depth int) *tree {
	n := newNode(0, nil)
	prevlevel := []*node{n}
	// iterate over levels and creates 2^(depth-level) nodes
	count := 2
	for level := depth - 2; level >= 0; level-- {
		nodes := make([]*node, count)
		for i := 0; i < count; i++ {
			parent := prevlevel[i/2]
			nodes[i] = newNode(i, parent)
		}
		prevlevel = nodes
		count *= 2
	}
	// the datanode level is the nodes on the last level
	return &tree{
		leaves:  prevlevel,
		depth:   depth,
		result:  make(chan []byte, 1),
		segment: make([]byte, segmentSize),
		section: make([]byte, 2*segmentSize),
	}
}

// methods needed by hash.Hash

// Size returns the size
func (h *Hasher) Size() int {
	return h.pool.SegmentSize
}

// BlockSize returns the block size
func (h *Hasher) BlockSize() int {
	return h.pool.SegmentSize
}

// Hash hashes the data and the span using the bmt hasher
func Hash(h *Hasher, span, data []byte) []byte {
	h.ResetWithLength(span)
	h.Write(data)
	return h.Sum(nil)
}

// DataLength returns the maximum data size that is hashed by the hasher =
// segment count times segment size
func (h *Hasher) DataLength() int {
	return h.pool.Datalength
}

// Sum returns the hash of the buffer
// hash.Hash interface Sum method appends the byte slice to the underlying
// data before it calculates and returns the hash of the chunk
// caller must make sure Sum is not called concurrently with Write, writeSection
// and WriteSegment (TODO:)
func (h *Hasher) Sum(b []byte) (r []byte) {
	return h.sum(b, true, true)
}

// sum implements Sum taking parameters
// * if the tree is released right away
// * if sequential write is used (can read sections)
// if no span was set, the BMT root hash is appended to b, otherwise the
// base hash of span and BMT root hash is appended to b
func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
	// reserve a tree if none is held, so that Sum works without a prior Reset
	t := h.getTree()
	bh := h.pool.hasher()
	// push the rightmost (open) section as the final one and wait for the root
	go h.writeSection(t.cur, t.section, true)
	bmtHash := <-t.result
	span := t.span
	// fmt.Println(t.draw(bmtHash))
	if release {
		h.releaseTree()
	}
	// b + sha3(span + BMT(pure_chunk))
	if span == nil {
		return append(b, bmtHash...)
	}
	return doHash(bh, b, span, bmtHash)
}

// Hasher implements the SwarmHash interface

// Hasher implements the io.Writer interface

// Write fills the buffer to hash,
// with every full section calls writeSection
// input exceeding the capacity of the tree (number of leaves times section
// size) is ignored: the returned count is the number of bytes consumed and
// the error is always nil
func (h *Hasher) Write(b []byte) (int, error) {
	if len(b) == 0 {
		return 0, nil
	}
	// reserve a tree if none is held, so that Write works without a prior Reset
	t := h.getTree()
	secsize := 2 * h.pool.SegmentSize
	// never advance the cursor beyond the last leaf: writeSection indexes
	// t.leaves with it from a separate goroutine
	room := len(t.leaves)*secsize - (t.cur*secsize + t.offset)
	if room <= 0 {
		return 0, nil
	}
	if len(b) > room {
		b = b[:room]
	}
	l := len(b)
	// calculate length of missing bit to complete current open section
	smax := secsize - t.offset
	// if at the beginning of chunk or middle of the section
	if smax > 0 {
		// fill up current section from buffer
		copy(t.section[t.offset:], b)
		// if input buffer consumed and open section not complete, then
		// advance offset and return
		if l <= smax {
			t.offset += l
			return l, nil
		}
	}
	// read full sections and the last possibly partial section from the input buffer
	for smax < l {
		// section complete; push to tree asynchronously
		go h.writeSection(t.cur, t.section, false)
		// reset section; a fresh buffer is needed as the goroutine still reads the old one
		t.section = make([]byte, secsize)
		// copy from input buffer at smax to the new open section
		copy(t.section, b[smax:])
		// advance cursor
		t.cur++
		// smax here represents successive offsets in the input buffer
		smax += secsize
	}
	t.offset = l - smax + secsize
	return l, nil
}

// Reset reserves a tree for the hasher unless it already holds one
// a tree that is already held is kept as it is
func (h *Hasher) Reset() {
	h.getTree()
}

// Hasher implements the SwarmHash interface

// ResetWithLength needs to be called before writing to the hasher
// the argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash, i.e., span
func (h *Hasher) ResetWithLength(span []byte) {
	h.Reset()
	h.bmt.span = span
}

// releaseTree gives back the Tree to the pool whereby it unlocks
// it resets tree, segment and index
func (h *Hasher) releaseTree() {
	t := h.bmt
	if t != nil {
		t.cur = 0
		t.offset = 0
		t.span = nil
		t.hash = nil
		h.bmt = nil
		t.section = make([]byte, h.pool.SegmentSize*2)
		t.segment = make([]byte, h.pool.SegmentSize)
		h.pool.release(t)
	}
}

// TODO: writeSegment writes the ith segment into the BMT tree
// func (h *Hasher) writeSegment(i int, s []byte) {
// 	go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
// }

// writeSection writes the hash of i-th section into level 1 node of the BMT tree
func (h *Hasher) writeSection(i int, section []byte, final bool) {
	// select the leaf node for the section
	n := h.bmt.leaves[i]
	isLeft := n.isLeft
	n = n.parent
	bh := h.pool.hasher()
	// hash the section
	s := doHash(bh, nil, section)
	// write hash into parent node
	if final {
		// for the last segment use writeFinalNode
		h.writeFinalNode(1, n, bh, isLeft, s)
	} else {
		h.writeNode(n, bh, isLeft, s)
	}
}

// writeNode pushes the data to the node
// if it is the first of 2 sisters written the routine returns
// if it is the second, it calculates the hash and writes it
// to the parent node recursively
func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
	for {
		// at the root of the bmt just write the result to the result channel
		if n == nil {
			h.bmt.result <- s
			return
		}
		// otherwise assign child hash to branch
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		// the child-thread first arriving will quit
		if n.toggle() {
			return
		}
		// the thread coming later now can be sure both left and right children are written
		// it calculates the hash of left|right and pushes it to the parent
		s = doHash(bh, nil, n.left, n.right)
		isLeft = n.isLeft
		n = n.parent
	}
}

// writeFinalNode is following the path starting from the final datasegment to the
// BMT root via parents
// for unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table for BMT subtree root hashes for all-zero sections
// otherwise behaves like `writeNode`
func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) {

	for {
		// at the root of the bmt just write the result to the result channel
		if n == nil {
			if s != nil {
				h.bmt.result <- s
			}
			return
		}
		var noHash bool
		if isLeft {
			// coming from left sister branch
			// when the final section's path is going via left child node
			// we include an all-zero subtree hash for the right level and toggle the node.
			// when the path is going through right child node, nothing to do
			n.right = h.pool.zerohashes[level]
			if s != nil {
				n.left = s
				// if a left final node carries a hash, it must be the first (and only thread)
				// so the toggle is already in passive state no need no call
				// yet thread needs to carry on pushing hash to parent
			} else {
				// if again first thread then propagate nil and calculate no hash
				noHash = n.toggle()
			}
		} else {
			// right sister branch
			// if s is nil, then thread arrived first at previous node and here there will be two,
			// so no need to do anything
			if s != nil {
				n.right = s
				noHash = n.toggle()
			} else {
				noHash = true
			}
		}
		// the child-thread first arriving will just continue resetting s to nil
		// the second thread now can be sure both left and right children are written
		// it calculates the hash of left|right and pushes it to the parent
		if noHash {
			s = nil
		} else {
			s = doHash(bh, nil, n.left, n.right)
		}
		isLeft = n.isLeft
		n = n.parent
		level++
	}
}

// getTree obtains a BMT resource by reserving one from the pool
// if the hasher already holds a tree, that tree is returned unchanged
func (h *Hasher) getTree() *tree {
	if h.bmt != nil {
		return h.bmt
	}
	t := h.pool.reserve()
	h.bmt = t
	return t
}

// atomic bool toggle implementing a concurrent reusable 2-state object
// atomic addint with %2 implements atomic bool toggle
// it returns true if the toggler just put it in the active/waiting state
func (n *node) toggle() bool {
	return atomic.AddInt32(&n.state, 1)%2 == 1
}

// calculates the hash of the data using hash.Hash
// the hasher is reset first and the digest of the concatenated data is appended to b
func doHash(h hash.Hash, b []byte, data ...[]byte) []byte {
	h.Reset()
	for _, v := range data {
		h.Write(v)
	}
	return h.Sum(b)
}

// hashstr returns the hex encoding of at most the first 4 bytes of b
func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

// calculateDepthFor calculates the depth (number of levels) in the BMT tree
func calculateDepthFor(n int) (d int) {
	c := 2
	for ; c < n; c *= 2 {
		d++
	}
	return d + 1
}