// Source: github.com/gobitfly/go-ethereum@v1.8.12/swarm/bmt/bmt.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package bmt provides a binary merkle tree implementation
package bmt

import (
	"fmt"
	"hash"
	"strings"
	"sync"
	"sync/atomic"
)

/*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. Keccak256 SHA3).
Chunks with data shorter than the fixed size are hashed as if they had zero padding.

BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e. proving that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed to the size of the base hash (called the resolution
of the BMT hash). Using Keccak256 SHA3, the hash is 32 bytes, the EVM word size, to optimize for on-chain BMT verification
as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.

Two implementations are provided:

* RefHasher is optimized for code simplicity and meant as a reference implementation
  that is simple to understand
* Hasher is optimized for speed taking advantage of concurrency with minimalistic
  control structure to coordinate the concurrent routines
  It implements the following interfaces
	* standard golang hash.Hash
	* SwarmHash
	* io.Writer
	* TODO: SegmentWriter
*/

const (
	// SegmentCount is the maximum number of segments of the underlying chunk
	// Should be equal to max-chunk-data-size / hash-size
	SegmentCount = 128
	// PoolSize is the maximum number of bmt trees used by the hashers, i.e,
	// the maximum number of concurrent BMT hashing operations performed by the same hasher
	PoolSize = 8
)

// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
// implemented by Keccak256 SHA3 sha3.NewKeccak256
type BaseHasherFunc func() hash.Hash

// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT
// - implements the hash.Hash interface
// - reuses a pool of trees for amortised memory allocation and resource control
// - supports order-agnostic concurrent segment writes (TODO:)
//   as well as sequential read and write
// - the same hasher instance must not be called concurrently on more than one chunk
// - the same hasher instance is synchronously reusable
// - Sum gives back the tree to the pool and is guaranteed to leave
//   the tree and itself in a state reusable for hashing a new chunk
// - generates and verifies segment inclusion proofs (TODO:)
type Hasher struct {
	pool *TreePool // BMT resource pool
	bmt  *tree     // prebuilt BMT resource for flowcontrol and proofs
}

// New creates a reusable Hasher that implements the hash.Hash interface.
// It pulls a tree from the resource pool p for hashing each chunk.
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool: p,
	}
}

// TreePool provides a pool of trees used as resources by Hasher.
// A tree popped from the pool is guaranteed to have clean state
// for hashing a new chunk.
type TreePool struct {
	lock         sync.Mutex
	c            chan *tree     // the channel to obtain a resource from the pool
	hasher       BaseHasherFunc // base hasher to use for the BMT levels
	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
	SegmentCount int            // the number of segments on the base level of the BMT
	Capacity     int            // pool capacity, controls concurrency
	Depth        int            // depth of the bmt trees = int(log2(segmentCount))+1
	Datalength   int            // the total length of the data (count * size)
	count        int            // current count of (ever) allocated resources
	zerohashes   [][]byte       // lookup table for predictable padding subtrees for all levels
}

// NewTreePool creates a tree pool with hasher, segment count and capacity.
// The segment size is taken from the Size of the base hash.
// On Hasher.getTree it reuses free trees or creates a new one if capacity is not reached.
func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
	// initialise the zerohashes lookup table: entry 0 is an all-zero segment,
	// entry i is the base hash of two concatenated copies of entry i-1
	depth := calculateDepthFor(segmentCount)
	segmentSize := hasher().Size()
	zerohashes := make([][]byte, depth)
	zeros := make([]byte, segmentSize)
	zerohashes[0] = zeros
	h := hasher()
	for i := 1; i < depth; i++ {
		h.Reset()
		h.Write(zeros)
		h.Write(zeros)
		zeros = h.Sum(nil)
		zerohashes[i] = zeros
	}
	return &TreePool{
		c:            make(chan *tree, capacity),
		hasher:       hasher,
		SegmentSize:  segmentSize,
		SegmentCount: segmentCount,
		Capacity:     capacity,
		Datalength:   segmentCount * segmentSize,
		Depth:        depth,
		zerohashes:   zerohashes,
	}
}

// Drain drains the pool until it has no more than n resources
func (p *TreePool) Drain(n int) {
	p.lock.Lock()
	defer p.lock.Unlock()
	for len(p.c) > n {
		<-p.c
		p.count--
	}
}

// reserve is blocking until it returns an available tree.
// It reuses free trees or creates a new one if capacity is not reached.
// Note that the pool lock is held while waiting for a tree to be released.
// TODO: should use a context here
func (p *TreePool) reserve() *tree {
	p.lock.Lock()
	defer p.lock.Unlock()
	var t *tree
	if p.count == p.Capacity {
		return <-p.c
	}
	select {
	case t = <-p.c:
	default:
		t = newTree(p.SegmentSize, p.Depth)
		p.count++
	}
	return t
}

// release gives back a tree to the pool.
// this tree is guaranteed to be in reusable state
func (p *TreePool) release(t *tree) {
	p.c <- t // can never fail ...
}

// tree is a reusable control structure representing a BMT
// organised in a binary tree.
// Hasher uses a TreePool to obtain a tree for each chunk hash;
// the tree is 'locked' while not in the pool.
type tree struct {
	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
	cur     int         // index of rightmost currently open segment
	offset  int         // number of bytes written into the currently open segment (0..segment size)
	segment []byte      // the rightmost open segment (not complete)
	section []byte      // the rightmost open section (double segment)
	depth   int         // number of levels
	result  chan []byte // result channel
	hash    []byte      // to record the result
	span    []byte      // The span of the data subsumed under the chunk
}

// node is a reusable segment hasher representing a node in a BMT
type node struct {
	isLeft      bool   // whether it is left side of the parent double segment
	parent      *node  // pointer to parent node in the BMT
	state       int32  // atomic increment impl concurrent boolean toggle
	left, right []byte // this is where the content segment is set
}

// newNode constructs a segment hasher node in the BMT (used by newTree)
func newNode(index int, parent *node) *node {
	return &node{
		parent: parent,
		isLeft: index%2 == 0,
	}
}

// draw draws the BMT (badly): one row per level starting from the given
// root hash, followed by the left and the right contents of the leaves.
func (t *tree) draw(hash []byte) string {
	var left, right []string
	for _, n := range t.leaves {
		left = append(left, hashstr(n.left))
		right = append(right, hashstr(n.right))
	}
	// collect the levels bottom up by following the parent links
	anc := t.leaves
	var hashes [][]string
	for len(anc) > 0 {
		var nodes []*node
		row := []string{""}
		for i, n := range anc {
			row = append(row, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				nodes = append(nodes, n.parent)
			}
		}
		row = append(row, "")
		hashes = append(hashes, row)
		anc = nodes
	}
	hashes = append(hashes, []string{"", hashstr(hash), ""})
	total := 60
	del := " "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		var textlen int
		row := hashes[i]
		for _, s := range row {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(row)
		}
		delsize := (total - textlen) / (len(row) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		rows = append(rows, fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(row, del[:delsize])))
	}
	rows = append(rows, strings.Join(left, " "))
	rows = append(rows, strings.Join(right, " "))
	return strings.Join(rows, "\n") + "\n"
}

// newTree initialises a tree by building up the nodes of a BMT
// - segment size is stipulated to be the size of the hash
func newTree(segmentSize, depth int) *tree {
	n := newNode(0, nil)
	prevlevel := []*node{n}
	// iterate over levels and create twice as many nodes on each level
	// as on the previous one, linking each node to its parent
	count := 2
	for level := depth - 2; level >= 0; level-- {
		nodes := make([]*node, count)
		for i := 0; i < count; i++ {
			parent := prevlevel[i/2]
			nodes[i] = newNode(i, parent)
		}
		prevlevel = nodes
		count *= 2
	}
	// the datanode level is the nodes on the last level
	return &tree{
		leaves:  prevlevel,
		depth:   depth,
		result:  make(chan []byte, 1),
		segment: make([]byte, segmentSize),
		section: make([]byte, 2*segmentSize),
	}
}

// methods needed by hash.Hash

// Size returns the size of the hash, which equals the segment size
func (h *Hasher) Size() int {
	return h.pool.SegmentSize
}

// BlockSize returns the block size, which equals the segment size
func (h *Hasher) BlockSize() int {
	return h.pool.SegmentSize
}

// Hash hashes the data and the span using the bmt hasher
func Hash(h *Hasher, span, data []byte) []byte {
	h.ResetWithLength(span)
	h.Write(data)
	return h.Sum(nil)
}

// DataLength returns the maximum data size that is hashed by the hasher =
// segment count times segment size
func (h *Hasher) DataLength() int {
	return h.pool.Datalength
}

// Sum returns the hash of the data written so far appended to b,
// as required by the hash.Hash interface.
// Sum releases the tree to the pool, leaving the hasher reusable for a new chunk.
// The caller must make sure Sum is not called concurrently with Write, writeSection
// and WriteSegment (TODO:)
func (h *Hasher) Sum(b []byte) (r []byte) {
	return h.sum(b, true, true)
}

// sum implements Sum taking parameters
// * release: if the tree is released right away
// * section: if sequential write is used (can read sections)
// If no span was set the result is the BMT root hash, otherwise it is
// the base hash of span|BMT root hash. In both cases it is appended to b.
func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
	// obtain a tree if none is held so that summing an unwritten hasher
	// yields the hash of the empty chunk instead of a nil dereference
	t := h.getTree()
	h.finalise(section)
	// zero pad the unwritten tail of the open segment; this is a no-op for a
	// full segment and wipes stale content of a reused tree if nothing was written
	copy(t.segment[t.offset:], h.pool.zerohashes[0])
	if section {
		if t.cur%2 == 1 {
			// the open segment is the right half of the last section
			copy(t.section[h.pool.SegmentSize:], t.segment)
		} else {
			// copy segment to front of section, zero pad the right half
			copy(t.section, t.segment)
			copy(t.section[h.pool.SegmentSize:], h.pool.zerohashes[0])
		}
		h.writeSection(t.cur, t.section)
	} else {
		// TODO: h.writeSegment(t.cur, t.segment)
		panic("SegmentWriter not implemented")
	}
	bmtHash := <-t.result
	span := t.span

	if release {
		h.releaseTree()
	}
	if span == nil {
		// append to b to honour the hash.Hash contract
		return append(b, bmtHash...)
	}
	// sha3(span + BMT(pure_chunk))
	bh := h.pool.hasher()
	bh.Reset()
	bh.Write(span)
	bh.Write(bmtHash)
	return bh.Sum(b)
}

// Hasher implements the io.Writer interface

// Write fills the buffer to hash; every completed section (double segment)
// is handed over to writeSection. It can be called repeatedly with
// arbitrarily split input. Data exceeding the capacity of the chunk
// (DataLength) is silently ignored, the returned count is always len(b).
func (h *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l == 0 {
		return 0, nil
	}
	// obtain a tree if Reset was not called beforehand
	t := h.getTree()
	segSize := h.pool.SegmentSize
	// remaining capacity of the chunk, taking the bytes already in the
	// open segment into account
	need := (h.pool.SegmentCount-t.cur)*segSize - t.offset
	if l < need {
		need = l
	}
	// calculate missing bit to complete current open segment
	rest := segSize - t.offset
	if need < rest {
		rest = need
	}
	copy(t.segment[t.offset:], b[:rest])
	need -= rest
	// size is the fill level of the open segment; a full segment keeps
	// size == segSize (it is not wrapped to 0) so that a later Write
	// pushes it instead of overwriting it
	size := t.offset + rest
	// if there is more input the open segment is complete: push it and
	// read full segments and the last possibly partial segment
	for need > 0 {
		if t.cur%2 == 0 {
			copy(t.section, t.segment)
		} else {
			copy(t.section[segSize:], t.segment)
			h.writeSection(t.cur, t.section)
		}
		size = segSize
		if need < size {
			size = need
		}
		copy(t.segment, b[rest:rest+size])
		need -= size
		rest += size
		t.cur++
	}
	t.offset = size
	return l, nil
}

// Reset needs to be called before writing to the hasher.
// It reserves a tree from the pool unless the hasher already holds one,
// in which case that tree is kept as it is.
func (h *Hasher) Reset() {
	h.getTree()
}

// Hasher implements the SwarmHash interface

// ResetWithLength needs to be called before writing to the hasher
// the argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash, i.e., span
func (h *Hasher) ResetWithLength(span []byte) {
	h.Reset()
	h.bmt.span = span
}

// releaseTree gives back the tree to the pool whereby it unlocks it.
// It resets the cursor, the offset, the span and the recorded hash of the tree.
func (h *Hasher) releaseTree() {
	t := h.bmt
	if t != nil {
		t.cur = 0
		t.offset = 0
		t.span = nil
		t.hash = nil
		h.bmt = nil
		h.pool.release(t)
	}
}

// TODO: writeSegment writes the ith segment into the BMT tree
// func (h *Hasher) writeSegment(i int, s []byte) {
// 	go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
// }

// writeSection writes the hash of the i/2-th section into the right level 1 node of the BMT tree.
// The section is fed to the base hasher before returning, so the caller
// may reuse the buffer; the hash is propagated upwards in a new goroutine.
func (h *Hasher) writeSection(i int, section []byte) {
	n := h.bmt.leaves[i/2]
	isLeft := n.isLeft
	n = n.parent
	bh := h.pool.hasher()
	bh.Write(section)
	go func() {
		sum := bh.Sum(nil)
		// a tree consisting of a single section has no level above it
		if n == nil {
			h.bmt.result <- sum
			return
		}
		h.run(n, bh, isLeft, sum)
	}()
}

// run pushes the data to the node
// if it is the first of 2 sisters written the routine returns
// if it is the second, it calculates the hash and writes it
// to the parent node recursively
func (h *Hasher) run(n *node, bh hash.Hash, isLeft bool, s []byte) {
	for {
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		// the child-thread first arriving will quit
		if n.toggle() {
			return
		}
		// the second thread now can be sure both left and right children are written
		// it calculates the hash of left|right and takes it to the next level
		bh.Reset()
		bh.Write(n.left)
		bh.Write(n.right)
		s = bh.Sum(nil)

		// at the root of the bmt just write the result to the result channel
		if n.parent == nil {
			h.bmt.result <- s
			return
		}

		// otherwise iterate on parent
		isLeft = n.isLeft
		n = n.parent
	}
}

// finalise is following the path starting from the final datasegment to the
// BMT root via parents
// for unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table for BMT subtree root hashes for all-zero sections
// if skip is true the lowest level is left untouched
func (h *Hasher) finalise(skip bool) {
	t := h.bmt
	isLeft := t.cur%2 == 0
	n := t.leaves[t.cur/2]
	for level := 0; n != nil; level++ {
		// when the final segment's path is going via left child node
		// we include an all-zero subtree hash for the right level and toggle the node.
		// when the path is going through right child node, nothing to do
		if isLeft && !skip {
			n.right = h.pool.zerohashes[level]
			n.toggle()
		}
		skip = false
		isLeft = n.isLeft
		n = n.parent
	}
}

// getTree obtains a BMT resource by reserving one from the pool
// unless the hasher already holds one
func (h *Hasher) getTree() *tree {
	if h.bmt != nil {
		return h.bmt
	}
	t := h.pool.reserve()
	h.bmt = t
	return t
}

// atomic bool toggle implementing a concurrent reusable 2-state object
// atomic addint with the lowest bit implements atomic bool toggle
// it returns true if the toggler just put it in the active/waiting state
func (n *node) toggle() bool {
	// test the lowest bit instead of %2 == 1: the remainder of a negative odd
	// number is -1, which would break the toggle once the counter wraps around
	return atomic.AddInt32(&n.state, 1)&1 == 1
}

// hashstr returns the hex representation of at most the first 4 bytes of b
func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

// calculateDepthFor calculates the depth (number of levels) in the BMT tree
func calculateDepthFor(n int) (d int) {
	c := 2
	for ; c < n; c *= 2 {
		d++
	}
	return d + 1
}