// Source: github.com/waltonchain/waltonchain_gwtc_src@v1.1.4-0.20201225072101-8a298c95a819/bmt/bmt.go

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-wtc library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-wtc library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package bmt provides a binary merkle tree implementation
package bmt

import (
	"fmt"
	"hash"
	"io"
	"strings"
	"sync"
	"sync/atomic"
)

/*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. Keccak-256 SHA3).

It is used as the chunk hash function in swarm which in turn is the basis for the
128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed at 32 bytes (called the resolution
of the BMT hash), the EVM word size to optimize for on-chain BMT verification
as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
*/
/*
Two implementations are provided:

  - RefHasher is optimized for code simplicity and meant as a reference implementation
  - Hasher is optimized for speed taking advantage of concurrency with minimalistic
    control structure to coordinate the concurrent routines
    It implements the ChunkHash interface as well as the go standard hash.Hash interface
*/

const (
	// DefaultSegmentCount is the maximum number of segments of the underlying chunk.
	DefaultSegmentCount = 128 // Should be equal to storage.DefaultBranches
	// DefaultPoolSize is the maximum number of bmt trees used by the hashers, i.e.
	// the maximum number of concurrent BMT hashing operations performed by the same hasher.
	DefaultPoolSize = 8
)

// BaseHasher is a hash.Hash constructor function used for the base hash of the BMT.
type BaseHasher func() hash.Hash

// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT.
//
// It borrows a Tree from a TreePool for the duration of one chunk: the tree is
// reserved by Reset (or lazily by the first Write/Sum/ReadFrom) and handed
// back to the pool by Sum, which also clears the cursor and the open segment.
// Writing after Sum therefore starts a new chunk.
//
// A Hasher must not be used concurrently on more than one chunk.
type Hasher struct {
	pool        *TreePool   // BMT resource pool
	bmt         *Tree       // tree currently reserved from the pool, nil when none is held
	blocksize   int         // segment size (size of hash), reported by BlockSize
	count       int         // segment count
	size        int         // size of the base hash, reported by Size
	cur         int         // index of the leaf node the open segment belongs to
	segment     []byte      // the rightmost open (not yet pushed) section; always owned by the Hasher
	depth       int         // index of last level
	result      chan []byte // channel on which the root hash is delivered
	hash        []byte      // root hash recorded by run when it reaches the root
	max         int32       // max segments for SegmentWriter interface (not used in this file)
	blockLength []byte      // length prefix mixed into the result by Sum, nil for none
}

// New creates a reusable Hasher drawing its trees from pool p.
// Segment size, segment count and tree depth are taken from the pool.
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool:      p,
		depth:     depth(p.SegmentCount),
		size:      p.SegmentSize,
		blocksize: p.SegmentSize,
		count:     p.SegmentCount,
		result:    make(chan []byte),
	}
}

// Node is a reusable segment hasher representing a node in a BMT.
// Each node holds a left and a right input; two goroutines racing up the
// tree meet at a node and the second one to arrive (see toggle) continues.
type Node struct {
	level, index int   // position of node for information/logging only
	initial      bool  // true for the node with index 0 on its level
	root         bool  // whether the node is root to a smaller BMT
	isLeft       bool  // whether it is left side of the parent double segment
	unbalanced   bool  // indicates if a node has only the left segment
	parent       *Node // BMT connections
	state        int32 // atomic increment impl concurrent boolean toggle
	left, right  []byte
}

// NewNode constructs a segment hasher node at the given level and index
// below parent. Even indexes are left children.
func NewNode(level, index int, parent *Node) *Node {
	return &Node{
		parent:  parent,
		level:   level,
		index:   index,
		initial: index == 0,
		isLeft:  index%2 == 0,
	}
}

// TreePool provides a pool of Trees used as resources by Hasher.
// Hasher hands its Tree back to the pool when Sum completes.
type TreePool struct {
	lock         sync.Mutex
	c            chan *Tree // free trees
	hasher       BaseHasher
	SegmentSize  int
	SegmentCount int
	Capacity     int
	count        int // number of trees created and not drained
}

// NewTreePool creates a Tree pool for the given base hasher, segment count
// and capacity. The segment size is the digest size of the base hasher.
func NewTreePool(hasher BaseHasher, segmentCount, capacity int) *TreePool {
	return &TreePool{
		c:            make(chan *Tree, capacity),
		hasher:       hasher,
		SegmentSize:  hasher().Size(),
		SegmentCount: segmentCount,
		Capacity:     capacity,
	}
}

// Drain removes free trees from the pool until no more than n remain.
func (p *TreePool) Drain(n int) {
	p.lock.Lock()
	defer p.lock.Unlock()
	for len(p.c) > n {
		<-p.c
		p.count--
	}
}

// Reserve blocks until it can return an available Tree.
// It reuses a free Tree if there is one and otherwise builds a new one,
// unless Capacity trees already exist, in which case it waits for a Release.
// The pool lock is held while waiting.
func (p *TreePool) Reserve() *Tree {
	p.lock.Lock()
	defer p.lock.Unlock()
	if p.count == p.Capacity {
		return <-p.c
	}
	select {
	case t := <-p.c:
		return t
	default:
	}
	p.count++
	return NewTree(p.hasher, p.SegmentSize, p.SegmentCount)
}

// Release gives back a Tree to the pool.
// It takes no lock; it only sends on the pool's buffered channel.
func (p *TreePool) Release(t *Tree) {
	p.c <- t
}

// Tree is a reusable control structure representing a BMT
// organised in a binary tree. Only the bottom level is stored; the upper
// levels are reachable through the parent links of the nodes.
type Tree struct {
	leaves []*Node
}

// Draw renders the tree as text: one row per level starting with a row for
// the given hash, followed by a row of the leaves' left inputs and a row of
// their right inputs. Every value is abbreviated by hashstr. The parameter d
// is not used.
func (t *Tree) Draw(hash []byte, d int) string {
	var left, right []string
	for _, n := range t.leaves {
		left = append(left, hashstr(n.left))
		right = append(right, hashstr(n.right))
	}

	// collect one row per level, walking from the leaves up via every
	// even-indexed node's parent
	var hashes [][]string
	for level := t.leaves; len(level) > 0; {
		var parents []*Node
		row := []string{""}
		for i, n := range level {
			row = append(row, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				parents = append(parents, n.parent)
			}
		}
		row = append(row, "")
		hashes = append(hashes, row)
		level = parents
	}
	hashes = append(hashes, []string{"", hashstr(hash), ""})

	total := 60
	// NOTE(review): with a one-character delimiter the clamp below always
	// yields at most a single space; confirm whether a wider padding string
	// was intended here.
	del := " "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		row := hashes[i]
		var textlen int
		for _, s := range row {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(row)
		}
		delsize := (total - textlen) / (len(row) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		rows = append(rows, fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(row, del[:delsize])))
	}
	rows = append(rows, strings.Join(left, " "))
	rows = append(rows, strings.Join(right, " "))
	return strings.Join(rows, "\n") + "\n"
}

// NewTree initialises the Tree by building up the nodes of a BMT.
// It creates a root node and then depth(segmentCount) further levels, each
// twice as wide as the one above; the last level becomes the leaves.
// The hasher and segmentSize arguments are not used by the construction.
func NewTree(hasher BaseHasher, segmentSize, segmentCount int) *Tree {
	root := NewNode(0, 0, nil)
	root.root = true
	prev := []*Node{root}
	levels := depth(segmentCount)
	for level, width := 1, 2; level <= levels; level, width = level+1, width*2 {
		nodes := make([]*Node, width)
		for i := range nodes {
			nodes[i] = NewNode(level, i, prev[i/2])
		}
		prev = nodes
	}
	return &Tree{leaves: prev}
}

// methods needed by hash.Hash

// Size returns the size of the base hash.
func (h *Hasher) Size() int {
	return h.size
}

// BlockSize returns the segment size.
func (h *Hasher) BlockSize() int {
	return h.blocksize
}

// Sum finalises the chunk written so far and returns its BMT hash.
// If a length prefix was set with ResetWithLength the result is
// basehash(length + BMT(chunk)).
//
// The argument b is ignored: unlike the hash.Hash contract the digest is not
// appended to b. Sum releases the tree to the pool and clears the cursor and
// the open segment. A tree is reserved on demand, so Sum on a Hasher that was
// never Reset hashes the empty chunk instead of dereferencing a nil tree.
func (h *Hasher) Sum(b []byte) (r []byte) {
	t := h.getTree()
	i := h.cur
	n := t.leaves[i]
	j := i
	// must run strictly before all nodes calculate
	// datanodes are guaranteed to have a parent
	if len(h.segment) > h.size && i > 0 && n.parent != nil {
		n = n.parent
	} else {
		i *= 2
	}
	d := h.finalise(n, i)
	h.writeSegment(j, h.segment, d)
	c := <-h.result
	h.releaseTree()

	// sha3(length + BMT(pure_chunk))
	if h.blockLength == nil {
		return c
	}
	res := h.pool.hasher()
	res.Reset()
	res.Write(h.blockLength)
	res.Write(c)
	return res.Sum(nil)
}

// Hasher implements the SwarmHash interface

// Hash waits for the hasher result and returns it.
// It blocks until a root hash is sent on the result channel.
func (h *Hasher) Hash() []byte {
	return <-h.result
}

// Hasher implements the io.Writer interface

// Write adds b to the chunk being hashed. Every completed section (two
// segments) is handed to a goroutine that hashes it and carries the result
// up the tree; the last, possibly partial, section stays open until the next
// Write or Sum.
//
// Write always reports len(b) and a nil error; bytes beyond the chunk
// capacity are dropped. The data is copied, so b is neither retained nor
// modified and may be reused by the caller as soon as Write returns. A tree
// is reserved on demand if none is held.
func (h *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	h.getTree()
	s := h.segment
	i := h.cur
	count := (h.count + 1) / 2 // number of leaf nodes in use
	need := h.count*h.size - h.cur*2*h.size
	size := h.size
	if need > size {
		size *= 2
	}
	if l < need {
		need = l
	}
	// calculate missing bit to complete current open segment
	rest := size - len(s)
	if need < rest {
		rest = need
	}
	// h.segment is either nil or an owned buffer, so this never touches
	// memory belonging to a caller
	s = append(s, b[:rest]...)
	need -= rest
	// read full segments and the last possibly partial segment
	for need > 0 && i < count-1 {
		// push all finished chunks we read
		h.writeSegment(i, s, h.depth)
		need -= size
		if need < 0 {
			size += need
		}
		// copy instead of aliasing b: the section is hashed asynchronously
		// and may stay open after Write returns
		s = append(make([]byte, 0, 2*h.size), b[rest:rest+size]...)
		rest += size
		i++
	}
	h.segment = s
	h.cur = i
	return l, nil
}

// Hasher implements the io.ReaderFrom interface

// ReadFrom reads from r and writes everything it reads to the hasher until
// the chunk is full or r reports io.EOF, then calls Sum.
//
// It returns the number of bytes read. When the chunk filled up the error is
// an *EOC carrying the chunk hash; when r ended first the error is io.EOF
// (kept as returned by the reader); any other read error is returned as is
// and Sum is not called.
func (h *Hasher) ReadFrom(r io.Reader) (m int64, err error) {
	h.getTree()
	// every leaf before the cursor holds a full section of two segments
	remaining := h.size*h.count - 2*h.size*h.cur - len(h.segment)
	if remaining < 0 {
		remaining = 0
	}
	buf := make([]byte, remaining)
	var read int
	for {
		var n int
		n, err = r.Read(buf[read:])
		if n > 0 {
			// a reader may return data together with an error, so hash
			// what was read before looking at err; Write never fails
			h.Write(buf[read : read+n])
			read += n
		}
		if err == io.EOF || read == len(buf) {
			digest := h.Sum(nil)
			if read == len(buf) {
				err = NewEOC(digest)
			}
			break
		}
		if err != nil {
			break
		}
	}
	return int64(read), err
}

// Reset reserves a tree from the pool if the hasher holds none and clears
// the length prefix.
// NOTE(review): when a tree is already held (data written but Sum not yet
// called) the cursor, the open segment and the tree state are left
// untouched; confirm callers always Sum before Reset.
func (h *Hasher) Reset() {
	h.getTree()
	h.blockLength = nil
}

// Hasher implements the SwarmHash interface

// ResetWithLength is Reset followed by setting the length prefix l, the byte
// slice that Sum hashes together with the BMT root. The slice is stored, not
// copied.
func (h *Hasher) ResetWithLength(l []byte) {
	h.Reset()
	h.blockLength = l
}

// releaseTree gives the Tree back to the pool, if one is held, after
// clearing the unbalanced and root marks on the path from the current leaf
// to the root. It also resets the cursor and the open segment.
func (h *Hasher) releaseTree() {
	if h.bmt != nil {
		for n := h.bmt.leaves[h.cur]; n != nil; n = n.parent {
			n.unbalanced = false
			// the real root (no parent) stays marked as root
			if n.parent != nil {
				n.root = false
			}
		}
		h.pool.Release(h.bmt)
		h.bmt = nil
	}
	h.cur = 0
	h.segment = nil
}

// writeSegment starts a goroutine processing section s of leaf i.
// A section longer than one segment on a leaf with a parent is hashed first
// and the digest is carried on from the leaf's parent; anything else is fed
// into the leaf node itself. d is passed through to run.
func (h *Hasher) writeSegment(i int, s []byte, d int) {
	base := h.pool.hasher()
	n := h.bmt.leaves[i]

	if len(s) > h.size && n.parent != nil {
		go func() {
			base.Reset()
			base.Write(s)
			digest := base.Sum(nil)

			if n.root {
				h.result <- digest
				return
			}
			h.run(n.parent, base, d, n.index, digest)
		}()
		return
	}
	go h.run(n, base, d, i*2, s)
}

// run carries s up the tree starting at node n. At every node it stores s as
// the left or right input; on balanced nodes the first goroutine to arrive
// stops and the second continues with the hash of both inputs. On an
// unbalanced node reached from the left the input is passed on unhashed,
// except when i and d are both zero. Reaching a root node records the value
// and sends it on the result channel.
func (h *Hasher) run(n *Node, base hash.Hash, d int, i int, s []byte) {
	isLeft := i%2 == 0
	for {
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		if !n.unbalanced && n.toggle() {
			return
		}
		if !n.unbalanced || !isLeft || (i == 0 && d == 0) {
			base.Reset()
			base.Write(n.left)
			base.Write(n.right)
			s = base.Sum(nil)
		} else {
			s = append(n.left, n.right...)
		}

		if n.root {
			// only the single goroutine that reaches the root records the
			// result, so concurrent runs never write this field
			h.hash = s
			h.result <- s
			return
		}

		isLeft = n.isLeft
		n = n.parent
		i++
	}
}

// getTree returns the tree held by the hasher, reserving one from the pool
// first if none is held. It may block in TreePool.Reserve.
func (h *Hasher) getTree() *Tree {
	if h.bmt != nil {
		return h.bmt
	}
	t := h.pool.Reserve()
	h.bmt = t
	return t
}

// toggle atomically increments the node state and reports whether the new
// value is odd, i.e. whether the caller was the first of a pair to arrive.
func (n *Node) toggle() bool {
	return atomic.AddInt32(&n.state, 1)%2 == 1
}

// hashstr returns the hex encoding of at most the first 4 bytes of b.
func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

// depth returns the number of tree levels below the root needed for n
// segments, two segments per leaf: 0 for n <= 2, 1 for 3-4, 2 for 5-8, ...
func depth(n int) (d int) {
	for l := (n - 1) / 2; l > 0; l /= 2 {
		d++
	}
	return d
}

// finalise follows the path of the final data segment from node n upwards,
// marking a node unbalanced when the path enters it from the left and
// clearing its right input. It stops at the first node with index 0, marks
// it as root and returns the number of levels climbed.
func (h *Hasher) finalise(n *Node, i int) (d int) {
	isLeft := i%2 == 0
	for {
		// when the final segment's path is going via left segments
		// the incoming data is pushed to the parent upon pulling the left
		// we do not need to toggle the state since this condition is
		// detectable
		n.unbalanced = isLeft
		n.right = nil
		if n.initial {
			n.root = true
			return d
		}
		isLeft = n.isLeft
		n = n.parent
		d++
	}
}

// EOC (end of chunk) implements the error interface.
type EOC struct {
	Hash []byte // read the hash of the chunk off the error
}

// Error returns the error string.
func (e *EOC) Error() string {
	return fmt.Sprintf("hasher limit reached, chunk hash: %x", e.Hash)
}

// NewEOC creates a new end of chunk error with the hash.
func NewEOC(hash []byte) *EOC {
	return &EOC{hash}
}