github.com/aquanetwork/aquachain@v1.7.8/opt/bmt/bmt.go

// Copyright 2017 The aquachain Authors
// This file is part of the aquachain library.
//
// The aquachain library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The aquachain library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the aquachain library. If not, see <http://www.gnu.org/licenses/>.

// Package bmt provides a binary merkle tree implementation
package bmt

import (
	"fmt"
	"hash"
	"io"
	"strings"
	"sync"
	"sync/atomic"
)

/*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. keccak 256 SHA3).

It is used as the chunk hash function in swarm, which in turn is the basis for the
128-branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e. proving that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed at 32 bytes (called the resolution
of the BMT hash), the EVM word size to optimize for on-chain BMT verification,
as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.

Two implementations are provided:

* RefHasher is optimized for code simplicity and meant as a reference implementation
* Hasher is optimized for speed, taking advantage of concurrency with a minimalistic
  control structure to coordinate the concurrent routines.
  It implements the ChunkHash interface as well as the Go standard hash.Hash interface.
*/

const (
	// DefaultSegmentCount is the maximum number of segments of the underlying chunk
	DefaultSegmentCount = 128 // Should be equal to storage.DefaultBranches
	// DefaultPoolSize is the maximum number of bmt trees used by the hashers, i.e.,
	// the maximum number of concurrent BMT hashing operations performed by the same hasher
	DefaultPoolSize = 8
)
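
// Usage sketch (illustrative, not part of this file's API surface; any
// func() hash.Hash works as the base hash — here the standard library
// sha256.New stands in for the keccak constructor used in practice):
//
//	pool := NewTreePool(sha256.New, DefaultSegmentCount, DefaultPoolSize)
//	hasher := New(pool)
//	hasher.Reset()          // must be called before writing
//	hasher.Write(chunk)     // chunk is at most 128 * 32 bytes here
//	root := hasher.Sum(nil) // BMT root hash of the chunk
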
// BaseHasher is a hash.Hash constructor function used for the base hash of the BMT.
type BaseHasher func() hash.Hash

// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT.
// It implements the hash.Hash interface and
// reuses a pool of Trees for amortised memory allocation and resource control.
// It supports order-agnostic concurrent segment writes
// as well as sequential read and write,
// cannot be called concurrently on more than one chunk and
// can be further appended to after Sum.
// Reset gives the Tree back to the pool and is guaranteed to leave
// the tree and itself in a state reusable for hashing a new chunk.
type Hasher struct {
	pool        *TreePool   // BMT resource pool
	bmt         *Tree       // prebuilt BMT resource for flow control and proofs
	blocksize   int         // segment size (size of hash) also for hash.Hash
	count       int         // segment count
	size        int         // for hash.Hash, same as hashsize
	cur         int         // cursor position for rightmost currently open chunk
	segment     []byte      // the rightmost open segment (not complete)
	depth       int         // index of last level
	result      chan []byte // result channel
	hash        []byte      // to record the result
	blockLength []byte      // the block length that needs to be added in Sum
}

// New creates a reusable Hasher
// implementing the hash.Hash interface.
// It pulls a new Tree from a resource pool for hashing each chunk.
func New(p *TreePool) *Hasher {
	return &Hasher{
		pool:      p,
		depth:     depth(p.SegmentCount),
		size:      p.SegmentSize,
		blocksize: p.SegmentSize,
		count:     p.SegmentCount,
		result:    make(chan []byte),
	}
}

// Node is a reusable segment hasher representing a node in a BMT.
// It allows for continued writes after a Sum
// and is left in a completely reusable state after Reset.
type Node struct {
	level, index int   // position of node for information/logging only
	initial      bool  // first and last node
	root         bool  // whether the node is root to a smaller BMT
	isLeft       bool  // whether it is the left side of the parent double segment
	unbalanced   bool  // indicates if a node has only the left segment
	parent       *Node // BMT connections
	state        int32 // atomic increment implements a concurrent boolean toggle
	left, right  []byte
}

// NewNode is the constructor for segment hasher nodes in the BMT.
func NewNode(level, index int, parent *Node) *Node {
	return &Node{
		parent:  parent,
		level:   level,
		index:   index,
		initial: index == 0,
		isLeft:  index%2 == 0,
	}
}

// TreePool provides a pool of Trees used as resources by Hasher.
// A Tree popped from the pool is guaranteed to have a clean state
// for hashing a new chunk.
// Hasher Reset releases the Tree to the pool.
type TreePool struct {
	lock         sync.Mutex
	c            chan *Tree
	hasher       BaseHasher
	SegmentSize  int
	SegmentCount int
	Capacity     int
	count        int
}

// NewTreePool creates a Tree pool with hasher, segment size, segment count and capacity.
// On Reserve it reuses free Trees or creates a new one if capacity is not reached.
func NewTreePool(hasher BaseHasher, segmentCount, capacity int) *TreePool {
	return &TreePool{
		c:            make(chan *Tree, capacity),
		hasher:       hasher,
		SegmentSize:  hasher().Size(),
		SegmentCount: segmentCount,
		Capacity:     capacity,
	}
}

// Drain drains the pool until it has no more than n resources.
func (self *TreePool) Drain(n int) {
	self.lock.Lock()
	defer self.lock.Unlock()
	for len(self.c) > n {
		<-self.c
		self.count--
	}
}
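
// Concurrency sketch (illustrative; chunks, wg and sha256 are assumed from
// the caller's context): a single TreePool bounds the number of in-flight
// BMT computations, so independent Hashers can safely share it:
//
//	pool := NewTreePool(sha256.New, DefaultSegmentCount, DefaultPoolSize)
//	for _, data := range chunks {
//		wg.Add(1)
//		go func(data []byte) {
//			defer wg.Done()
//			h := New(pool) // one Hasher per goroutine; a Hasher is not safe for concurrent chunks
//			h.Reset()
//			h.Write(data)
//			_ = h.Sum(nil)
//		}(data)
//	}
//	wg.Wait()
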
// Reserve is blocking until it returns an available Tree.
// It reuses free Trees or creates a new one if capacity is not reached.
func (self *TreePool) Reserve() *Tree {
	self.lock.Lock()
	defer self.lock.Unlock()
	var t *Tree
	if self.count == self.Capacity {
		return <-self.c
	}
	select {
	case t = <-self.c:
	default:
		t = NewTree(self.hasher, self.SegmentSize, self.SegmentCount)
		self.count++
	}
	return t
}

// Release gives back a Tree to the pool.
// This Tree is guaranteed to be in a reusable state
// and does not need locking.
func (self *TreePool) Release(t *Tree) {
	self.c <- t // can never fail but...
}

// Tree is a reusable control structure representing a BMT
// organised in a binary tree.
// Hasher uses a TreePool to pick one for each chunk hash;
// the Tree is 'locked' while not in the pool.
type Tree struct {
	leaves []*Node
}

// Draw draws the BMT (badly)
func (self *Tree) Draw(hash []byte, d int) string {
	var left, right []string
	var anc []*Node
	for i, n := range self.leaves {
		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
		if i%2 == 0 {
			anc = append(anc, n.parent)
		}
		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
	}
	anc = self.leaves
	var hashes [][]string
	for l := 0; len(anc) > 0; l++ {
		var nodes []*Node
		hash := []string{""}
		for i, n := range anc {
			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
			if i%2 == 0 && n.parent != nil {
				nodes = append(nodes, n.parent)
			}
		}
		hash = append(hash, "")
		hashes = append(hashes, hash)
		anc = nodes
	}
	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
	total := 60
	del := " "
	var rows []string
	for i := len(hashes) - 1; i >= 0; i-- {
		var textlen int
		hash := hashes[i]
		for _, s := range hash {
			textlen += len(s)
		}
		if total < textlen {
			total = textlen + len(hash)
		}
		delsize := (total - textlen) / (len(hash) - 1)
		if delsize > len(del) {
			delsize = len(del)
		}
		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
		rows = append(rows, row)
	}
	rows = append(rows, strings.Join(left, " "))
	rows = append(rows, strings.Join(right, " "))
	return strings.Join(rows, "\n") + "\n"
}

// NewTree initialises the Tree by building up the nodes of a BMT.
// Segment size is stipulated to be the size of the hash.
// segmentCount needs to be a positive integer; it does not need to be
// a power of two and can even be an odd number.
// segmentSize * segmentCount determines the maximum chunk size
// hashed using the tree.
func NewTree(hasher BaseHasher, segmentSize, segmentCount int) *Tree {
	n := NewNode(0, 0, nil)
	n.root = true
	prevlevel := []*Node{n}
	// iterate over levels and create 2^level nodes on each
	level := 1
	count := 2
	for d := 1; d <= depth(segmentCount); d++ {
		nodes := make([]*Node, count)
		for i := 0; i < len(nodes); i++ {
			parent := prevlevel[i/2]
			t := NewNode(level, i, parent)
			nodes[i] = t
		}
		prevlevel = nodes
		level++
		count *= 2
	}
	// the datanode level is the nodes on the last level; these become the tree's leaves
	return &Tree{
		leaves: prevlevel,
	}
}
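
// Sizing sketch (informative arithmetic, assuming the defaults above and a
// 32-byte base hash; the identifiers below are only for illustration):
//
//	d := depth(DefaultSegmentCount)      // == 6 levels below the root
//	leaves := DefaultSegmentCount / 2    // == 64 leaf nodes, one per double segment
//	maxChunk := DefaultSegmentCount * 32 // == 4096 bytes hashed per chunk
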
// methods needed by hash.Hash

// Size returns the size.
func (self *Hasher) Size() int {
	return self.size
}

// BlockSize returns the block size.
func (self *Hasher) BlockSize() int {
	return self.blocksize
}

// Sum returns the hash of the buffer.
// The hash.Hash interface Sum method appends the byte slice to the underlying
// data before it calculates and returns the hash of the chunk.
func (self *Hasher) Sum(b []byte) (r []byte) {
	t := self.bmt
	i := self.cur
	n := t.leaves[i]
	j := i
	// must run strictly before all nodes calculate
	// datanodes are guaranteed to have a parent
	if len(self.segment) > self.size && i > 0 && n.parent != nil {
		n = n.parent
	} else {
		i *= 2
	}
	d := self.finalise(n, i)
	self.writeSegment(j, self.segment, d)
	c := <-self.result
	self.releaseTree()

	// sha3(length + BMT(pure_chunk))
	if self.blockLength == nil {
		return c
	}
	res := self.pool.hasher()
	res.Reset()
	res.Write(self.blockLength)
	res.Write(c)
	return res.Sum(nil)
}

// Hasher implements the SwarmHash interface

// Hash waits for the hasher result and returns it.
// The caller must call this on a BMT Hasher being written to.
func (self *Hasher) Hash() []byte {
	return <-self.result
}
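
// Span sketch (illustrative; the 8-byte little-endian encoding is an
// assumption following swarm convention, not something this file enforces):
// when a length is set via ResetWithLength, Sum returns
// base_hash(blockLength || BMT(chunk)) rather than the bare BMT root:
//
//	span := make([]byte, 8)
//	binary.LittleEndian.PutUint64(span, uint64(len(chunk)))
//	h.ResetWithLength(span)
//	h.Write(chunk)
//	digest := h.Sum(nil) // == base_hash(span || BMT(chunk))
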
// Hasher implements the io.Writer interface

// Write fills the buffer to hash;
// with every full segment completed it launches a hasher goroutine
// that shoots up the BMT.
func (self *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	s := self.segment
	i := self.cur
	count := (self.count + 1) / 2
	need := self.count*self.size - self.cur*2*self.size
	size := self.size
	if need > size {
		size *= 2
	}
	if l < need {
		need = l
	}
	// calculate missing bit to complete current open segment
	rest := size - len(s)
	if need < rest {
		rest = need
	}
	s = append(s, b[:rest]...)
	need -= rest
	// read full segments and the last possibly partial segment
	for need > 0 && i < count-1 {
		// push all finished chunks we read
		self.writeSegment(i, s, self.depth)
		need -= size
		if need < 0 {
			size += need
		}
		s = b[rest : rest+size]
		rest += size
		i++
	}
	self.segment = s
	self.cur = i
	// otherwise, we can assume len(s) == 0, so all buffer is read and chunk is not yet full
	return l, nil
}

// Hasher implements the io.ReaderFrom interface

// ReadFrom reads from io.Reader and appends to the data to hash using Write.
// It reads until the chunk to hash is of maximum length or the reader reaches EOF.
// The caller must Reset the hasher prior to the call.
func (self *Hasher) ReadFrom(r io.Reader) (m int64, err error) {
	bufsize := self.size*self.count - self.size*self.cur - len(self.segment)
	buf := make([]byte, bufsize)
	var read int
	for {
		var n int
		n, err = r.Read(buf)
		read += n
		if err == io.EOF || read == len(buf) {
			hash := self.Sum(buf[:n])
			if read == len(buf) {
				err = NewEOC(hash)
			}
			break
		}
		if err != nil {
			break
		}
		n, err = self.Write(buf[:n])
		if err != nil {
			break
		}
	}
	return int64(read), err
}

// Reset needs to be called before writing to the hasher.
func (self *Hasher) Reset() {
	self.getTree()
	self.blockLength = nil
}

// Hasher implements the SwarmHash interface

// ResetWithLength needs to be called before writing to the hasher.
// The argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash.
func (self *Hasher) ResetWithLength(l []byte) {
	self.Reset()
	self.blockLength = l
}

// releaseTree gives back the Tree to the pool whereby it unlocks;
// it resets tree, segment and index.
func (self *Hasher) releaseTree() {
	if self.bmt != nil {
		n := self.bmt.leaves[self.cur]
		for ; n != nil; n = n.parent {
			n.unbalanced = false
			if n.parent != nil {
				n.root = false
			}
		}
		self.pool.Release(self.bmt)
		self.bmt = nil
	}
	self.cur = 0
	self.segment = nil
}

func (self *Hasher) writeSegment(i int, s []byte, d int) {
	h := self.pool.hasher()
	n := self.bmt.leaves[i]

	if len(s) > self.size && n.parent != nil {
		go func() {
			h.Reset()
			h.Write(s)
			s = h.Sum(nil)

			if n.root {
				self.result <- s
				return
			}
			self.run(n.parent, h, d, n.index, s)
		}()
		return
	}
	go self.run(n, h, d, i*2, s)
}
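
// Coordination sketch (informative, restating the balanced-case logic of run
// below): every interior Node receives exactly two writes, one per child; the
// atomic toggle lets the first arrival return immediately while the second
// one combines both halves and pushes the result upward, so the hot path
// needs no locks:
//
//	if n.toggle() {
//		return // first child to arrive: sibling not written yet
//	}
//	// second child: both n.left and n.right are set, hash them into the parent
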
func (self *Hasher) run(n *Node, h hash.Hash, d int, i int, s []byte) {
	isLeft := i%2 == 0
	for {
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		if !n.unbalanced && n.toggle() {
			return
		}
		if !n.unbalanced || !isLeft || i == 0 && d == 0 {
			h.Reset()
			h.Write(n.left)
			h.Write(n.right)
			s = h.Sum(nil)
		} else {
			s = append(n.left, n.right...)
		}

		self.hash = s
		if n.root {
			self.result <- s
			return
		}

		isLeft = n.isLeft
		n = n.parent
		i++
	}
}

// getTree obtains a BMT resource by reserving one from the pool.
func (self *Hasher) getTree() *Tree {
	if self.bmt != nil {
		return self.bmt
	}
	t := self.pool.Reserve()
	self.bmt = t
	return t
}

// toggle is an atomic bool toggle implementing a concurrent reusable 2-state object;
// an atomic add with %2 implements the toggle.
// It returns true if the toggler just put it in the active/waiting state.
func (self *Node) toggle() bool {
	return atomic.AddInt32(&self.state, 1)%2 == 1
}

func hashstr(b []byte) string {
	end := len(b)
	if end > 4 {
		end = 4
	}
	return fmt.Sprintf("%x", b[:end])
}

func depth(n int) (d int) {
	for l := (n - 1) / 2; l > 0; l /= 2 {
		d++
	}
	return d
}

// finalise follows the zigzags on the tree belonging
// to the final data segment.
func (self *Hasher) finalise(n *Node, i int) (d int) {
	isLeft := i%2 == 0
	for {
		// when the final segment's path is going via left segments,
		// the incoming data is pushed to the parent upon pulling the left;
		// we do not need to toggle the state since this condition is
		// detectable
		n.unbalanced = isLeft
		n.right = nil
		if n.initial {
			n.root = true
			return d
		}
		isLeft = n.isLeft
		n = n.parent
		d++
	}
}

// EOC (end of chunk) implements the error interface.
type EOC struct {
	Hash []byte // read the hash of the chunk off the error
}

// Error returns the error string.
func (self *EOC) Error() string {
	return fmt.Sprintf("hasher limit reached, chunk hash: %x", self.Hash)
}

// NewEOC creates a new end of chunk error with the hash.
func NewEOC(hash []byte) *EOC {
	return &EOC{hash}
}
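
// EOC usage sketch (illustrative; r is any io.Reader supplied by the caller):
// ReadFrom above signals a completely filled chunk by returning an *EOC that
// carries the chunk hash:
//
//	n, err := hasher.ReadFrom(r)
//	if eoc, ok := err.(*EOC); ok {
//		root := eoc.Hash // chunk filled to capacity; more data may follow in r
//	}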