github.com/waltonchain/waltonchain_gwtc_src@v1.1.4-0.20201225072101-8a298c95a819/bmt/bmt.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-wtc library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-wtc library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  // Package bmt provides a binary merkle tree implementation
    18  package bmt
    19  
    20  import (
    21  	"fmt"
    22  	"hash"
    23  	"io"
    24  	"strings"
    25  	"sync"
    26  	"sync/atomic"
    27  )
    28  
    29  /*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. Keccak-256 SHA3).
    33  
    34  It is used as the chunk hash function in swarm which in turn is the basis for the
    35  128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
    36  
    37  The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
    38  segment is a substring of a chunk starting at a particular offset
    39  The size of the underlying segments is fixed at 32 bytes (called the resolution
    40  of the BMT hash), the EVM word size to optimize for on-chain BMT verification
    41  as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
    42  
    43  Two implementations are provided:
    44  
    45  * RefHasher is optimized for code simplicity and meant as a reference implementation
    46  * Hasher is optimized for speed taking advantage of concurrency with minimalistic
    47    control structure to coordinate the concurrent routines
    48    It implements the ChunkHash interface as well as the go standard hash.Hash interface
    49  
    50  */
    51  
const (
	// DefaultSegmentCount is the maximum number of segments of the underlying chunk.
	DefaultSegmentCount = 128 // Should be equal to storage.DefaultBranches
	// DefaultPoolSize is the maximum number of BMT trees used by the hashers, i.e.
	// the maximum number of concurrent BMT hashing operations performed by the same hasher.
	DefaultPoolSize = 8
)
    59  
// BaseHasher is a hash.Hash constructor function used for the base hash of the BMT.
// Every call is expected to return a fresh hash.Hash instance.
type BaseHasher func() hash.Hash
    62  
// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT.
// It provides the methods of the hash.Hash interface and
// reuses a pool of Trees for amortised memory allocation and resource control.
// It supports order-agnostic concurrent segment writes
// as well as sequential read and write.
// It can not be called concurrently on more than one chunk.
//
// Reset reserves a Tree from the pool and must be called before writing;
// Sum hands the Tree back to the pool and resets the cursor and the open
// segment, so Reset has to be called again before hashing the next chunk.
type Hasher struct {
	pool        *TreePool   // BMT resource pool
	bmt         *Tree       // prebuilt BMT resource for flowcontrol and proofs
	blocksize   int         // segment size (size of hash) also for hash.Hash
	count       int         // segment count
	size        int         // for hash.Hash same as hashsize
	cur         int         // cursor position for rightmost currently open chunk
	segment     []byte      // the rightmost open segment (not complete)
	depth       int         // index of last level
	result      chan []byte // result channel
	hash        []byte      // to record the result
	max         int32       // max segments for SegmentWriter interface
	blockLength []byte      // The block length that needs to be added in Sum
}
    86  
    87  // New creates a reusable Hasher
    88  // implements the hash.Hash interface
    89  // pulls a new Tree from a resource pool for hashing each chunk
    90  func New(p *TreePool) *Hasher {
    91  	return &Hasher{
    92  		pool:      p,
    93  		depth:     depth(p.SegmentCount),
    94  		size:      p.SegmentSize,
    95  		blocksize: p.SegmentSize,
    96  		count:     p.SegmentCount,
    97  		result:    make(chan []byte),
    98  	}
    99  }
   100  
   101  // Node is a reuseable segment hasher representing a node in a BMT
   102  // it allows for continued writes after a Sum
   103  // and is left in completely reusable state after Reset
   104  type Node struct {
   105  	level, index int   // position of node for information/logging only
   106  	initial      bool  // first and last node
   107  	root         bool  // whether the node is root to a smaller BMT
   108  	isLeft       bool  // whether it is left side of the parent double segment
   109  	unbalanced   bool  // indicates if a node has only the left segment
   110  	parent       *Node // BMT connections
   111  	state        int32 // atomic increment impl concurrent boolean toggle
   112  	left, right  []byte
   113  }
   114  
   115  // NewNode constructor for segment hasher nodes in the BMT
   116  func NewNode(level, index int, parent *Node) *Node {
   117  	return &Node{
   118  		parent:  parent,
   119  		level:   level,
   120  		index:   index,
   121  		initial: index == 0,
   122  		isLeft:  index%2 == 0,
   123  	}
   124  }
   125  
// TreePool provides a pool of Trees used as resources by Hasher.
// A Tree popped from the pool is expected to have clean state
// for hashing a new chunk.
// Hasher hands its Tree back through Release once a chunk has been summed.
type TreePool struct {
	lock         sync.Mutex // guards count and serialises Reserve and Drain
	c            chan *Tree // free Trees waiting to be reused
	hasher       BaseHasher // base hash constructor handed to new Trees
	SegmentSize  int        // size of a segment, set to the base hash size by NewTreePool
	SegmentCount int        // number of segments a Tree is built for
	Capacity     int        // maximum number of Trees the pool creates
	count        int        // number of Trees created so far and not drained
}
   139  
   140  // NewTreePool creates a Tree pool with hasher, segment size, segment count and capacity
   141  // on GetTree it reuses free Trees or creates a new one if size is not reached
   142  func NewTreePool(hasher BaseHasher, segmentCount, capacity int) *TreePool {
   143  	return &TreePool{
   144  		c:            make(chan *Tree, capacity),
   145  		hasher:       hasher,
   146  		SegmentSize:  hasher().Size(),
   147  		SegmentCount: segmentCount,
   148  		Capacity:     capacity,
   149  	}
   150  }
   151  
   152  // Drain drains the pool uptil it has no more than n resources
   153  func (self *TreePool) Drain(n int) {
   154  	self.lock.Lock()
   155  	defer self.lock.Unlock()
   156  	for len(self.c) > n {
   157  		<-self.c
   158  		self.count--
   159  	}
   160  }
   161  
   162  // Reserve is blocking until it returns an available Tree
   163  // it reuses free Trees or creates a new one if size is not reached
   164  func (self *TreePool) Reserve() *Tree {
   165  	self.lock.Lock()
   166  	defer self.lock.Unlock()
   167  	var t *Tree
   168  	if self.count == self.Capacity {
   169  		return <-self.c
   170  	}
   171  	select {
   172  	case t = <-self.c:
   173  	default:
   174  		t = NewTree(self.hasher, self.SegmentSize, self.SegmentCount)
   175  		self.count++
   176  	}
   177  	return t
   178  }
   179  
   180  // Release gives back a Tree to the pool.
   181  // This Tree is guaranteed to be in reusable state
   182  // does not need locking
   183  func (self *TreePool) Release(t *Tree) {
   184  	self.c <- t // can never fail but...
   185  }
   186  
// Tree is a reusable control structure representing a BMT
// organised in a binary tree.
// Hasher uses a TreePool to pick one for each chunk hash;
// the Tree is 'locked' while not in the pool.
type Tree struct {
	leaves []*Node // nodes of the last level; ancestors are reached through Node.parent
}
   194  
   195  // Draw draws the BMT (badly)
   196  func (self *Tree) Draw(hash []byte, d int) string {
   197  	var left, right []string
   198  	var anc []*Node
   199  	for i, n := range self.leaves {
   200  		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
   201  		if i%2 == 0 {
   202  			anc = append(anc, n.parent)
   203  		}
   204  		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
   205  	}
   206  	anc = self.leaves
   207  	var hashes [][]string
   208  	for l := 0; len(anc) > 0; l++ {
   209  		var nodes []*Node
   210  		hash := []string{""}
   211  		for i, n := range anc {
   212  			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
   213  			if i%2 == 0 && n.parent != nil {
   214  				nodes = append(nodes, n.parent)
   215  			}
   216  		}
   217  		hash = append(hash, "")
   218  		hashes = append(hashes, hash)
   219  		anc = nodes
   220  	}
   221  	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
   222  	total := 60
   223  	del := "                             "
   224  	var rows []string
   225  	for i := len(hashes) - 1; i >= 0; i-- {
   226  		var textlen int
   227  		hash := hashes[i]
   228  		for _, s := range hash {
   229  			textlen += len(s)
   230  		}
   231  		if total < textlen {
   232  			total = textlen + len(hash)
   233  		}
   234  		delsize := (total - textlen) / (len(hash) - 1)
   235  		if delsize > len(del) {
   236  			delsize = len(del)
   237  		}
   238  		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
   239  		rows = append(rows, row)
   240  
   241  	}
   242  	rows = append(rows, strings.Join(left, "  "))
   243  	rows = append(rows, strings.Join(right, "  "))
   244  	return strings.Join(rows, "\n") + "\n"
   245  }
   246  
   247  // NewTree initialises the Tree by building up the nodes of a BMT
   248  // segment size is stipulated to be the size of the hash
   249  // segmentCount needs to be positive integer and does not need to be
   250  // a power of two and can even be an odd number
   251  // segmentSize * segmentCount determines the maximum chunk size
   252  // hashed using the tree
   253  func NewTree(hasher BaseHasher, segmentSize, segmentCount int) *Tree {
   254  	n := NewNode(0, 0, nil)
   255  	n.root = true
   256  	prevlevel := []*Node{n}
   257  	// iterate over levels and creates 2^level nodes
   258  	level := 1
   259  	count := 2
   260  	for d := 1; d <= depth(segmentCount); d++ {
   261  		nodes := make([]*Node, count)
   262  		for i := 0; i < len(nodes); i++ {
   263  			var parent *Node
   264  			parent = prevlevel[i/2]
   265  			t := NewNode(level, i, parent)
   266  			nodes[i] = t
   267  		}
   268  		prevlevel = nodes
   269  		level++
   270  		count *= 2
   271  	}
   272  	// the datanode level is the nodes on the last level where
   273  	return &Tree{
   274  		leaves: prevlevel,
   275  	}
   276  }
   277  
   278  // methods needed by hash.Hash
   279  
   280  // Size returns the size
   281  func (self *Hasher) Size() int {
   282  	return self.size
   283  }
   284  
   285  // BlockSize returns the block size
   286  func (self *Hasher) BlockSize() int {
   287  	return self.blocksize
   288  }
   289  
// Sum finalises the chunk written so far and returns its hash.
//
// NOTE(review): the argument b is never read by this method. Unlike
// hash.Hash.Sum, neither is b added to the hashed data nor is the digest
// appended to b; the named result r is not used either.
//
// The hasher must hold a tree (see Reset), because the tree is dereferenced
// unconditionally. The still-open segment is pushed into the tree, the root
// hash is awaited on the result channel and the tree is released afterwards.
// If a block length was set with ResetWithLength, the returned value is the
// base hash of the block length followed by the root hash; otherwise the root
// hash is returned as is.
func (self *Hasher) Sum(b []byte) (r []byte) {
	t := self.bmt
	i := self.cur
	n := t.leaves[i]
	j := i
	// must run strictly before all nodes calculate
	// datanodes are guaranteed to have a parent
	if len(self.segment) > self.size && i > 0 && n.parent != nil {
		// the open section holds more than one segment: finalise from the parent
		n = n.parent
	} else {
		i *= 2
	}
	// mark the path of the final segment, then push that segment into the tree
	d := self.finalise(n, i)
	self.writeSegment(j, self.segment, d)
	// wait for the root hash, then give the tree back (resets cur and segment)
	c := <-self.result
	self.releaseTree()

	// sha3(length + BMT(pure_chunk))
	if self.blockLength == nil {
		return c
	}
	res := self.pool.hasher()
	res.Reset()
	res.Write(self.blockLength)
	res.Write(c)
	return res.Sum(nil)
}
   320  
   321  // Hasher implements the SwarmHash interface
   322  
   323  // Hash waits for the hasher result and returns it
   324  // caller must call this on a BMT Hasher being written to
   325  func (self *Hasher) Hash() []byte {
   326  	return <-self.result
   327  }
   328  
   329  // Hasher implements the io.Writer interface
   330  
// Write adds b to the data to hash.
// Whenever a section is complete and more data follows, a hasher goroutine is
// launched for it (see writeSegment) that shoots up the BMT; the last,
// possibly incomplete section is kept open in self.segment.
//
// Write always returns len(b) and a nil error. The number of bytes consumed is
// capped by the remaining capacity computed from the cursor, so bytes beyond
// it are not hashed. Sections after the first are sub-slices of b (no copy).
func (self *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	s := self.segment
	i := self.cur
	// number of leaf sections: each holds up to two segments
	count := (self.count + 1) / 2
	// bytes that still fit, counting two segments for every leaf passed so far
	need := self.count*self.size - self.cur*2*self.size
	size := self.size
	if need > size {
		// room for a full two-segment section
		size *= 2
	}
	if l < need {
		need = l
	}
	// calculate the missing bytes to complete the currently open section
	rest := size - len(s)
	if need < rest {
		rest = need
	}
	s = append(s, b[:rest]...)
	need -= rest
	// read full sections and the last possibly partial section
	for need > 0 && i < count-1 {
		// push all finished sections we read
		self.writeSegment(i, s, self.depth)
		need -= size
		if need < 0 {
			// fewer than size bytes are left: shrink the last slice accordingly
			size += need
		}
		s = b[rest : rest+size]
		rest += size
		i++
	}
	// remember the open section and the cursor for the next Write or Sum
	self.segment = s
	self.cur = i
	// otherwise, we can assume len(s) == 0, so all buffer is read and chunk is not yet full
	return l, nil
}
   374  
   375  // Hasher implements the io.ReaderFrom interface
   376  
   377  // ReadFrom reads from io.Reader and appends to the data to hash using Write
   378  // it reads so that chunk to hash is maximum length or reader reaches EOF
   379  // caller must Reset the hasher prior to call
   380  func (self *Hasher) ReadFrom(r io.Reader) (m int64, err error) {
   381  	bufsize := self.size*self.count - self.size*self.cur - len(self.segment)
   382  	buf := make([]byte, bufsize)
   383  	var read int
   384  	for {
   385  		var n int
   386  		n, err = r.Read(buf)
   387  		read += n
   388  		if err == io.EOF || read == len(buf) {
   389  			hash := self.Sum(buf[:n])
   390  			if read == len(buf) {
   391  				err = NewEOC(hash)
   392  			}
   393  			break
   394  		}
   395  		if err != nil {
   396  			break
   397  		}
   398  		n, err = self.Write(buf[:n])
   399  		if err != nil {
   400  			break
   401  		}
   402  	}
   403  	return int64(read), err
   404  }
   405  
   406  // Reset needs to be called before writing to the hasher
   407  func (self *Hasher) Reset() {
   408  	self.getTree()
   409  	self.blockLength = nil
   410  }
   411  
   412  // Hasher implements the SwarmHash interface
   413  
   414  // ResetWithLength needs to be called before writing to the hasher
   415  // the argument is supposed to be the byte slice binary representation of
   416  // the legth of the data subsumed under the hash
   417  func (self *Hasher) ResetWithLength(l []byte) {
   418  	self.Reset()
   419  	self.blockLength = l
   420  
   421  }
   422  
   423  // Release gives back the Tree to the pool whereby it unlocks
   424  // it resets tree, segment and index
   425  func (self *Hasher) releaseTree() {
   426  	if self.bmt != nil {
   427  		n := self.bmt.leaves[self.cur]
   428  		for ; n != nil; n = n.parent {
   429  			n.unbalanced = false
   430  			if n.parent != nil {
   431  				n.root = false
   432  			}
   433  		}
   434  		self.pool.Release(self.bmt)
   435  		self.bmt = nil
   436  
   437  	}
   438  	self.cur = 0
   439  	self.segment = nil
   440  }
   441  
// writeSegment launches a goroutine that feeds the section s of leaf i into
// the tree; d is passed through to run unchanged.
//
// A section longer than one hash (self.size) on a leaf that has a parent is
// first hashed with the base hasher. If the leaf is flagged as root, that hash
// is the result and is sent on the result channel; otherwise it is pushed
// into the leaf's parent. Any other section is written unhashed into the leaf
// itself as its left input (i*2 is even).
func (self *Hasher) writeSegment(i int, s []byte, d int) {
	h := self.pool.hasher()
	n := self.bmt.leaves[i]

	if len(s) > self.size && n.parent != nil {
		go func() {
			h.Reset()
			h.Write(s)
			s = h.Sum(nil)

			if n.root {
				self.result <- s
				return
			}
			// the parity of the leaf index decides the side on the parent
			self.run(n.parent, h, d, n.index, s)
		}()
		return
	}
	go self.run(n, h, d, i*2, s)
}
   462  
// run pushes s into node n and climbs towards the root for as long as it is
// the one that completes a node.
//
// s is stored as the left input when i is even and as the right input
// otherwise; on later iterations the side is taken from the isLeft flag of the
// node just left. On a node not marked unbalanced the first of the two
// arriving goroutines stops (toggle reports the waiting state) and the second
// one continues. The inputs are then hashed with h, or merely concatenated
// when the node is unbalanced and was entered from the left (unless
// i == 0 && d == 0). Reaching a node flagged as root sends the value on the
// result channel.
//
// NOTE(review): self.hash is assigned by every goroutine executing run without
// any synchronisation visible here - confirm that this is intended.
func (self *Hasher) run(n *Node, h hash.Hash, d int, i int, s []byte) {
	isLeft := i%2 == 0
	for {
		if isLeft {
			n.left = s
		} else {
			n.right = s
		}
		// balanced node: whoever arrives first leaves the work to the sibling
		if !n.unbalanced && n.toggle() {
			return
		}
		if !n.unbalanced || !isLeft || i == 0 && d == 0 {
			h.Reset()
			h.Write(n.left)
			h.Write(n.right)
			s = h.Sum(nil)

		} else {
			// unbalanced node entered from the left: pass the data up unhashed
			s = append(n.left, n.right...)
		}

		self.hash = s
		if n.root {
			self.result <- s
			return
		}

		isLeft = n.isLeft
		n = n.parent
		i++
	}
}
   495  
   496  // getTree obtains a BMT resource by reserving one from the pool
   497  func (self *Hasher) getTree() *Tree {
   498  	if self.bmt != nil {
   499  		return self.bmt
   500  	}
   501  	t := self.pool.Reserve()
   502  	self.bmt = t
   503  	return t
   504  }
   505  
   506  // atomic bool toggle implementing a concurrent reusable 2-state object
   507  // atomic addint with %2 implements atomic bool toggle
   508  // it returns true if the toggler just put it in the active/waiting state
   509  func (self *Node) toggle() bool {
   510  	return atomic.AddInt32(&self.state, 1)%2 == 1
   511  }
   512  
   513  func hashstr(b []byte) string {
   514  	end := len(b)
   515  	if end > 4 {
   516  		end = 4
   517  	}
   518  	return fmt.Sprintf("%x", b[:end])
   519  }
   520  
   521  func depth(n int) (d int) {
   522  	for l := (n - 1) / 2; l > 0; l /= 2 {
   523  		d++
   524  	}
   525  	return d
   526  }
   527  
   528  // finalise is following the zigzags on the tree belonging
   529  // to the final datasegment
   530  func (self *Hasher) finalise(n *Node, i int) (d int) {
   531  	isLeft := i%2 == 0
   532  	for {
   533  		// when the final segment's path is going via left segments
   534  		// the incoming data is pushed to the parent upon pulling the left
   535  		// we do not need toogle the state since this condition is
   536  		// detectable
   537  		n.unbalanced = isLeft
   538  		n.right = nil
   539  		if n.initial {
   540  			n.root = true
   541  			return d
   542  		}
   543  		isLeft = n.isLeft
   544  		n = n.parent
   545  		d++
   546  	}
   547  }
   548  
   549  // EOC (end of chunk) implements the error interface
   550  type EOC struct {
   551  	Hash []byte // read the hash of the chunk off the error
   552  }
   553  
   554  // Error returns the error string
   555  func (self *EOC) Error() string {
   556  	return fmt.Sprintf("hasher limit reached, chunk hash: %x", self.Hash)
   557  }
   558  
   559  // NewEOC creates new end of chunk error with the hash
   560  func NewEOC(hash []byte) *EOC {
   561  	return &EOC{hash}
   562  }