github.com/muhammedhassanm/blockchain@v0.0.0-20200120143007-697261defd4d/go-ethereum-master/swarm/bmt/bmt.go (about)

     1  // Copyright 2018 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  // Package bmt provides a binary merkle tree implementation
    18  package bmt
    19  
    20  import (
    21  	"fmt"
    22  	"hash"
    23  	"strings"
    24  	"sync"
    25  	"sync/atomic"
    26  )
    27  
    28  /*
    29  Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size
    30  It is defined as the root hash of the binary merkle tree built over fixed size segments
    31  of the underlying chunk using any base hash function (e.g keccak 256 SHA3).
Chunks with data shorter than the fixed size are hashed as if they had zero padding.
    33  
    34  BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
    35  128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
    36  
    37  The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
    38  segment is a substring of a chunk starting at a particular offset
The size of the underlying segments is fixed to the size of the base hash (called the resolution
of the BMT hash). With Keccak256 SHA3 this is 32 bytes, the EVM word size, which optimizes for on-chain
BMT verification and is also the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
    42  
    43  Two implementations are provided:
    44  
    45  * RefHasher is optimized for code simplicity and meant as a reference implementation
    46    that is simple to understand
    47  * Hasher is optimized for speed taking advantage of concurrency with minimalistic
    48    control structure to coordinate the concurrent routines
    49    It implements the following interfaces
    50  	* standard golang hash.Hash
    51  	* SwarmHash
    52  	* io.Writer
    53  	* TODO: SegmentWriter
    54  */
    55  
    56  const (
    57  	// SegmentCount is the maximum number of segments of the underlying chunk
    58  	// Should be equal to max-chunk-data-size / hash-size
    59  	SegmentCount = 128
    60  	// PoolSize is the maximum number of bmt trees used by the hashers, i.e,
    61  	// the maximum number of concurrent BMT hashing operations performed by the same hasher
    62  	PoolSize = 8
    63  )
    64  
    65  // BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
    66  // implemented by Keccak256 SHA3 sha3.NewKeccak256
    67  type BaseHasherFunc func() hash.Hash
    68  
// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT
// - implements the hash.Hash interface
// - reuses a pool of trees for amortised memory allocation and resource control
// - supports order-agnostic concurrent segment writes (TODO:)
//   as well as sequential read and write
// - the same hasher instance must not be called concurrently on more than one chunk
// - the same hasher instance is synchronously reusable
// - Sum gives the tree back to the pool and is guaranteed to leave
//   the tree and the hasher in a state reusable for hashing a new chunk
// - generates and verifies segment inclusion proofs (TODO:)
type Hasher struct {
	pool *TreePool // pool the trees are reserved from (getTree) and released to (releaseTree)
	bmt  *tree     // tree currently held for the chunk being hashed; nil once released
}
    83  
    84  // New creates a reusable Hasher
    85  // implements the hash.Hash interface
    86  // pulls a new tree from a resource pool for hashing each chunk
    87  func New(p *TreePool) *Hasher {
    88  	return &Hasher{
    89  		pool: p,
    90  	}
    91  }
    92  
// TreePool provides a pool of trees used as resources by Hasher.
// A tree popped from the pool is guaranteed to have clean state
// for hashing a new chunk.
type TreePool struct {
	lock         sync.Mutex     // held by Drain and reserve; guards count
	c            chan *tree     // the channel to obtain a resource from the pool
	hasher       BaseHasherFunc // base hasher to use for the BMT levels
	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
	SegmentCount int            // the number of segments on the base level of the BMT
	Capacity     int            // pool capacity, controls concurrency
	Depth        int            // number of levels of the bmt trees, as returned by calculateDepthFor(SegmentCount)
	Datalength   int            // the total length of the data (count * size)
	count        int            // trees created by reserve and not yet discarded by Drain
	zerohashes   [][]byte       // per-level hashes of all-zero subtrees; index 0 is an all-zero segment
}
   108  
   109  // NewTreePool creates a tree pool with hasher, segment size, segment count and capacity
   110  // on Hasher.getTree it reuses free trees or creates a new one if capacity is not reached
   111  func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
   112  	// initialises the zerohashes lookup table
   113  	depth := calculateDepthFor(segmentCount)
   114  	segmentSize := hasher().Size()
   115  	zerohashes := make([][]byte, depth)
   116  	zeros := make([]byte, segmentSize)
   117  	zerohashes[0] = zeros
   118  	h := hasher()
   119  	for i := 1; i < depth; i++ {
   120  		zeros = doHash(h, nil, zeros, zeros)
   121  		zerohashes[i] = zeros
   122  	}
   123  	return &TreePool{
   124  		c:            make(chan *tree, capacity),
   125  		hasher:       hasher,
   126  		SegmentSize:  segmentSize,
   127  		SegmentCount: segmentCount,
   128  		Capacity:     capacity,
   129  		Datalength:   segmentCount * segmentSize,
   130  		Depth:        depth,
   131  		zerohashes:   zerohashes,
   132  	}
   133  }
   134  
   135  // Drain drains the pool until it has no more than n resources
   136  func (p *TreePool) Drain(n int) {
   137  	p.lock.Lock()
   138  	defer p.lock.Unlock()
   139  	for len(p.c) > n {
   140  		<-p.c
   141  		p.count--
   142  	}
   143  }
   144  
   145  // Reserve is blocking until it returns an available tree
   146  // it reuses free trees or creates a new one if size is not reached
   147  // TODO: should use a context here
   148  func (p *TreePool) reserve() *tree {
   149  	p.lock.Lock()
   150  	defer p.lock.Unlock()
   151  	var t *tree
   152  	if p.count == p.Capacity {
   153  		return <-p.c
   154  	}
   155  	select {
   156  	case t = <-p.c:
   157  	default:
   158  		t = newTree(p.SegmentSize, p.Depth)
   159  		p.count++
   160  	}
   161  	return t
   162  }
   163  
   164  // release gives back a tree to the pool.
   165  // this tree is guaranteed to be in reusable state
   166  func (p *TreePool) release(t *tree) {
   167  	p.c <- t // can never fail ...
   168  }
   169  
// tree is a reusable control structure representing a BMT
// organised in a binary tree.
// Hasher uses a TreePool to obtain a tree for each chunk hash;
// the tree is 'locked' while not in the pool.
type tree struct {
	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
	cur     int         // index of the currently open section; Write passes it to writeSection, which indexes leaves with it
	offset  int         // offset (cursor position) within the currently open section
	segment []byte      // the rightmost open segment (not complete)
	section []byte      // the rightmost open section (double segment)
	depth   int         // number of levels
	result  chan []byte // result channel on which the root hash is delivered
	hash    []byte      // to record the result
	span    []byte      // The span of the data subsumed under the chunk
}
   185  
// node is a reusable segment hasher representing a node in a BMT
type node struct {
	isLeft      bool   // whether it is the left side of the parent double segment
	parent      *node  // pointer to parent node in the BMT; nil for the root
	state       int32  // counter incremented atomically by toggle; its parity implements a concurrent boolean toggle
	left, right []byte // this is where the content segment is set
}
   193  
   194  // newNode constructs a segment hasher node in the BMT (used by newTree)
   195  func newNode(index int, parent *node) *node {
   196  	return &node{
   197  		parent: parent,
   198  		isLeft: index%2 == 0,
   199  	}
   200  }
   201  
   202  // Draw draws the BMT (badly)
   203  func (t *tree) draw(hash []byte) string {
   204  	var left, right []string
   205  	var anc []*node
   206  	for i, n := range t.leaves {
   207  		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
   208  		if i%2 == 0 {
   209  			anc = append(anc, n.parent)
   210  		}
   211  		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
   212  	}
   213  	anc = t.leaves
   214  	var hashes [][]string
   215  	for l := 0; len(anc) > 0; l++ {
   216  		var nodes []*node
   217  		hash := []string{""}
   218  		for i, n := range anc {
   219  			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
   220  			if i%2 == 0 && n.parent != nil {
   221  				nodes = append(nodes, n.parent)
   222  			}
   223  		}
   224  		hash = append(hash, "")
   225  		hashes = append(hashes, hash)
   226  		anc = nodes
   227  	}
   228  	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
   229  	total := 60
   230  	del := "                             "
   231  	var rows []string
   232  	for i := len(hashes) - 1; i >= 0; i-- {
   233  		var textlen int
   234  		hash := hashes[i]
   235  		for _, s := range hash {
   236  			textlen += len(s)
   237  		}
   238  		if total < textlen {
   239  			total = textlen + len(hash)
   240  		}
   241  		delsize := (total - textlen) / (len(hash) - 1)
   242  		if delsize > len(del) {
   243  			delsize = len(del)
   244  		}
   245  		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
   246  		rows = append(rows, row)
   247  
   248  	}
   249  	rows = append(rows, strings.Join(left, "  "))
   250  	rows = append(rows, strings.Join(right, "  "))
   251  	return strings.Join(rows, "\n") + "\n"
   252  }
   253  
   254  // newTree initialises a tree by building up the nodes of a BMT
   255  // - segment size is stipulated to be the size of the hash
   256  func newTree(segmentSize, depth int) *tree {
   257  	n := newNode(0, nil)
   258  	prevlevel := []*node{n}
   259  	// iterate over levels and creates 2^(depth-level) nodes
   260  	count := 2
   261  	for level := depth - 2; level >= 0; level-- {
   262  		nodes := make([]*node, count)
   263  		for i := 0; i < count; i++ {
   264  			parent := prevlevel[i/2]
   265  			nodes[i] = newNode(i, parent)
   266  		}
   267  		prevlevel = nodes
   268  		count *= 2
   269  	}
   270  	// the datanode level is the nodes on the last level
   271  	return &tree{
   272  		leaves:  prevlevel,
   273  		result:  make(chan []byte, 1),
   274  		segment: make([]byte, segmentSize),
   275  		section: make([]byte, 2*segmentSize),
   276  	}
   277  }
   278  
   279  // methods needed by hash.Hash
   280  
   281  // Size returns the size
   282  func (h *Hasher) Size() int {
   283  	return h.pool.SegmentSize
   284  }
   285  
   286  // BlockSize returns the block size
   287  func (h *Hasher) BlockSize() int {
   288  	return h.pool.SegmentSize
   289  }
   290  
   291  // Hash hashes the data and the span using the bmt hasher
   292  func Hash(h *Hasher, span, data []byte) []byte {
   293  	h.ResetWithLength(span)
   294  	h.Write(data)
   295  	return h.Sum(nil)
   296  }
   297  
   298  // Datalength returns the maximum data size that is hashed by the hasher =
   299  // segment count times segment size
   300  func (h *Hasher) DataLength() int {
   301  	return h.pool.Datalength
   302  }
   303  
   304  // Sum returns the hash of the buffer
   305  // hash.Hash interface Sum method appends the byte slice to the underlying
   306  // data before it calculates and returns the hash of the chunk
   307  // caller must make sure Sum is not called concurrently with Write, writeSection
   308  // and WriteSegment (TODO:)
   309  func (h *Hasher) Sum(b []byte) (r []byte) {
   310  	return h.sum(b, true, true)
   311  }
   312  
   313  // sum implements Sum taking parameters
   314  // * if the tree is released right away
   315  // * if sequential write is used (can read sections)
   316  func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
   317  	t := h.bmt
   318  	bh := h.pool.hasher()
   319  	go h.writeSection(t.cur, t.section, true)
   320  	bmtHash := <-t.result
   321  	span := t.span
   322  	// fmt.Println(t.draw(bmtHash))
   323  	if release {
   324  		h.releaseTree()
   325  	}
   326  	// b + sha3(span + BMT(pure_chunk))
   327  	if span == nil {
   328  		return append(b, bmtHash...)
   329  	}
   330  	return doHash(bh, b, span, bmtHash)
   331  }
   332  
   333  // Hasher implements the SwarmHash interface
   334  
   335  // Hasher implements the io.Writer interface
   336  
// Write copies b into the open section buffer of the tree and, each time a
// section (double segment) is complete and more input follows, hands that
// section to writeSection in a new goroutine.
// Reset or ResetWithLength must have been called before: the tree held by
// the hasher is dereferenced without a nil check.
// It returns len(b) and a nil error, except that it returns 0 for empty
// input and when the cursor guard in the full-section branch triggers.
// NOTE(review): nothing visible here bounds the total amount written to the
// pool's Datalength; presumably the caller has to ensure that — confirm.
func (h *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	t := h.bmt
	secsize := 2 * h.pool.SegmentSize
	// number of bytes still missing to complete the currently open section
	smax := secsize - t.offset
	// if at the beginning of the chunk or in the middle of a section
	if t.offset < secsize {
		// fill up the current section from the input; copy stops at the end
		// of the section buffer
		copy(t.section[t.offset:], b)
		// NOTE(review): smax cannot be 0 in this branch since offset < secsize
		if smax == 0 {
			smax = secsize
		}
		// the whole input fitted into the open section: advance the offset
		// and return, leaving the section open (possibly exactly full)
		if l <= smax {
			t.offset += l
			return l, nil
		}
	} else {
		// the open section is already full and nothing of b was consumed yet
		if t.cur == h.pool.SegmentCount*2 {
			return 0, nil
		}
	}
	// consume the rest of the input section by section; the last section
	// read may be partial and stays open
	for smax < l {
		// section complete; push to tree asynchronously
		go h.writeSection(t.cur, t.section, false)
		// start a fresh buffer; the goroutine above keeps using the old one
		t.section = make([]byte, secsize)
		// copy the input from position smax on into the new section
		copy(t.section, b[smax:])
		// advance cursor
		t.cur++
		// smax here represents successive offsets in the input buffer
		smax += secsize
	}
	// number of bytes of the input that landed in the section left open
	t.offset = l - smax + secsize
	return l, nil
}
   382  
   383  // Reset needs to be called before writing to the hasher
   384  func (h *Hasher) Reset() {
   385  	h.getTree()
   386  }
   387  
   388  // Hasher implements the SwarmHash interface
   389  
   390  // ResetWithLength needs to be called before writing to the hasher
   391  // the argument is supposed to be the byte slice binary representation of
   392  // the length of the data subsumed under the hash, i.e., span
   393  func (h *Hasher) ResetWithLength(span []byte) {
   394  	h.Reset()
   395  	h.bmt.span = span
   396  }
   397  
   398  // releaseTree gives back the Tree to the pool whereby it unlocks
   399  // it resets tree, segment and index
   400  func (h *Hasher) releaseTree() {
   401  	t := h.bmt
   402  	if t != nil {
   403  		t.cur = 0
   404  		t.offset = 0
   405  		t.span = nil
   406  		t.hash = nil
   407  		h.bmt = nil
   408  		t.section = make([]byte, h.pool.SegmentSize*2)
   409  		t.segment = make([]byte, h.pool.SegmentSize)
   410  		h.pool.release(t)
   411  	}
   412  }
   413  
   414  // TODO: writeSegment writes the ith segment into the BMT tree
   415  // func (h *Hasher) writeSegment(i int, s []byte) {
   416  // 	go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
   417  // }
   418  
   419  // writeSection writes the hash of i-th section into level 1 node of the BMT tree
   420  func (h *Hasher) writeSection(i int, section []byte, final bool) {
   421  	// select the leaf node for the section
   422  	n := h.bmt.leaves[i]
   423  	isLeft := n.isLeft
   424  	n = n.parent
   425  	bh := h.pool.hasher()
   426  	// hash the section
   427  	s := doHash(bh, nil, section)
   428  	// write hash into parent node
   429  	if final {
   430  		// for the last segment use writeFinalNode
   431  		h.writeFinalNode(1, n, bh, isLeft, s)
   432  	} else {
   433  		h.writeNode(n, bh, isLeft, s)
   434  	}
   435  }
   436  
   437  // writeNode pushes the data to the node
   438  // if it is the first of 2 sisters written the routine returns
   439  // if it is the second, it calculates the hash and writes it
   440  // to the parent node recursively
   441  func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
   442  	level := 1
   443  	for {
   444  		// at the root of the bmt just write the result to the result channel
   445  		if n == nil {
   446  			h.bmt.result <- s
   447  			return
   448  		}
   449  		// otherwise assign child hash to branc
   450  		if isLeft {
   451  			n.left = s
   452  		} else {
   453  			n.right = s
   454  		}
   455  		// the child-thread first arriving will quit
   456  		if n.toggle() {
   457  			return
   458  		}
   459  		// the thread coming later now can be sure both left and right children are written
   460  		// it calculates the hash of left|right and pushes it to the parent
   461  		s = doHash(bh, nil, n.left, n.right)
   462  		isLeft = n.isLeft
   463  		n = n.parent
   464  		level++
   465  	}
   466  }
   467  
// writeFinalNode follows the path from the final data section to the
// BMT root via parents.
// For unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table of BMT subtree root hashes for all-zero sections;
// otherwise it behaves like `writeNode`.
// level is the level of node n and selects the entry of the zero hash table;
// s is the hash coming from the child below, nil meaning that this routine
// arrived first at an earlier node and only keeps walking up to toggle.
func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) {

	for {
		// past the root: deliver the hash on the result channel, unless this
		// routine carries no hash
		if n == nil {
			if s != nil {
				h.bmt.result <- s
			}
			return
		}
		// noHash is set when this routine must not compute a hash at n
		var noHash bool
		if isLeft {
			// coming from left sister branch
			// when the final section's path is going via left child node
			// we include an all-zero subtree hash for the right level and toggle the node.
			// when the path is going through right child node, nothing to do
			n.right = h.pool.zerohashes[level]
			if s != nil {
				n.left = s
				// if a left final node carries a hash, it must be the first (and only) thread
				// so the toggle is already in passive state, no need to call it
				// yet the thread needs to carry on pushing the hash to the parent
			} else {
				// if again first thread then propagate nil and calculate no hash
				noHash = n.toggle()
			}
		} else {
			// right sister branch
			// if s is nil, then thread arrived first at previous node and here there will be two,
			// so no need to do anything
			if s != nil {
				n.right = s
				noHash = n.toggle()
			} else {
				noHash = true
			}
		}
		// the child-thread arriving first just continues, resetting s to nil
		// the second thread now can be sure both left and right children are written
		// it calculates the hash of left|right and pushes it to the parent
		if noHash {
			s = nil
		} else {
			s = doHash(bh, nil, n.left, n.right)
		}
		// step up one level
		isLeft = n.isLeft
		n = n.parent
		level++
	}
}
   523  
   524  // getTree obtains a BMT resource by reserving one from the pool
   525  func (h *Hasher) getTree() *tree {
   526  	if h.bmt != nil {
   527  		return h.bmt
   528  	}
   529  	t := h.pool.reserve()
   530  	h.bmt = t
   531  	return t
   532  }
   533  
   534  // atomic bool toggle implementing a concurrent reusable 2-state object
   535  // atomic addint with %2 implements atomic bool toggle
   536  // it returns true if the toggler just put it in the active/waiting state
   537  func (n *node) toggle() bool {
   538  	return atomic.AddInt32(&n.state, 1)%2 == 1
   539  }
   540  
   541  // calculates the hash of the data using hash.Hash
   542  func doHash(h hash.Hash, b []byte, data ...[]byte) []byte {
   543  	h.Reset()
   544  	for _, v := range data {
   545  		h.Write(v)
   546  	}
   547  	return h.Sum(b)
   548  }
   549  
   550  func hashstr(b []byte) string {
   551  	end := len(b)
   552  	if end > 4 {
   553  		end = 4
   554  	}
   555  	return fmt.Sprintf("%x", b[:end])
   556  }
   557  
   558  // calculateDepthFor calculates the depth (number of levels) in the BMT tree
   559  func calculateDepthFor(n int) (d int) {
   560  	c := 2
   561  	for ; c < n; c *= 2 {
   562  		d++
   563  	}
   564  	return d + 1
   565  }