github.com/gobitfly/go-ethereum@v1.8.12/swarm/bmt/bmt.go (about)

     1  // Copyright 2018 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  // Package bmt provides a binary merkle tree implementation
    18  package bmt
    19  
    20  import (
    21  	"fmt"
    22  	"hash"
    23  	"strings"
    24  	"sync"
    25  	"sync/atomic"
    26  )
    27  
    28  /*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. Keccak256 SHA3).
Chunks with data shorter than the fixed size are hashed as if they were zero padded.
    33  
    34  BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
    35  128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
    36  
    37  The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
    38  segment is a substring of a chunk starting at a particular offset
    39  The size of the underlying segments is fixed to the size of the base hash (called the resolution
    40  of the BMT hash), Using Keccak256 SHA3 hash is 32 bytes, the EVM word size to optimize for on-chain BMT verification
    41  as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
    42  
    43  Two implementations are provided:
    44  
    45  * RefHasher is optimized for code simplicity and meant as a reference implementation
    46    that is simple to understand
    47  * Hasher is optimized for speed taking advantage of concurrency with minimalistic
    48    control structure to coordinate the concurrent routines
    49    It implements the following interfaces
    50  	* standard golang hash.Hash
    51  	* SwarmHash
    52  	* io.Writer
    53  	* TODO: SegmentWriter
    54  */
    55  
const (
	// SegmentCount is the maximum number of segments of the underlying chunk.
	// It should be equal to max-chunk-data-size / hash-size.
	SegmentCount = 128
	// PoolSize is the maximum number of BMT trees used by the hashers, i.e.,
	// the maximum number of concurrent BMT hashing operations performed by the same hasher.
	PoolSize = 8
)
    64  
// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
// It is called whenever a fresh base hasher instance is needed.
// Implemented by Keccak256 SHA3 sha3.NewKeccak256.
type BaseHasherFunc func() hash.Hash
    68  
// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT
// - implements the hash.Hash interface
// - reuses a pool of trees for amortised memory allocation and resource control
// - supports order-agnostic concurrent segment writes (TODO:)
//   as well as sequential read and write
// - the same hasher instance must not be called concurrently on more than one chunk
// - the same hasher instance is synchronously reusable
// - Sum gives back the tree to the pool and is guaranteed to leave
//   the tree and itself in a state reusable for hashing a new chunk
// - generates and verifies segment inclusion proofs (TODO:)
type Hasher struct {
	pool *TreePool // BMT resource pool
	bmt  *tree     // prebuilt BMT resource for flowcontrol and proofs; nil while no tree is reserved
}
    83  
    84  // New creates a reusable Hasher
    85  // implements the hash.Hash interface
    86  // pulls a new tree from a resource pool for hashing each chunk
    87  func New(p *TreePool) *Hasher {
    88  	return &Hasher{
    89  		pool: p,
    90  	}
    91  }
    92  
// TreePool provides a pool of trees used as resources by Hasher
// a tree popped from the pool is guaranteed to have clean state
// for hashing a new chunk
type TreePool struct {
	lock         sync.Mutex     // held by Drain and reserve; guards count
	c            chan *tree     // the channel to obtain a resource from the pool
	hasher       BaseHasherFunc // base hasher to use for the BMT levels
	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
	SegmentCount int            // the number of segments on the base level of the BMT
	Capacity     int            // pool capacity, controls concurrency
	Depth        int            // depth of the bmt trees as returned by calculateDepthFor(segmentCount)
	Datalength   int            // the total length of the data (count * size)
	count        int            // current count of (ever) allocated resources
	zerohashes   [][]byte       // lookup table for predictable padding subtrees for all levels
}
   108  
   109  // NewTreePool creates a tree pool with hasher, segment size, segment count and capacity
   110  // on Hasher.getTree it reuses free trees or creates a new one if capacity is not reached
   111  func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
   112  	// initialises the zerohashes lookup table
   113  	depth := calculateDepthFor(segmentCount)
   114  	segmentSize := hasher().Size()
   115  	zerohashes := make([][]byte, depth)
   116  	zeros := make([]byte, segmentSize)
   117  	zerohashes[0] = zeros
   118  	h := hasher()
   119  	for i := 1; i < depth; i++ {
   120  		h.Reset()
   121  		h.Write(zeros)
   122  		h.Write(zeros)
   123  		zeros = h.Sum(nil)
   124  		zerohashes[i] = zeros
   125  	}
   126  	return &TreePool{
   127  		c:            make(chan *tree, capacity),
   128  		hasher:       hasher,
   129  		SegmentSize:  segmentSize,
   130  		SegmentCount: segmentCount,
   131  		Capacity:     capacity,
   132  		Datalength:   segmentCount * segmentSize,
   133  		Depth:        depth,
   134  		zerohashes:   zerohashes,
   135  	}
   136  }
   137  
   138  // Drain drains the pool until it has no more than n resources
   139  func (p *TreePool) Drain(n int) {
   140  	p.lock.Lock()
   141  	defer p.lock.Unlock()
   142  	for len(p.c) > n {
   143  		<-p.c
   144  		p.count--
   145  	}
   146  }
   147  
   148  // Reserve is blocking until it returns an available tree
   149  // it reuses free trees or creates a new one if size is not reached
   150  // TODO: should use a context here
   151  func (p *TreePool) reserve() *tree {
   152  	p.lock.Lock()
   153  	defer p.lock.Unlock()
   154  	var t *tree
   155  	if p.count == p.Capacity {
   156  		return <-p.c
   157  	}
   158  	select {
   159  	case t = <-p.c:
   160  	default:
   161  		t = newTree(p.SegmentSize, p.Depth)
   162  		p.count++
   163  	}
   164  	return t
   165  }
   166  
// release gives back a tree to the pool by sending it on the pool channel.
// this tree is guaranteed to be in reusable state
// (the caller, releaseTree, resets it before handing it over)
func (p *TreePool) release(t *tree) {
	// NOTE(review): presumably never blocks because reserve allocates at most
	// Capacity trees and the channel is buffered with that capacity — confirm.
	p.c <- t // can never fail ...
}
   172  
// tree is a reusable control structure representing a BMT
// organised in a binary tree
// Hasher uses a TreePool to obtain a tree for each chunk hash
// the tree is 'locked' while not in the pool
type tree struct {
	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
	cur     int         // index of rightmost currently open segment
	offset  int         // offset (cursor position) within currently open segment
	segment []byte      // the rightmost open segment (not complete)
	section []byte      // the rightmost open section (double segment)
	depth   int         // number of levels (not assigned by newTree in this file)
	result  chan []byte // result channel; buffered with capacity 1 by newTree
	hash    []byte      // to record the result
	span    []byte      // The span of the data subsumed under the chunk
}
   188  
// node is a reusable segment hasher representing a node in a BMT
type node struct {
	isLeft      bool   // whether it is left side of the parent double segment
	parent      *node  // pointer to parent node in the BMT; nil for the root
	state       int32  // atomic increment impl concurrent boolean toggle (see toggle)
	left, right []byte // this is where the content segment is set
}
   196  
   197  // newNode constructs a segment hasher node in the BMT (used by newTree)
   198  func newNode(index int, parent *node) *node {
   199  	return &node{
   200  		parent: parent,
   201  		isLeft: index%2 == 0,
   202  	}
   203  }
   204  
   205  // Draw draws the BMT (badly)
   206  func (t *tree) draw(hash []byte) string {
   207  	var left, right []string
   208  	var anc []*node
   209  	for i, n := range t.leaves {
   210  		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
   211  		if i%2 == 0 {
   212  			anc = append(anc, n.parent)
   213  		}
   214  		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
   215  	}
   216  	anc = t.leaves
   217  	var hashes [][]string
   218  	for l := 0; len(anc) > 0; l++ {
   219  		var nodes []*node
   220  		hash := []string{""}
   221  		for i, n := range anc {
   222  			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
   223  			if i%2 == 0 && n.parent != nil {
   224  				nodes = append(nodes, n.parent)
   225  			}
   226  		}
   227  		hash = append(hash, "")
   228  		hashes = append(hashes, hash)
   229  		anc = nodes
   230  	}
   231  	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
   232  	total := 60
   233  	del := "                             "
   234  	var rows []string
   235  	for i := len(hashes) - 1; i >= 0; i-- {
   236  		var textlen int
   237  		hash := hashes[i]
   238  		for _, s := range hash {
   239  			textlen += len(s)
   240  		}
   241  		if total < textlen {
   242  			total = textlen + len(hash)
   243  		}
   244  		delsize := (total - textlen) / (len(hash) - 1)
   245  		if delsize > len(del) {
   246  			delsize = len(del)
   247  		}
   248  		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
   249  		rows = append(rows, row)
   250  
   251  	}
   252  	rows = append(rows, strings.Join(left, "  "))
   253  	rows = append(rows, strings.Join(right, "  "))
   254  	return strings.Join(rows, "\n") + "\n"
   255  }
   256  
   257  // newTree initialises a tree by building up the nodes of a BMT
   258  // - segment size is stipulated to be the size of the hash
   259  func newTree(segmentSize, depth int) *tree {
   260  	n := newNode(0, nil)
   261  	prevlevel := []*node{n}
   262  	// iterate over levels and creates 2^(depth-level) nodes
   263  	count := 2
   264  	for level := depth - 2; level >= 0; level-- {
   265  		nodes := make([]*node, count)
   266  		for i := 0; i < count; i++ {
   267  			parent := prevlevel[i/2]
   268  			nodes[i] = newNode(i, parent)
   269  		}
   270  		prevlevel = nodes
   271  		count *= 2
   272  	}
   273  	// the datanode level is the nodes on the last level
   274  	return &tree{
   275  		leaves:  prevlevel,
   276  		result:  make(chan []byte, 1),
   277  		segment: make([]byte, segmentSize),
   278  		section: make([]byte, 2*segmentSize),
   279  	}
   280  }
   281  
   282  // methods needed by hash.Hash
   283  
   284  // Size returns the size
   285  func (h *Hasher) Size() int {
   286  	return h.pool.SegmentSize
   287  }
   288  
   289  // BlockSize returns the block size
   290  func (h *Hasher) BlockSize() int {
   291  	return h.pool.SegmentSize
   292  }
   293  
   294  // Hash hashes the data and the span using the bmt hasher
   295  func Hash(h *Hasher, span, data []byte) []byte {
   296  	h.ResetWithLength(span)
   297  	h.Write(data)
   298  	return h.Sum(nil)
   299  }
   300  
   301  // Datalength returns the maximum data size that is hashed by the hasher =
   302  // segment count times segment size
   303  func (h *Hasher) DataLength() int {
   304  	return h.pool.Datalength
   305  }
   306  
   307  // Sum returns the hash of the buffer
   308  // hash.Hash interface Sum method appends the byte slice to the underlying
   309  // data before it calculates and returns the hash of the chunk
   310  // caller must make sure Sum is not called concurrently with Write, writeSection
   311  // and WriteSegment (TODO:)
   312  func (h *Hasher) Sum(b []byte) (r []byte) {
   313  	return h.sum(b, true, true)
   314  }
   315  
   316  // sum implements Sum taking parameters
   317  // * if the tree is released right away
   318  // * if sequential write is used (can read sections)
   319  func (h *Hasher) sum(b []byte, release, section bool) (r []byte) {
   320  	t := h.bmt
   321  	h.finalise(section)
   322  	if t.offset > 0 { // get the last node (double segment)
   323  
   324  		// padding the segment  with zero
   325  		copy(t.segment[t.offset:], h.pool.zerohashes[0])
   326  	}
   327  	if section {
   328  		if t.cur%2 == 1 {
   329  			// if just finished current segment, copy it to the right half of the chunk
   330  			copy(t.section[h.pool.SegmentSize:], t.segment)
   331  		} else {
   332  			// copy segment to front of section, zero pad the right half
   333  			copy(t.section, t.segment)
   334  			copy(t.section[h.pool.SegmentSize:], h.pool.zerohashes[0])
   335  		}
   336  		h.writeSection(t.cur, t.section)
   337  	} else {
   338  		// TODO: h.writeSegment(t.cur, t.segment)
   339  		panic("SegmentWriter not implemented")
   340  	}
   341  	bmtHash := <-t.result
   342  	span := t.span
   343  
   344  	if release {
   345  		h.releaseTree()
   346  	}
   347  	// sha3(span + BMT(pure_chunk))
   348  	if span == nil {
   349  		return bmtHash
   350  	}
   351  	bh := h.pool.hasher()
   352  	bh.Reset()
   353  	bh.Write(span)
   354  	bh.Write(bmtHash)
   355  	return bh.Sum(b)
   356  }
   357  
   358  // Hasher implements the SwarmHash interface
   359  
   360  // Hasher implements the io.Writer interface
   361  
// Write fills the buffer to hash,
// with every full segment calls writeSection
//
// Reset or ResetWithLength must be called first so that h.bmt is set.
// The number of bytes consumed is capped at (SegmentCount - cur) * SegmentSize;
// input beyond that is silently ignored, yet len(b) and a nil error are
// always returned.
//
// NOTE(review): when a call ends exactly on a segment boundary, offset is
// stored as 0 while cur still points at the completed segment, so a following
// Write appears to copy over that segment instead of opening a new one —
// confirm whether callers ever feed one chunk through several Write calls.
func (h *Hasher) Write(b []byte) (int, error) {
	l := len(b)
	if l <= 0 {
		return 0, nil
	}
	t := h.bmt
	// need is the number of bytes of b that will be consumed by this call
	need := (h.pool.SegmentCount - t.cur) * h.pool.SegmentSize
	if l < need {
		need = l
	}
	// calculate missing bit to complete current open segment
	rest := h.pool.SegmentSize - t.offset
	if need < rest {
		rest = need
	}
	copy(t.segment[t.offset:], b[:rest])
	need -= rest
	// fill level of the open segment after the copy; 0 when it was just completed
	size := (t.offset + rest) % h.pool.SegmentSize
	// read full segments and the last possibly partial segment
	for need > 0 {
		// push all finished chunks we read
		if t.cur%2 == 0 {
			// even segment: park it in the left half of the section
			copy(t.section, t.segment)
		} else {
			// odd segment: it completes the section, which is handed over for hashing
			copy(t.section[h.pool.SegmentSize:], t.segment)
			h.writeSection(t.cur, t.section)
		}
		// take the next (possibly partial) segment from b; rest is the read cursor in b
		size = h.pool.SegmentSize
		if need < size {
			size = need
		}
		copy(t.segment, b[rest:rest+size])
		need -= size
		rest += size
		// t.segment now holds the segment with the next index
		t.cur++
	}
	// a completely filled segment is recorded with offset 0
	t.offset = size % h.pool.SegmentSize
	return l, nil
}
   403  
   404  // Reset needs to be called before writing to the hasher
   405  func (h *Hasher) Reset() {
   406  	h.getTree()
   407  }
   408  
   409  // Hasher implements the SwarmHash interface
   410  
   411  // ResetWithLength needs to be called before writing to the hasher
   412  // the argument is supposed to be the byte slice binary representation of
   413  // the length of the data subsumed under the hash, i.e., span
   414  func (h *Hasher) ResetWithLength(span []byte) {
   415  	h.Reset()
   416  	h.bmt.span = span
   417  }
   418  
   419  // releaseTree gives back the Tree to the pool whereby it unlocks
   420  // it resets tree, segment and index
   421  func (h *Hasher) releaseTree() {
   422  	t := h.bmt
   423  	if t != nil {
   424  		t.cur = 0
   425  		t.offset = 0
   426  		t.span = nil
   427  		t.hash = nil
   428  		h.bmt = nil
   429  		h.pool.release(t)
   430  	}
   431  }
   432  
   433  // TODO: writeSegment writes the ith segment into the BMT tree
   434  // func (h *Hasher) writeSegment(i int, s []byte) {
   435  // 	go h.run(h.bmt.leaves[i/2], h.pool.hasher(), i%2 == 0, s)
   436  // }
   437  
   438  // writeSection writes the hash of i/2-th segction into right level 1 node of the BMT tree
   439  func (h *Hasher) writeSection(i int, section []byte) {
   440  	n := h.bmt.leaves[i/2]
   441  	isLeft := n.isLeft
   442  	n = n.parent
   443  	bh := h.pool.hasher()
   444  	bh.Write(section)
   445  	go func() {
   446  		sum := bh.Sum(nil)
   447  		if n == nil {
   448  			h.bmt.result <- sum
   449  			return
   450  		}
   451  		h.run(n, bh, isLeft, sum)
   452  	}()
   453  }
   454  
   455  // run pushes the data to the node
   456  // if it is the first of 2 sisters written the routine returns
   457  // if it is the second, it calculates the hash and writes it
   458  // to the parent node recursively
   459  func (h *Hasher) run(n *node, bh hash.Hash, isLeft bool, s []byte) {
   460  	for {
   461  		if isLeft {
   462  			n.left = s
   463  		} else {
   464  			n.right = s
   465  		}
   466  		// the child-thread first arriving will quit
   467  		if n.toggle() {
   468  			return
   469  		}
   470  		// the second thread now can be sure both left and right children are written
   471  		// it calculates the hash of left|right and take it to the next level
   472  		bh.Reset()
   473  		bh.Write(n.left)
   474  		bh.Write(n.right)
   475  		s = bh.Sum(nil)
   476  
   477  		// at the root of the bmt just write the result to the result channel
   478  		if n.parent == nil {
   479  			h.bmt.result <- s
   480  			return
   481  		}
   482  
   483  		// otherwise iterate on parent
   484  		isLeft = n.isLeft
   485  		n = n.parent
   486  	}
   487  }
   488  
   489  // finalise is following the path starting from the final datasegment to the
   490  // BMT root via parents
   491  // for unbalanced trees it fills in the missing right sister nodes using
   492  // the pool's lookup table for BMT subtree root hashes for all-zero sections
   493  func (h *Hasher) finalise(skip bool) {
   494  	t := h.bmt
   495  	isLeft := t.cur%2 == 0
   496  	n := t.leaves[t.cur/2]
   497  	for level := 0; n != nil; level++ {
   498  		// when the final segment's path is going via left child node
   499  		// we include an all-zero subtree hash for the right level and toggle the node.
   500  		// when the path is going through right child node, nothing to do
   501  		if isLeft && !skip {
   502  			n.right = h.pool.zerohashes[level]
   503  			n.toggle()
   504  		}
   505  		skip = false
   506  		isLeft = n.isLeft
   507  		n = n.parent
   508  	}
   509  }
   510  
   511  // getTree obtains a BMT resource by reserving one from the pool
   512  func (h *Hasher) getTree() *tree {
   513  	if h.bmt != nil {
   514  		return h.bmt
   515  	}
   516  	t := h.pool.reserve()
   517  	h.bmt = t
   518  	return t
   519  }
   520  
   521  // atomic bool toggle implementing a concurrent reusable 2-state object
   522  // atomic addint with %2 implements atomic bool toggle
   523  // it returns true if the toggler just put it in the active/waiting state
   524  func (n *node) toggle() bool {
   525  	return atomic.AddInt32(&n.state, 1)%2 == 1
   526  }
   527  
   528  func hashstr(b []byte) string {
   529  	end := len(b)
   530  	if end > 4 {
   531  		end = 4
   532  	}
   533  	return fmt.Sprintf("%x", b[:end])
   534  }
   535  
   536  // calculateDepthFor calculates the depth (number of levels) in the BMT tree
   537  func calculateDepthFor(n int) (d int) {
   538  	c := 2
   539  	for ; c < n; c *= 2 {
   540  		d++
   541  	}
   542  	return d + 1
   543  }