github.com/letsencrypt/trillian@v1.1.2-0.20180615153820-ae375a99d36a/merkle/sparse_merkle_tree.go (about)

     1  // Copyright 2016 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package merkle
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"math/big"
    23  	"sync"
    24  
    25  	"github.com/golang/glog"
    26  	"github.com/google/trillian/merkle/hashers"
    27  	"github.com/google/trillian/storage"
    28  )
    29  
    30  // For more information about how Sparse Merkle Trees work see the Revocation Transparency
    31  // paper in the docs directory. Note that applications are not limited to X.509 certificates
    32  // and this implementation handles arbitrary data.
    33  
// SparseMerkleTreeReader knows how to read data from a TreeStorage transaction
// to provide proofs etc.
//
// Note that the read methods visible in this file (RootAtRevision and
// InclusionProof) take the revision to read as an explicit argument rather
// than using the treeRevision field.
type SparseMerkleTreeReader struct {
	// tx is the read-only storage transaction through which Merkle nodes are
	// fetched.
	tx           storage.ReadOnlyTreeTX
	// hasher is the MapHasher supplied to NewSparseMerkleTreeReader.
	hasher       hashers.MapHasher
	// treeRevision is the revision supplied to NewSparseMerkleTreeReader.
	treeRevision int64
}
    41  
// runTXFunc is the interface for a function which produces something which can
// be passed as the last argument to MapStorage.ReadWriteTransaction.
//
// Within this file it is invoked by subtreeWriter.buildSubtree, which passes
// a callback that reads and writes Merkle nodes through the supplied
// storage.MapTreeTX; the error it returns is reported as the subtree's result.
type runTXFunc func(context.Context, func(context.Context, storage.MapTreeTX) error) error
    45  
// SparseMerkleTreeWriter knows how to store/update a stored sparse Merkle tree
// via a TreeStorage transaction.
//
// All leaf updates and the root calculation are delegated to the top-level
// Subtree held in tree.
type SparseMerkleTreeWriter struct {
	// hasher is the MapHasher supplied to NewSparseMerkleTreeWriter.
	hasher       hashers.MapHasher
	// treeRevision is the revision supplied to NewSparseMerkleTreeWriter.
	treeRevision int64
	// tree is the top-level subtree worker which receives every leaf passed to
	// SetLeaves and whose root hash is returned by CalculateRoot.
	tree         Subtree
}
    53  
    54  type indexAndHash struct {
    55  	index []byte
    56  	hash  []byte
    57  }
    58  
// rootHashOrError represents a (sub-)tree root hash, or an error which
// prevented the calculation from completing.
//
// buildSubtree constructs values of this type with unkeyed (positional)
// literals, so the field order (hash, then err) must be preserved.
// TODO(gdbelvin): represent an empty subtree with a nil hash?
type rootHashOrError struct {
	// hash is the calculated root hash; buildSubtree leaves it nil when err is
	// set.
	hash []byte
	// err is the error which stopped the calculation, or nil on success.
	err  error
}
    66  
// Subtree is an interface which must be implemented by subtree workers.
// Currently there's only a locally sharded goroutine-based implementation,
// but the idea is that an RPC-based sharding implementation could be created
// and dropped in.
type Subtree interface {
	// SetLeaf sets a single leaf hash for integration into a sparse Merkle tree.
	SetLeaf(ctx context.Context, index []byte, hash []byte) error

	// CalculateRoot instructs the subtree worker to start calculating the root
	// hash of its tree.  It is an error to call SetLeaf() after calling this
	// method.
	CalculateRoot()

	// RootHash returns the calculated root hash for this subtree. If the root
	// hash has not yet been calculated, this method will block until it is.
	RootHash() ([]byte, error)
}
    84  
// getSubtreeFunc is essentially a factory method for getting child subtrees.
// It is called by subtreeWriter.getOrCreateChildSubtree with the prefix of the
// child to create, and returns the new child worker or an error.
type getSubtreeFunc func(ctx context.Context, prefix []byte) (Subtree, error)
    87  
// subtreeWriter knows how to calculate and store nodes for a subtree.
// Its work is performed by buildSubtree, which consumes leafQueue and
// publishes the outcome on root.
type subtreeWriter struct {
	// treeID is the tree identifier handed to NewHStar2 when the subtree root
	// is calculated.
	treeID int64
	// prefix is the path to the root of this subtree in the full tree.
	// i.e. all paths/indices under this tree share the same prefix.
	prefix []byte

	// subtreeDepth is the number of levels this subtree contains.
	subtreeDepth int

	// leafQueue is the work-queue containing leaves to be integrated into the
	// subtree. Each entry is a future: a function which yields the index and
	// hash of a leaf (or of a child subtree's root), or an error.
	leafQueue chan func() (*indexAndHash, error)

	// root is channel of size 1 from which the subtree root can be read once it
	// has been calculated.
	root chan rootHashOrError

	// childMutex protects access to children.
	childMutex sync.RWMutex

	// children is a map of child-subtrees by stringified prefix.
	children map[string]Subtree

	// runTX runs buildSubtree's node calculation inside a storage transaction.
	runTX        runTXFunc
	// treeRevision is the revision at which existing nodes are read and with
	// which newly calculated nodes are stamped.
	treeRevision int64

	// hasher supplies the bit length used to build node IDs and is passed to
	// NewHStar2 for the root calculation.
	hasher hashers.MapHasher

	// getSubtree creates a new child subtree worker for a given prefix.
	getSubtree getSubtreeFunc
}
   119  
   120  // getOrCreateChildSubtree returns, or creates and returns, a subtree for the
   121  // specified childPrefix.
   122  func (s *subtreeWriter) getOrCreateChildSubtree(ctx context.Context, childPrefix []byte) (Subtree, error) {
   123  	// TODO(al): figure out we actually need these copies and remove them if not.
   124  	//           If we do then tidy up with a copyBytes helper.
   125  	cp := append(make([]byte, 0, len(childPrefix)), childPrefix...)
   126  	childPrefixStr := string(cp)
   127  	s.childMutex.Lock()
   128  	defer s.childMutex.Unlock()
   129  
   130  	subtree := s.children[childPrefixStr]
   131  	var err error
   132  	if subtree == nil {
   133  		subtree, err = s.getSubtree(ctx, cp)
   134  		if err != nil {
   135  			return nil, err
   136  		}
   137  		s.children[childPrefixStr] = subtree
   138  
   139  		// Since a new subtree worker is being created we'll add a future to
   140  		// to the leafQueue such that calculation of *this* subtree's root will
   141  		// incorporate the newly calculated child subtree root.
   142  		s.leafQueue <- func() (*indexAndHash, error) {
   143  			// RootHash blocks until the root is available (or it's errored out)
   144  			h, err := subtree.RootHash()
   145  			if err != nil {
   146  				return nil, err
   147  			}
   148  			return &indexAndHash{
   149  				index: cp,
   150  				hash:  h,
   151  			}, nil
   152  		}
   153  	}
   154  	return subtree, nil
   155  }
   156  
   157  // SetLeaf sets a single leaf hash for incorporation into the sparse Merkle tree.
   158  // index is the full path of the leaf, starting from the root (not the subtree's root).
   159  func (s *subtreeWriter) SetLeaf(ctx context.Context, index []byte, hash []byte) error {
   160  	depth := len(index) * 8
   161  	absSubtreeDepth := len(s.prefix)*8 + s.subtreeDepth
   162  
   163  	switch {
   164  	case depth < absSubtreeDepth:
   165  		return fmt.Errorf("depth: %d, want >= %d", depth, absSubtreeDepth)
   166  
   167  	case depth > absSubtreeDepth:
   168  		childPrefix := index[:absSubtreeDepth/8]
   169  		subtree, err := s.getOrCreateChildSubtree(ctx, childPrefix)
   170  		if err != nil {
   171  			return err
   172  		}
   173  
   174  		return subtree.SetLeaf(ctx, index, hash)
   175  
   176  	default: // depth == absSubtreeDepth:
   177  		s.leafQueue <- func() (*indexAndHash, error) {
   178  			return &indexAndHash{index: index, hash: hash}, nil
   179  		}
   180  		return nil
   181  	}
   182  }
   183  
   184  // CalculateRoot initiates the process of calculating the subtree root.
   185  // The leafQueue is closed.
   186  func (s *subtreeWriter) CalculateRoot() {
   187  	close(s.leafQueue)
   188  
   189  	for _, v := range s.children {
   190  		v.CalculateRoot()
   191  	}
   192  }
   193  
   194  // RootHash returns the calculated subtree root hash, blocking if necessary.
   195  func (s *subtreeWriter) RootHash() ([]byte, error) {
   196  	r := <-s.root
   197  	return r.hash, r.err
   198  }
   199  
   200  // buildSubtree is the worker function which calculates the root hash.
   201  // The root chan will have had exactly one entry placed in it, and have been
   202  // subsequently closed when this method exits.
   203  func (s *subtreeWriter) buildSubtree(ctx context.Context) {
   204  	defer close(s.root)
   205  	var root []byte
   206  	err := s.runTX(ctx, func(ctx context.Context, tx storage.MapTreeTX) error {
   207  		root = []byte{}
   208  		leaves := make([]HStar2LeafHash, 0, len(s.leafQueue))
   209  		nodesToStore := make([]storage.Node, 0, len(s.leafQueue)*2)
   210  
   211  		for leaf := range s.leafQueue {
   212  			ih, err := leaf()
   213  			if err != nil {
   214  				return err
   215  			}
   216  			nodeID := storage.NewNodeIDFromPrefixSuffix(ih.index, storage.Suffix{}, s.hasher.BitLen())
   217  
   218  			leaves = append(leaves, HStar2LeafHash{
   219  				Index:    nodeID.BigInt(),
   220  				LeafHash: ih.hash,
   221  			})
   222  			nodesToStore = append(nodesToStore,
   223  				storage.Node{
   224  					NodeID:       nodeID,
   225  					Hash:         ih.hash,
   226  					NodeRevision: s.treeRevision,
   227  				})
   228  		}
   229  
   230  		// calculate new root, and intermediate nodes:
   231  		hs2 := NewHStar2(s.treeID, s.hasher)
   232  		var err error
   233  		root, err = hs2.HStar2Nodes(s.prefix, s.subtreeDepth, leaves,
   234  			func(depth int, index *big.Int) ([]byte, error) {
   235  				nodeID := storage.NewNodeIDFromBigInt(depth, index, s.hasher.BitLen())
   236  				glog.V(4).Infof("buildSubtree.get(%x, %d) nid: %x, %v",
   237  					index.Bytes(), depth, nodeID.Path, nodeID.PrefixLenBits)
   238  				nodes, err := tx.GetMerkleNodes(ctx, s.treeRevision, []storage.NodeID{nodeID})
   239  				if err != nil {
   240  					return nil, err
   241  				}
   242  				if len(nodes) == 0 {
   243  					return nil, nil
   244  				}
   245  				if got, want := nodes[0].NodeID, nodeID; !got.Equivalent(want) {
   246  					return nil, fmt.Errorf("got node %v from storage, want %v", got, want)
   247  				}
   248  				if got, want := nodes[0].NodeRevision, s.treeRevision; got > want {
   249  					return nil, fmt.Errorf("got node revision %d, want <= %d", got, want)
   250  				}
   251  				return nodes[0].Hash, nil
   252  			},
   253  			func(depth int, index *big.Int, h []byte) error {
   254  				// Don't store the root node of the subtree - that's part of the parent
   255  				// tree.
   256  				if depth == len(s.prefix)*8 && len(s.prefix) > 0 {
   257  					return nil
   258  				}
   259  				nodeID := storage.NewNodeIDFromBigInt(depth, index, s.hasher.BitLen())
   260  				glog.V(4).Infof("buildSubtree.set(%x, %v) nid: %x, %v : %x",
   261  					index.Bytes(), depth, nodeID.Path, nodeID.PrefixLenBits, h)
   262  				nodesToStore = append(nodesToStore,
   263  					storage.Node{
   264  						NodeID:       nodeID,
   265  						Hash:         h,
   266  						NodeRevision: s.treeRevision,
   267  					})
   268  				return nil
   269  			})
   270  		if err != nil {
   271  			return err
   272  		}
   273  
   274  		// write nodes back to storage
   275  		return tx.SetMerkleNodes(ctx, nodesToStore)
   276  	})
   277  	if err != nil {
   278  		s.root <- rootHashOrError{nil, err}
   279  		return
   280  	}
   281  
   282  	// send calculated root hash
   283  	s.root <- rootHashOrError{root, nil}
   284  }
   285  
   286  var (
   287  	// ErrNoSuchRevision is returned when a request is made for information about
   288  	// a tree revision which does not exist.
   289  	ErrNoSuchRevision = errors.New("no such revision")
   290  )
   291  
   292  // NewSparseMerkleTreeReader returns a new SparseMerkleTreeReader, reading at
   293  // the specified tree revision, using the passed in MapHasher for calculating
   294  // and verifying tree hashes read via tx.
   295  func NewSparseMerkleTreeReader(rev int64, h hashers.MapHasher, tx storage.ReadOnlyTreeTX) *SparseMerkleTreeReader {
   296  	return &SparseMerkleTreeReader{
   297  		tx:           tx,
   298  		hasher:       h,
   299  		treeRevision: rev,
   300  	}
   301  }
   302  
   303  func leafQueueSize(depths []int) int {
   304  	if len(depths) == 1 {
   305  		return 1024
   306  	}
   307  	// for higher levels make sure we've got enough space if all leaves turn out
   308  	// to be sub-tree futures...
   309  	return 1 << uint(depths[0])
   310  }
   311  
   312  // newLocalSubtreeWriter creates a new local go-routine based subtree worker.
   313  func newLocalSubtreeWriter(ctx context.Context, treeID, rev int64, prefix []byte, depths []int, runTX runTXFunc, h hashers.MapHasher) (Subtree, error) {
   314  	tree := subtreeWriter{
   315  		treeID:       treeID,
   316  		treeRevision: rev,
   317  		// TODO(al): figure out if we actually need these copies and remove it not.
   318  		prefix:       append(make([]byte, 0, len(prefix)), prefix...),
   319  		subtreeDepth: depths[0],
   320  		leafQueue:    make(chan func() (*indexAndHash, error), leafQueueSize(depths)),
   321  		root:         make(chan rootHashOrError, 1),
   322  		children:     make(map[string]Subtree),
   323  		runTX:        runTX,
   324  		hasher:       h,
   325  		getSubtree: func(ctx context.Context, p []byte) (Subtree, error) {
   326  			myPrefix := bytes.Join([][]byte{prefix, p}, []byte{})
   327  			return newLocalSubtreeWriter(ctx, treeID, rev, myPrefix, depths[1:], runTX, h)
   328  		},
   329  	}
   330  
   331  	// TODO(al): probably shouldn't be spawning go routines willy-nilly like
   332  	// this, but it'll do for now.
   333  	go tree.buildSubtree(ctx)
   334  	return &tree, nil
   335  }
   336  
   337  // NewSparseMerkleTreeWriter returns a new SparseMerkleTreeWriter, which will
   338  // write data back into the tree at the specified revision, using the passed
   339  // in MapHasher to calculate/verify tree hashes, storing via tx.
   340  func NewSparseMerkleTreeWriter(ctx context.Context, treeID, rev int64, h hashers.MapHasher, runTX runTXFunc) (*SparseMerkleTreeWriter, error) {
   341  	// TODO(al): allow the tree layering sizes to be customisable somehow.
   342  	const topSubtreeSize = 8 // must be a multiple of 8 for now.
   343  	tree, err := newLocalSubtreeWriter(ctx, treeID, rev, []byte{}, []int{topSubtreeSize, h.Size()*8 - topSubtreeSize}, runTX, h)
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  	return &SparseMerkleTreeWriter{
   348  		hasher:       h,
   349  		tree:         tree,
   350  		treeRevision: rev,
   351  	}, nil
   352  }
   353  
   354  // RootAtRevision returns the sparse Merkle tree root hash at the specified
   355  // revision, or ErrNoSuchRevision if the requested revision doesn't exist.
   356  func (s SparseMerkleTreeReader) RootAtRevision(ctx context.Context, rev int64) ([]byte, error) {
   357  	rootNodeID := storage.NewEmptyNodeID(256)
   358  	nodes, err := s.tx.GetMerkleNodes(ctx, rev, []storage.NodeID{rootNodeID})
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  	switch {
   363  	case len(nodes) == 0:
   364  		return nil, ErrNoSuchRevision
   365  	case len(nodes) > 1:
   366  		return nil, fmt.Errorf("expected 1 node, but got %d", len(nodes))
   367  	}
   368  	// Sanity check the nodeID
   369  	if !nodes[0].NodeID.Equivalent(rootNodeID) {
   370  		return nil, fmt.Errorf("unexpected node returned with ID: %v", nodes[0].NodeID)
   371  	}
   372  	// Sanity check the revision
   373  	if nodes[0].NodeRevision > rev {
   374  		return nil, fmt.Errorf("unexpected node revision returned: %d > %d", nodes[0].NodeRevision, rev)
   375  	}
   376  	return nodes[0].Hash, nil
   377  }
   378  
   379  // InclusionProof returns an inclusion (or non-inclusion) proof for the
   380  // specified key at the specified revision.
   381  // If the revision does not exist it will return ErrNoSuchRevision error.
   382  func (s SparseMerkleTreeReader) InclusionProof(ctx context.Context, rev int64, index []byte) ([][]byte, error) {
   383  	nid := storage.NewNodeIDFromHash(index)
   384  	sibs := nid.Siblings()
   385  	nodes, err := s.tx.GetMerkleNodes(ctx, rev, sibs)
   386  	if err != nil {
   387  		return nil, err
   388  	}
   389  
   390  	nodeMap := make(map[string]*storage.Node)
   391  	glog.V(2).Infof("Got Nodes: ")
   392  	for _, n := range nodes {
   393  		n := n // need this or we'll end up with the same node hash repeated in the map
   394  		glog.V(2).Infof("   %x, %d: %x", n.NodeID.Path, len(n.NodeID.String()), n.Hash)
   395  		nodeMap[n.NodeID.String()] = &n
   396  	}
   397  
   398  	// We're building a full proof from a combination of whichever nodes we got
   399  	// back from the storage layer, and the set of "null" hashes.
   400  	r := make([][]byte, len(sibs))
   401  	// For each proof element:
   402  	for i := 0; i < len(r); i++ {
   403  		proofID := sibs[i]
   404  		pNode := nodeMap[proofID.String()]
   405  		if pNode == nil {
   406  			// we have no node for this level from storage, so the client will use
   407  			// the null hash.
   408  			continue
   409  		}
   410  		r[i] = pNode.Hash
   411  		delete(nodeMap, proofID.String())
   412  	}
   413  
   414  	// Make sure we used up all the returned nodes, otherwise something's gone wrong.
   415  	if remaining := len(nodeMap); remaining != 0 {
   416  		return nil, fmt.Errorf("failed to consume all returned nodes; got %d nodes, but %d remain(s) unused", len(nodes), remaining)
   417  	}
   418  	return r, nil
   419  }
   420  
   421  // SetLeaves adds a batch of leaves to the in-flight tree update.
   422  func (s *SparseMerkleTreeWriter) SetLeaves(ctx context.Context, leaves []HashKeyValue) error {
   423  	for _, l := range leaves {
   424  		if err := s.tree.SetLeaf(ctx, l.HashedKey, l.HashedValue); err != nil {
   425  			return err
   426  		}
   427  	}
   428  	return nil
   429  }
   430  
   431  // CalculateRoot calculates the new root hash including the newly added leaves.
   432  func (s *SparseMerkleTreeWriter) CalculateRoot() ([]byte, error) {
   433  	s.tree.CalculateRoot()
   434  	return s.tree.RootHash()
   435  }
   436  
// HashKeyValue represents a Hash(key)-Hash(value) pair.
// SparseMerkleTreeWriter.SetLeaves passes HashedKey as the leaf index and
// HashedValue as the leaf hash.
type HashKeyValue struct {
	// HashedKey is the hash of the key data.
	HashedKey []byte

	// HashedValue is the hash of the value data.
	HashedValue []byte
}