github.com/bartle-stripe/trillian@v1.2.1/storage/tools/dump_tree/dumplib/dumplib.go (about)

     1  // Copyright 2017 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dumplib
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"crypto"
    21  	"crypto/sha256"
    22  	"crypto/x509"
    23  	"encoding/base64"
    24  	"encoding/binary"
    25  	"encoding/hex"
    26  	"fmt"
    27  	"sort"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/golang/glog"
    33  	"github.com/golang/protobuf/proto"
    34  	"github.com/golang/protobuf/ptypes"
    35  	"github.com/golang/protobuf/ptypes/any"
    36  	"github.com/google/trillian"
    37  	"github.com/google/trillian/crypto/keys/der"
    38  	"github.com/google/trillian/crypto/keys/pem"
    39  	"github.com/google/trillian/crypto/keyspb"
    40  	"github.com/google/trillian/crypto/sigpb"
    41  	"github.com/google/trillian/log"
    42  	"github.com/google/trillian/merkle/hashers"
    43  	"github.com/google/trillian/merkle/rfc6962"
    44  	"github.com/google/trillian/monitoring"
    45  	"github.com/google/trillian/quota"
    46  	"github.com/google/trillian/storage"
    47  	"github.com/google/trillian/storage/cache"
    48  	"github.com/google/trillian/storage/memory"
    49  	"github.com/google/trillian/storage/storagepb"
    50  	"github.com/google/trillian/trees"
    51  	"github.com/google/trillian/types"
    52  	"github.com/google/trillian/util"
    53  
    54  	tcrypto "github.com/google/trillian/crypto"
    55  )
    56  
    57  var (
    58  	leafHashesFlag bool
    59  )
    60  
    61  // A 32 bit magic number that is written at the start of record io files to identify the format.
    62  const recordIOMagic int32 = 0x3ed7230a
    63  
// treeAndRev pairs a dumped subtree with the storage key it was found under
// and the revision parsed from that key. latestRevisions uses it to remember
// the highest revision seen for each subtree.
type treeAndRev struct {
	// fullKey is the complete storage key the subtree was dumped under.
	fullKey string
	// subtree is the subtree proto stored at fullKey.
	subtree *storagepb.SubtreeProto
	// revision is the revision number taken from the last segment of fullKey.
	revision int
}
    69  
    70  // summarizeProto is an output formatter function that produces a single line summary.
    71  func summarizeProto(s *storagepb.SubtreeProto) string {
    72  	summary := fmt.Sprintf("p: %-20s d: %d lc: %3d ic: %3d rh:%s\n",
    73  		hex.EncodeToString(s.Prefix),
    74  		s.Depth,
    75  		len(s.Leaves),
    76  		s.InternalNodeCount,
    77  		hex.EncodeToString(s.RootHash))
    78  
    79  	if leafHashesFlag {
    80  		for prefix, hash := range s.Leaves {
    81  			dp, err := base64.StdEncoding.DecodeString(prefix)
    82  			if err != nil {
    83  				glog.Fatalf("Failed to decode leaf prefix: %v", err)
    84  			}
    85  			summary += fmt.Sprintf("%s -> %s\n", hex.EncodeToString(dp), hex.EncodeToString(hash))
    86  		}
    87  	}
    88  
    89  	return summary
    90  }
    91  
    92  // fullProto is an output formatter function that produces a single line in proto text format.
    93  func fullProto(s *storagepb.SubtreeProto) string {
    94  	return fmt.Sprintf("%s\n", proto.MarshalTextString(s))
    95  }
    96  
    97  // recordIOProto is an output formatter that produces binary recordio format
    98  func recordIOProto(s *storagepb.SubtreeProto) string {
    99  	buf := new(bytes.Buffer)
   100  	data, err := proto.Marshal(s)
   101  	if err != nil {
   102  		glog.Fatalf("Failed to marshal subtree proto: %v", err)
   103  	}
   104  	dataLen := int64(len(data))
   105  	if err = binary.Write(buf, binary.BigEndian, dataLen); err != nil {
   106  		glog.Fatalf("binary.Write failed: %v", err)
   107  	}
   108  	var compLen int64
   109  	if err = binary.Write(buf, binary.BigEndian, compLen); err != nil {
   110  		glog.Fatalf("binary.Write failed: %v", err)
   111  	}
   112  	// buffer.Write() always returns a nil error
   113  	buf.Write(data)
   114  
   115  	return buf.String()
   116  }
   117  
// logPrivKeyPEM is a PEM encoded EC private key. It is a copy of the
// logserver private key from the testdata directory. The PEM headers mark it
// as encrypted; createTree decrypts it with the password "towel" and installs
// it as the private key of the tree it creates.
var logPrivKeyPEM = `
-----BEGIN EC PRIVATE KEY-----
Proc-Type: 4,ENCRYPTED
DEK-Info: DES-CBC,D95ECC664FF4BDEC

Xy3zzHFwlFwjE8L1NCngJAFbu3zFf4IbBOCsz6Fa790utVNdulZncNCl2FMK3U2T
sdoiTW8ymO+qgwcNrqvPVmjFRBtkN0Pn5lgbWhN/aK3TlS9IYJ/EShbMUzjgVzie
S9+/31whWcH/FLeLJx4cBzvhgCtfquwA+s5ojeLYYsk=
-----END EC PRIVATE KEY-----`
   128  
// logPubKeyPEM is the PEM encoded public key corresponding to logPrivKeyPEM.
// createTree converts it to DER and installs it as the public key of the tree
// it creates.
var logPubKeyPEM = `
-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEywnWicNEQ8bn3GXcGpA+tiU4VL70
Ws9xezgQPrg96YGsFrF6KYG68iqyHDlQ+4FWuKfGKXHn3ooVtB/pfawb5Q==
-----END PUBLIC KEY-----`
   135  
   136  func sequence(tree *trillian.Tree, seq *log.Sequencer, count, batchSize int) {
   137  	glog.Infof("Sequencing batch of size %d", count)
   138  	sequenced, err := seq.IntegrateBatch(context.TODO(), tree, batchSize, 0, 24*time.Hour)
   139  
   140  	if err != nil {
   141  		glog.Fatalf("IntegrateBatch got: %v, want: no err", err)
   142  	}
   143  
   144  	if got, want := sequenced, count; got != want {
   145  		glog.Fatalf("IntegrateBatch got: %d sequenced, want: %d", got, want)
   146  	}
   147  }
   148  
   149  func getPrivateKey(pemPath, pemPassword string) (*any.Any, crypto.Signer) {
   150  	pemSigner, err := pem.UnmarshalPrivateKey(pemPath, pemPassword)
   151  	if err != nil {
   152  		glog.Fatalf("UnmarshalPrivateKey(): %v", err)
   153  	}
   154  	pemDer, err := der.MarshalPrivateKey(pemSigner)
   155  	if err != nil {
   156  		glog.Fatalf("MarshalPrivateKey(): %v", err)
   157  	}
   158  	anyPrivKey, err := ptypes.MarshalAny(&keyspb.PrivateKey{Der: pemDer})
   159  	if err != nil {
   160  		glog.Fatalf("MarshalAny(%v): %v", pemDer, err)
   161  	}
   162  
   163  	return anyPrivKey, pemSigner
   164  }
   165  
   166  func getPublicKey(keyPEM string) []byte {
   167  	key, err := pem.UnmarshalPublicKey(keyPEM)
   168  	if err != nil {
   169  		panic(err)
   170  	}
   171  
   172  	keyDER, err := x509.MarshalPKIXPublicKey(key)
   173  	if err != nil {
   174  		panic(err)
   175  	}
   176  	return keyDER
   177  }
   178  
   179  func createTree(as storage.AdminStorage, ls storage.LogStorage) (*trillian.Tree, *tcrypto.Signer) {
   180  	ctx := context.TODO()
   181  	privKey, _ := getPrivateKey(logPrivKeyPEM, "towel")
   182  	pubKey := getPublicKey(logPubKeyPEM)
   183  	tree := &trillian.Tree{
   184  		TreeType:           trillian.TreeType_LOG,
   185  		TreeState:          trillian.TreeState_ACTIVE,
   186  		HashAlgorithm:      sigpb.DigitallySigned_SHA256,
   187  		HashStrategy:       trillian.HashStrategy_RFC6962_SHA256,
   188  		SignatureAlgorithm: sigpb.DigitallySigned_ECDSA,
   189  		PrivateKey:         privKey,
   190  		PublicKey:          &keyspb.PublicKey{Der: pubKey},
   191  		MaxRootDuration:    ptypes.DurationProto(0 * time.Millisecond),
   192  	}
   193  	createdTree, err := storage.CreateTree(ctx, as, tree)
   194  	if err != nil {
   195  		glog.Fatalf("Create tree: %v", err)
   196  	}
   197  
   198  	hasher, err := hashers.NewLogHasher(tree.HashStrategy)
   199  	if err != nil {
   200  		glog.Fatalf("NewLogHasher: %v", err)
   201  	}
   202  	tSigner, err := trees.Signer(ctx, createdTree)
   203  	if err != nil {
   204  		glog.Fatalf("Creating signer: %v", err)
   205  	}
   206  
   207  	sthZero, err := tSigner.SignLogRoot(&types.LogRootV1{
   208  		RootHash: hasher.EmptyRoot(),
   209  	})
   210  	if err != nil {
   211  		glog.Fatalf("SignLogRoot: %v", err)
   212  	}
   213  
   214  	err = ls.ReadWriteTransaction(ctx, createdTree, func(ctx context.Context, tx storage.LogTreeTX) error {
   215  		if err := tx.StoreSignedLogRoot(ctx, *sthZero); err != nil {
   216  			glog.Fatalf("StoreSignedLogRoot: %v", err)
   217  		}
   218  		return nil
   219  	})
   220  	if err != nil {
   221  		glog.Fatalf("ReadWriteTransaction: %v", err)
   222  	}
   223  
   224  	return createdTree, tSigner
   225  }
   226  
   227  // Options are the commandline arguments one can pass to Main
   228  type Options struct {
   229  	TreeSize, BatchSize                          int
   230  	LeafFormat                                   string
   231  	LatestRevision, Summary, HexKeys, LeafHashes bool
   232  	RecordIO, Rebuild, Traverse, DumpLeaves      bool
   233  }
   234  
   235  // Main runs the dump_tree tool
   236  func Main(args Options) string {
   237  	validateFlagsOrDie(args.Summary, args.RecordIO)
   238  
   239  	leafHashesFlag = args.LeafHashes
   240  
   241  	glog.Info("Initializing memory log storage")
   242  	ls := memory.NewLogStorage(monitoring.InertMetricFactory{})
   243  	as := memory.NewAdminStorage(ls)
   244  	tree, tSigner := createTree(as, ls)
   245  
   246  	seq := log.NewSequencer(rfc6962.DefaultHasher,
   247  		util.SystemTimeSource{},
   248  		ls,
   249  		tSigner,
   250  		nil,
   251  		quota.Noop())
   252  
   253  	// Create the initial tree head at size 0, which is required. And then sequence the leaves.
   254  	sequence(tree, seq, 0, args.BatchSize)
   255  	sequenceLeaves(ls, seq, tree, args.TreeSize, args.BatchSize, args.LeafFormat)
   256  
   257  	// Read the latest STH back
   258  	var root types.LogRootV1
   259  	err := ls.ReadWriteTransaction(context.TODO(), tree, func(ctx context.Context, tx storage.LogTreeTX) error {
   260  		var err error
   261  		sth, err := tx.LatestSignedLogRoot(context.TODO())
   262  		if err != nil {
   263  			glog.Fatalf("LatestSignedLogRoot: %v", err)
   264  		}
   265  		if err := root.UnmarshalBinary(sth.LogRoot); err != nil {
   266  			return fmt.Errorf("could not parse current log root: %v", err)
   267  		}
   268  
   269  		glog.Infof("STH at size %d has hash %s@%d",
   270  			root.TreeSize,
   271  			hex.EncodeToString(root.RootHash),
   272  			root.Revision)
   273  		return nil
   274  	})
   275  	if err != nil {
   276  		glog.Fatalf("ReadWriteTransaction: %v", err)
   277  	}
   278  
   279  	// All leaves are now sequenced into the tree. The current state is what we need.
   280  	glog.Info("Producing output")
   281  
   282  	if args.Traverse {
   283  		return traverseTreeStorage(ls, tree, args.TreeSize, int64(root.Revision))
   284  	}
   285  
   286  	if args.DumpLeaves {
   287  		return dumpLeaves(ls, tree, args.TreeSize)
   288  	}
   289  
   290  	var formatter func(*storagepb.SubtreeProto) string
   291  	switch {
   292  	case args.Summary:
   293  		formatter = summarizeProto
   294  	case args.RecordIO:
   295  		formatter = recordIOProto
   296  		recordIOHdr()
   297  	default:
   298  		formatter = fullProto
   299  	}
   300  
   301  	hasher, err := hashers.NewLogHasher(trillian.HashStrategy_RFC6962_SHA256)
   302  	if err != nil {
   303  		glog.Fatalf("Failed to create a log hasher: %v", err)
   304  	}
   305  	repopFunc := cache.LogPopulateFunc(hasher)
   306  
   307  	if args.LatestRevision {
   308  		return latestRevisions(ls, tree.TreeId, repopFunc, formatter, args.Rebuild, args.HexKeys)
   309  	}
   310  	return allRevisions(ls, tree.TreeId, repopFunc, formatter, args.Rebuild, args.HexKeys)
   311  }
   312  
   313  func allRevisions(ls storage.LogStorage, treeID int64, repopFunc storage.PopulateSubtreeFunc, of func(*storagepb.SubtreeProto) string, rebuildInternal, hexKeysFlag bool) string {
   314  	out := new(bytes.Buffer)
   315  	memory.DumpSubtrees(ls, treeID, func(k string, v *storagepb.SubtreeProto) {
   316  		if rebuildInternal {
   317  			repopFunc(v)
   318  		}
   319  		if hexKeysFlag {
   320  			hexKeys(v)
   321  		}
   322  		fmt.Fprint(out, of(v))
   323  	})
   324  	return out.String()
   325  }
   326  
   327  func latestRevisions(ls storage.LogStorage, treeID int64, repopFunc storage.PopulateSubtreeFunc, of func(*storagepb.SubtreeProto) string, rebuildInternal, hexKeysFlag bool) string {
   328  	out := new(bytes.Buffer)
   329  	// vMap maps subtree prefixes (as strings) to the corresponding subtree proto and its revision
   330  	vMap := make(map[string]treeAndRev)
   331  	memory.DumpSubtrees(ls, treeID, func(k string, v *storagepb.SubtreeProto) {
   332  		// Relies on the btree key space for subtrees being /tree_id/subtree/<id>/<revision>
   333  		pieces := strings.Split(k, "/")
   334  		if got, want := len(pieces), 5; got != want {
   335  			glog.Fatalf("Wrong no of Btree subtree key segments. Got: %d, want: %d", got, want)
   336  		}
   337  
   338  		subID := pieces[3]
   339  		subtree := vMap[subID]
   340  		rev, err := strconv.Atoi(pieces[4])
   341  		if err != nil {
   342  			glog.Fatalf("Bad subtree key: %v", k)
   343  		}
   344  
   345  		if rev > subtree.revision {
   346  			vMap[subID] = treeAndRev{
   347  				fullKey:  k,
   348  				subtree:  v,
   349  				revision: rev,
   350  			}
   351  		}
   352  	})
   353  
   354  	// Store the keys in sorted order
   355  	var sKeys []string
   356  	for k := range vMap {
   357  		sKeys = append(sKeys, k)
   358  	}
   359  	sort.Strings(sKeys)
   360  
   361  	// The map should now contain the latest revisions per subtree
   362  	for _, k := range sKeys {
   363  		v := vMap[k]
   364  		if rebuildInternal {
   365  			repopFunc(v.subtree)
   366  		}
   367  		if hexKeysFlag {
   368  			hexKeys(v.subtree)
   369  		}
   370  
   371  		fmt.Fprint(out, of(v.subtree))
   372  	}
   373  	return out.String()
   374  }
   375  
   376  func validateFlagsOrDie(summary, recordIO bool) {
   377  	if summary && recordIO {
   378  		glog.Fatal("-summary and -recordio are mutually exclusive flags")
   379  	}
   380  }
   381  
   382  func sequenceLeaves(ls storage.LogStorage, seq *log.Sequencer, tree *trillian.Tree, treeSize, batchSize int, leafDataFormat string) {
   383  	glog.Info("Queuing work")
   384  	for l := 0; l < treeSize; l++ {
   385  		glog.V(1).Infof("Queuing leaf %d", l)
   386  
   387  		leafData := []byte(fmt.Sprintf(leafDataFormat, l))
   388  		err := ls.ReadWriteTransaction(context.TODO(), tree, func(ctx context.Context, tx storage.LogTreeTX) error {
   389  			hash := sha256.Sum256(leafData)
   390  			lh := []byte(hash[:])
   391  			leaf := trillian.LogLeaf{LeafValue: leafData, LeafIdentityHash: lh, MerkleLeafHash: lh}
   392  			leaves := []*trillian.LogLeaf{&leaf}
   393  
   394  			if _, err := tx.QueueLeaves(context.TODO(), leaves, time.Now()); err != nil {
   395  				glog.Fatalf("QueueLeaves got: %v, want: no err", err)
   396  			}
   397  			return nil
   398  		})
   399  		if err != nil {
   400  			glog.Fatalf("ReadWriteTransaction: %v", err)
   401  		}
   402  
   403  		if l > 0 && l%batchSize == 0 {
   404  			sequence(tree, seq, batchSize, batchSize)
   405  		}
   406  	}
   407  	glog.Info("Finished queueing")
   408  	// Handle anything left over
   409  	left := treeSize % batchSize
   410  	if left == 0 {
   411  		left = batchSize
   412  	}
   413  	sequence(tree, seq, left, batchSize)
   414  	glog.Info("Finished sequencing")
   415  }
   416  
   417  func traverseTreeStorage(ls storage.LogStorage, tree *trillian.Tree, ts int, rev int64) string {
   418  	out := new(bytes.Buffer)
   419  	nodesAtLevel := int64(ts)
   420  
   421  	tx, err := ls.SnapshotForTree(context.TODO(), tree)
   422  	if err != nil {
   423  		glog.Fatalf("SnapshotForTree: %v", err)
   424  	}
   425  	defer func() {
   426  		if err := tx.Commit(); err != nil {
   427  			glog.Fatalf("TX Commit(): %v", err)
   428  		}
   429  	}()
   430  
   431  	levels := int64(0)
   432  	n := nodesAtLevel
   433  	for n > 0 {
   434  		levels++
   435  		n = n >> 1
   436  	}
   437  
   438  	// Because of the way we store subtrees omitting internal RHS nodes with one sibling there
   439  	// is an extra level stored for trees that don't have a number of leaves that is a power
   440  	// of 2. We account for this here and in the loop below.
   441  	if !isPerfectTree(int64(ts)) {
   442  		levels++
   443  	}
   444  
   445  	for level := int64(0); level < levels; level++ {
   446  		for node := int64(0); node < nodesAtLevel; node++ {
   447  			// We're going to request one node at a time, which would normally be slow but we have
   448  			// the tree in RAM so it's not a real problem.
   449  			nodeID, err := storage.NewNodeIDForTreeCoords(level, node, 64)
   450  			if err != nil {
   451  				glog.Fatalf("NewNodeIDForTreeCoords: (%d, %d): got: %v, want: no err", level, node, err)
   452  			}
   453  
   454  			nodes, err := tx.GetMerkleNodes(context.TODO(), rev, []storage.NodeID{nodeID})
   455  			if err != nil {
   456  				glog.Fatalf("GetMerkleNodes: %s: %v", nodeID.CoordString(), err)
   457  			}
   458  			if len(nodes) != 1 {
   459  				glog.Fatalf("GetMerkleNodes: %s: want 1 node got: %v", nodeID.CoordString(), nodes)
   460  			}
   461  
   462  			fmt.Fprintf(out, "%6d %6d -> %s\n", level, node, hex.EncodeToString(nodes[0].Hash))
   463  		}
   464  
   465  		nodesAtLevel = nodesAtLevel >> 1
   466  		fmt.Println()
   467  		// This handles the extra level in non-perfect trees
   468  		if nodesAtLevel == 0 {
   469  			nodesAtLevel = 1
   470  		}
   471  	}
   472  	return out.String()
   473  }
   474  
   475  func dumpLeaves(ls storage.LogStorage, tree *trillian.Tree, ts int) string {
   476  	out := new(bytes.Buffer)
   477  	tx, err := ls.SnapshotForTree(context.TODO(), tree)
   478  	if err != nil {
   479  		glog.Fatalf("SnapshotForTree: %v", err)
   480  	}
   481  	defer func() {
   482  		if err := tx.Commit(); err != nil {
   483  			glog.Fatalf("TX Commit(): got: %v", err)
   484  		}
   485  	}()
   486  
   487  	for l := int64(0); l < int64(ts); l++ {
   488  		leaves, err := tx.GetLeavesByIndex(context.TODO(), []int64{l})
   489  		if err != nil {
   490  			glog.Fatalf("GetLeavesByIndex for index %d got: %v", l, err)
   491  		}
   492  		fmt.Fprintf(out, "%6d:%s\n", l, leaves[0].LeafValue)
   493  	}
   494  	return out.String()
   495  }
   496  
   497  func hexMap(in map[string][]byte) map[string][]byte {
   498  	m := make(map[string][]byte)
   499  
   500  	for k, v := range in {
   501  		unb64, err := base64.StdEncoding.DecodeString(k)
   502  		if err != nil {
   503  			glog.Fatalf("Could not decode key as base 64: %s got: %v", k, err)
   504  		}
   505  		m[hex.EncodeToString(unb64)] = v
   506  	}
   507  
   508  	return m
   509  }
   510  
   511  func hexKeys(s *storagepb.SubtreeProto) {
   512  	s.Leaves = hexMap(s.Leaves)
   513  	s.InternalNodes = hexMap(s.InternalNodes)
   514  }
   515  
   516  func isPerfectTree(x int64) bool {
   517  	return x != 0 && (x&(x-1) == 0)
   518  }
   519  
   520  func recordIOHdr() {
   521  	buf := new(bytes.Buffer)
   522  	err := binary.Write(buf, binary.BigEndian, recordIOMagic)
   523  	if err != nil {
   524  		glog.Fatalf("binary.Write failed: %v", err)
   525  	}
   526  	fmt.Print(buf.String())
   527  }