github.com/bartle-stripe/trillian@v1.2.1/storage/cloudspanner/log_storage.go (about)

     1  // Copyright 2018 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cloudspanner
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"math/rand"
    22  	"sort"
    23  	"sync"
    24  	"time"
    25  
    26  	"cloud.google.com/go/spanner"
    27  	"github.com/golang/glog"
    28  	"github.com/golang/protobuf/ptypes"
    29  	"github.com/google/trillian"
    30  	"github.com/google/trillian/merkle/hashers"
    31  	"github.com/google/trillian/storage"
    32  	"github.com/google/trillian/storage/cache"
    33  	"github.com/google/trillian/storage/cloudspanner/spannerpb"
    34  	"github.com/google/trillian/types"
    35  	"google.golang.org/grpc/codes"
    36  	"google.golang.org/grpc/status"
    37  )
    38  
const (
	// Names of the Spanner tables used by the log storage code (and, judging
	// by its Idx suffix, one secondary index — confirm against the schema).
	leafDataTbl            = "LeafData"
	seqDataByMerkleHashIdx = "SequenceByMerkleHash"
	seqDataTbl             = "SequencedLeafData"
	unseqTable             = "Unsequenced"

	// unsequencedCountSQL counts the rows of the Unsequenced table, grouped by
	// TreeID.
	unsequencedCountSQL = "SELECT Unsequenced.TreeID, COUNT(1) FROM Unsequenced GROUP BY TreeID"

	// getActiveLogIDsSQL selects the TreeID of every non-deleted row in
	// TreeRoots whose type and state match the values listed below.
	// t.TreeType: 1 = Log, 3 = PreorderedLog.
	// t.TreeState: 1 = Active, 5 = Draining.
	getActiveLogIDsSQL = `SELECT t.TreeID FROM TreeRoots t
													WHERE (t.TreeType = 1 OR t.TreeType = 3)
													AND (t.TreeState = 1 OR t.TreeState = 5)
													AND t.Deleted=false`
)
    54  
// LogStorageOptions are tuning, experiments and workarounds that can be used.
// The embedded TreeStorageOptions are handed to the underlying tree storage
// by NewLogStorageWithOpts.
type LogStorageOptions struct {
	TreeStorageOptions

	// DequeueAcrossMerkleBuckets controls whether DequeueLeaves will only dequeue
	// from within the chosen Time+Merkle bucket, or whether it will attempt to
	// continue reading from contiguous Merkle buckets until a sufficient number
	// of leaves have been dequeued, or the entire Time bucket has been read.
	//
	// NOTE(review): logTX.DequeueLeaves does not read this flag; it always
	// applies DequeueAcrossMerkleBucketsRangeFraction. Confirm whether the flag
	// is consulted elsewhere.
	DequeueAcrossMerkleBuckets bool
	// DequeueAcrossMerkleBucketsRangeFraction specifies the fraction of Merkle
	// keyspace to dequeue from when using multi-bucket-dequeue.
	// NewLogStorageWithOpts replaces values that are <= 0 or > 1.0 with 1.0.
	DequeueAcrossMerkleBucketsRangeFraction float64
}
    68  
var (
	// MaxUnsequencedCountStaleness configures the read-staleness limit for the
	// spanner query to retrieve the number of unsequenced certs.
	MaxUnsequencedCountStaleness = 5 * time.Minute

	// Spanner DB columns:
	// These names are passed as column lists to the Spanner mutations and
	// reads built in this file.
	colExtraData               = "ExtraData"
	colLeafValue               = "LeafValue"
	colLeafIdentityHash        = "LeafIdentityHash"
	colMerkleLeafHash          = "MerkleLeafHash"
	colSequenceNumber          = "SequenceNumber"
	colQueueTimestampNanos     = "QueueTimestampNanos"
	colIntegrateTimestampNanos = "IntegrateTimestampNanos"
)
    83  
    84  // NewLogStorage initialises and returns a new LogStorage.
    85  func NewLogStorage(client *spanner.Client) storage.LogStorage {
    86  	return NewLogStorageWithOpts(client, LogStorageOptions{})
    87  }
    88  
    89  // NewLogStorageWithOpts initialises and returns a new LogStorage.
    90  // The opts parameter can be used to enable custom workarounds.
    91  func NewLogStorageWithOpts(client *spanner.Client, opts LogStorageOptions) storage.LogStorage {
    92  	if opts.DequeueAcrossMerkleBucketsRangeFraction <= 0 || opts.DequeueAcrossMerkleBucketsRangeFraction > 1.0 {
    93  		opts.DequeueAcrossMerkleBucketsRangeFraction = 1.0
    94  	}
    95  	ret := &logStorage{
    96  		ts:   newTreeStorageWithOpts(client, opts.TreeStorageOptions),
    97  		opts: opts,
    98  	}
    99  
   100  	return ret
   101  }
   102  
// logStorage provides a Cloud Spanner backed trillian.LogStorage implementation.
// See third_party/golang/trillian/storage/log_storage.go for more details.
// Instances are created by NewLogStorageWithOpts and handed out as
// storage.LogStorage values.
type logStorage struct {
	// ts provides the merkle-tree level primitives which are built upon by this
	// logStorage.
	ts *treeStorage

	// Additional options applied to this logStorage
	opts LogStorageOptions
}
   113  
   114  func (ls *logStorage) CheckDatabaseAccessible(ctx context.Context) error {
   115  	return checkDatabaseAccessible(ctx, ls.ts.client)
   116  }
   117  
   118  func (ls *logStorage) Snapshot(ctx context.Context) (storage.ReadOnlyLogTX, error) {
   119  	var staleness spanner.TimestampBound
   120  	if ls.opts.ReadOnlyStaleness > 0 {
   121  		staleness = spanner.ExactStaleness(ls.opts.ReadOnlyStaleness)
   122  	} else {
   123  		staleness = spanner.StrongRead()
   124  	}
   125  
   126  	snapshotTX := &snapshotTX{
   127  		client: ls.ts.client,
   128  		stx:    ls.ts.client.ReadOnlyTransaction().WithTimestampBound(staleness),
   129  		ls:     ls,
   130  	}
   131  	return &readOnlyLogTX{snapshotTX}, nil
   132  }
   133  
   134  func newLogCache(tree *trillian.Tree) (cache.SubtreeCache, error) {
   135  	hasher, err := hashers.NewLogHasher(tree.HashStrategy)
   136  	if err != nil {
   137  		return cache.SubtreeCache{}, err
   138  	}
   139  	return cache.NewLogSubtreeCache(defLogStrata, hasher), nil
   140  }
   141  
   142  func (ls *logStorage) begin(ctx context.Context, tree *trillian.Tree, readonly bool, stx spanRead) (*logTX, error) {
   143  	tx, err := ls.ts.begin(ctx, tree, newLogCache, stx)
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  
   148  	// Sanity check tx.config
   149  	if cfg, ok := tx.config.(*spannerpb.LogStorageConfig); !ok || cfg == nil {
   150  		return nil, fmt.Errorf("unexpected config type for LOG tree %v: %T", tx.treeID, tx.config)
   151  	}
   152  
   153  	return &logTX{
   154  		ls:       ls,
   155  		dequeued: make(map[string]*QueuedEntry),
   156  		treeTX:   tx,
   157  	}, nil
   158  }
   159  
// BeginForTree is not supported by this storage implementation; it always
// returns ErrNotImplemented.
func (ls *logStorage) BeginForTree(ctx context.Context, treeID int64) (storage.LogTreeTX, error) {
	return nil, ErrNotImplemented
}
   163  
   164  func (ls *logStorage) ReadWriteTransaction(ctx context.Context, tree *trillian.Tree, f storage.LogTXFunc) error {
   165  	_, err := ls.ts.client.ReadWriteTransaction(ctx, func(ctx context.Context, stx *spanner.ReadWriteTransaction) error {
   166  		tx, err := ls.begin(ctx, tree, false /* readonly */, stx)
   167  		if err != nil {
   168  			return err
   169  		}
   170  		if err := f(ctx, tx); err != nil {
   171  			return err
   172  		}
   173  		return tx.flushSubtrees()
   174  	})
   175  	return err
   176  }
   177  
   178  func (ls *logStorage) SnapshotForTree(ctx context.Context, tree *trillian.Tree) (storage.ReadOnlyLogTreeTX, error) {
   179  	return ls.begin(ctx, tree, true /* readonly */, ls.ts.client.ReadOnlyTransaction())
   180  }
   181  
   182  func (ls *logStorage) QueueLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, qTimestamp time.Time) ([]*trillian.QueuedLogLeaf, error) {
   183  	_, treeConfig, err := ls.ts.getTreeAndConfig(ctx, tree)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	config, ok := treeConfig.(*spannerpb.LogStorageConfig)
   188  	if !ok {
   189  		return nil, status.Errorf(codes.Internal, "got unexpected config type for Log operation: %T", treeConfig)
   190  	}
   191  
   192  	now := time.Now().UTC().Unix()
   193  	bucketPrefix := (now % config.NumUnseqBuckets) << 8
   194  
   195  	results := make([]*trillian.QueuedLogLeaf, len(leaves))
   196  	writeDupes := make(map[string][]int)
   197  
   198  	qTS := qTimestamp.UnixNano()
   199  	var wg sync.WaitGroup
   200  	for i, l := range leaves {
   201  		wg.Add(1)
   202  		// Capture values of i and l for later reference in the MutationResultFunc below.
   203  		i := i
   204  		l := l
   205  		go func() {
   206  			defer wg.Done()
   207  
   208  			// The insert of the leafdata and the unsequenced work item must happen
   209  			// atomically.
   210  			m1 := spanner.Insert(
   211  				leafDataTbl,
   212  				[]string{colTreeID, colLeafIdentityHash, colLeafValue, colExtraData, colQueueTimestampNanos},
   213  				[]interface{}{tree.TreeId, l.LeafIdentityHash, l.LeafValue, l.ExtraData, qTS})
   214  			b := bucketPrefix | int64(l.MerkleLeafHash[0])
   215  			m2 := spanner.Insert(
   216  				unseqTable,
   217  				[]string{colTreeID, colBucket, colQueueTimestampNanos, colMerkleLeafHash, colLeafIdentityHash},
   218  				[]interface{}{tree.TreeId, b, qTS, l.MerkleLeafHash, l.LeafIdentityHash})
   219  
   220  			_, err = ls.ts.client.Apply(ctx, []*spanner.Mutation{m1, m2})
   221  			if spanner.ErrCode(err) == codes.AlreadyExists {
   222  				k := string(l.LeafIdentityHash)
   223  				writeDupes[k] = append(writeDupes[k], i)
   224  			} else if err != nil {
   225  				s, _ := status.FromError(err)
   226  				results[i] = &trillian.QueuedLogLeaf{Status: s.Proto()}
   227  			} else {
   228  				results[i] = &trillian.QueuedLogLeaf{Leaf: l} // implicit OK status
   229  			}
   230  		}()
   231  	}
   232  
   233  	// Wait for all of our mutations to apply (or fail):
   234  	wg.Wait()
   235  
   236  	// Finally, read back any leaves which failed with an already exists error
   237  	// when we tried to insert them:
   238  	err = ls.readDupeLeaves(ctx, tree.TreeId, writeDupes, results)
   239  	if err != nil {
   240  		return nil, err
   241  	}
   242  	return results, nil
   243  }
   244  
// AddSequencedLeaves is not supported by this storage implementation; it
// always returns ErrNotImplemented.
func (ls *logStorage) AddSequencedLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) {
	return nil, ErrNotImplemented
}
   248  
   249  // readDupeLeaves reads the leaves whose ids are passed as keys in the dupes map,
   250  // and stores them in results.
   251  func (ls *logStorage) readDupeLeaves(ctx context.Context, logID int64, dupes map[string][]int, results []*trillian.QueuedLogLeaf) error {
   252  	numDupes := len(dupes)
   253  	if numDupes == 0 {
   254  		return nil
   255  	}
   256  	glog.V(2).Infof("dupe rowsToRead: %v", numDupes)
   257  
   258  	ids := make([][]byte, 0, numDupes)
   259  	for k := range dupes {
   260  		ids = append(ids, []byte(k))
   261  	}
   262  	dupesRead := 0
   263  	tx := ls.ts.client.Single()
   264  	err := readLeaves(ctx, tx, logID, ids, func(l *trillian.LogLeaf) {
   265  		glog.V(2).Infof("Found already exists dupe: %v", l)
   266  		dupesRead++
   267  
   268  		indices := dupes[string(l.LeafIdentityHash)]
   269  		glog.V(2).Infof("Indices %v", indices)
   270  		if len(indices) == 0 {
   271  			glog.Warningf("Logic error: Spanner returned a leaf %x, but it matched no requested index", l.LeafIdentityHash)
   272  			return
   273  		}
   274  		for _, i := range indices {
   275  			leaf := l
   276  			results[i] = &trillian.QueuedLogLeaf{
   277  				Leaf:   leaf,
   278  				Status: status.Newf(codes.AlreadyExists, "leaf already exists: %v", l.LeafIdentityHash).Proto(),
   279  			}
   280  		}
   281  	})
   282  	tx.Close()
   283  	if err != nil {
   284  		return err
   285  	}
   286  	if got, want := dupesRead, numDupes; got != want {
   287  		return fmt.Errorf("read unexpected number of dupe rows %d, want %d", got, want)
   288  	}
   289  	return nil
   290  }
   291  
// logTX is the per-tree log transaction type of this package: it is what
// logStorage.begin creates, what SnapshotForTree returns as a
// storage.ReadOnlyLogTreeTX, and what ReadWriteTransaction passes to the
// caller's storage.LogTXFunc.
type logTX struct {
	// treeTX embeds the merkle-tree level transactional actions.
	*treeTX

	// logStorage is the logStorage which begat this logTX.
	ls *logStorage

	// numSequenced holds the number of leaves sequenced by this transaction.
	// It is incremented by UpdateSequencedLeaves for every leaf it buffers.
	numSequenced int64

	// dequeued is a map of LeafIdentityHash to QueuedEntry containing entries for
	// everything dequeued by this transaction.
	// This is required to recover the primary key for the unsequenced entry in
	// UpdateSequencedLeaves.
	dequeued map[string]*QueuedEntry
}
   310  
   311  func (tx *logTX) getLogStorageConfig() *spannerpb.LogStorageConfig {
   312  	return tx.config.(*spannerpb.LogStorageConfig)
   313  }
   314  
   315  // LatestSignedLogRoot returns the freshest SignedLogRoot for this log at the
   316  // time the transaction was started.
   317  func (tx *logTX) LatestSignedLogRoot(ctx context.Context) (trillian.SignedLogRoot, error) {
   318  	currentSTH, err := tx.currentSTH(ctx)
   319  	if err != nil {
   320  		return trillian.SignedLogRoot{}, err
   321  	}
   322  	writeRev, err := tx.writeRev(ctx)
   323  	if err != nil {
   324  		return trillian.SignedLogRoot{}, err
   325  	}
   326  
   327  	if got, want := currentSTH.TreeRevision+1, writeRev; got != want {
   328  		return trillian.SignedLogRoot{}, fmt.Errorf("inconsistency: currentSTH.TreeRevision+1 (%d) != writeRev (%d)", got, want)
   329  	}
   330  
   331  	// Put logRoot back together. Fortunately LogRoot has a deterministic serialization.
   332  	logRoot, err := (&types.LogRootV1{
   333  		TimestampNanos: uint64(currentSTH.TsNanos),
   334  		RootHash:       currentSTH.RootHash,
   335  		TreeSize:       uint64(currentSTH.TreeSize),
   336  		Revision:       uint64(currentSTH.TreeRevision),
   337  		Metadata:       currentSTH.Metadata,
   338  	}).MarshalBinary()
   339  	if err != nil {
   340  		return trillian.SignedLogRoot{}, err
   341  	}
   342  
   343  	// We already read the latest root as part of starting the transaction (in
   344  	// order to calculate the writeRevision), so we just return that data here:
   345  	return trillian.SignedLogRoot{
   346  		KeyHint:          types.SerializeKeyHint(tx.treeID),
   347  		LogRoot:          logRoot,
   348  		LogRootSignature: currentSTH.Signature,
   349  		// TODO(gbelvin): Remove deprecated fields
   350  		TimestampNanos: currentSTH.TsNanos,
   351  		RootHash:       currentSTH.RootHash,
   352  		TreeSize:       currentSTH.TreeSize,
   353  		TreeRevision:   currentSTH.TreeRevision,
   354  	}, nil
   355  }
   356  
   357  // StoreSignedLogRoot stores the provided root.
   358  // This method will return an error if the caller attempts to store more than
   359  // one root per log for a given tree size.
   360  func (tx *logTX) StoreSignedLogRoot(ctx context.Context, root trillian.SignedLogRoot) error {
   361  	writeRev, err := tx.writeRev(ctx)
   362  	if err == storage.ErrTreeNeedsInit {
   363  		writeRev = 0
   364  	} else if err != nil {
   365  		return err
   366  	}
   367  
   368  	var logRoot types.LogRootV1
   369  	if err := logRoot.UnmarshalBinary(root.LogRoot); err != nil {
   370  		glog.Warningf("Failed to parse log root: %x %v", root.LogRoot, err)
   371  		return err
   372  	}
   373  
   374  	m := spanner.Insert(
   375  		"TreeHeads",
   376  		[]string{
   377  			"TreeID",
   378  			"TimestampNanos",
   379  			"TreeSize",
   380  			"RootHash",
   381  			"RootSignature",
   382  			"TreeRevision",
   383  			"TreeMetadata",
   384  		},
   385  		[]interface{}{
   386  			int64(tx.treeID),
   387  			int64(logRoot.TimestampNanos),
   388  			int64(logRoot.TreeSize),
   389  			logRoot.RootHash,
   390  			root.LogRootSignature,
   391  			writeRev,
   392  			logRoot.Metadata,
   393  		})
   394  
   395  	stx, ok := tx.stx.(*spanner.ReadWriteTransaction)
   396  	if !ok {
   397  		return ErrWrongTXType
   398  	}
   399  	return stx.BufferWrite([]*spanner.Mutation{m})
   400  }
   401  
   402  func readLeaves(ctx context.Context, stx *spanner.ReadOnlyTransaction, logID int64, ids [][]byte, f func(*trillian.LogLeaf)) error {
   403  	leafTable := leafDataTbl
   404  	cols := []string{colLeafIdentityHash, colLeafValue, colExtraData, colQueueTimestampNanos}
   405  	keys := make([]spanner.KeySet, 0)
   406  	for _, l := range ids {
   407  		keys = append(keys, spanner.Key{logID, l})
   408  	}
   409  
   410  	rows := stx.Read(ctx, leafTable, spanner.KeySets(keys...), cols)
   411  	return rows.Do(func(r *spanner.Row) error {
   412  		var l trillian.LogLeaf
   413  		var qTimestamp int64
   414  		if err := r.Columns(&l.LeafIdentityHash, &l.LeafValue, &l.ExtraData, &qTimestamp); err != nil {
   415  			return err
   416  		}
   417  		var err error
   418  		l.QueueTimestamp, err = ptypes.TimestampProto(time.Unix(0, qTimestamp))
   419  		if err != nil {
   420  			return fmt.Errorf("got invalid queue timestamp: %v", err)
   421  		}
   422  		f(&l)
   423  		return nil
   424  	})
   425  }
   426  
// QueueLeaves is not supported on a logTX; it always returns
// ErrNotImplemented. Leaves are queued through logStorage.QueueLeaves.
func (tx *logTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, ts time.Time) ([]*trillian.LogLeaf, error) {
	return nil, ErrNotImplemented
}
   430  
// AddSequencedLeaves is not supported on a logTX; it always returns
// ErrNotImplemented.
func (tx *logTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) {
	return nil, ErrNotImplemented
}
   434  
// DequeueLeaves removes [0, limit) leaves from the to-be-sequenced queue.
// The leaves returned are not guaranteed to be in any particular order.
// The caller should assign sequence numbers and pass the updated leaves as
// arguments to the UpdateSequencedLeaves method.
//
// This method itself only reads from the Unsequenced table and remembers what
// it returned in tx.dequeued; the rows are deleted by UpdateSequencedLeaves.
//
// The LogLeaf structs returned by this method will not be fully populated;
// only the LeafIdentityHash and MerkleLeafHash fields will contain data, this
// should be sufficient for assigning sequence numbers with this storage impl.
// (QueueTimestamp is also set, from the row's QueueTimestampNanos.)
//
// An error is returned if limit is not positive, if the query fails, or if a
// row cannot be decoded.
//
// TODO(al): cutoff is currently ignored.
func (tx *logTX) DequeueLeaves(ctx context.Context, limit int, cutoff time.Time) ([]*trillian.LogLeaf, error) {
	if limit <= 0 {
		return nil, fmt.Errorf("limit should be > 0, got %d", limit)
	}

	// Decide which bucket(s) to dequeue from.
	// The high 8 bits of the bucket key is a time based ring - at any given
	// moment, FEs queueing entries will be adding them to different buckets
	// than we're dequeuing from here - the low 8 bits are the first byte of the
	// merkle hash of the entry.
	now := time.Now().UTC()
	cfg := tx.getLogStorageConfig()
	// Offset by half the ring relative to the bucket QueueLeaves writes to.
	timeBucket := int64(((now.Unix() + cfg.NumUnseqBuckets/2) % cfg.NumUnseqBuckets) << 8)

	// Choose a starting point in the merkle prefix range, and calculate the
	// start/limit of the merkle range we'll dequeue from.
	// It seems to be much better to tune for keeping this range small, and allow
	// the signer to run multiple times per second than try to dequeue a large batch
	// which spans a large number of merkle prefixes.
	merklePrefix := rand.Int63n(256)
	startBucket := timeBucket | merklePrefix
	// NOTE(review): the range fraction is applied unconditionally here; the
	// DequeueAcrossMerkleBuckets option is not consulted.
	numMerkleBuckets := int64(256 * tx.ls.opts.DequeueAcrossMerkleBucketsRangeFraction)
	merkleLimit := merklePrefix + numMerkleBuckets
	// Clamp so the range never leaves the low 8 bits of the chosen time bucket.
	if merkleLimit > 0xff {
		merkleLimit = 0xff
	}
	limitBucket := timeBucket | merkleLimit

	stmt := spanner.NewStatement(`
			SELECT Bucket, QueueTimestampNanos, MerkleLeafHash, LeafIdentityHash
			FROM Unsequenced u
			WHERE u.TreeID = @tree_id
			AND u.Bucket >= @start_bucket
			AND u.Bucket <= @limit_bucket
			LIMIT @max_num
			`)
	stmt.Params["tree_id"] = tx.treeID
	stmt.Params["start_bucket"] = startBucket
	stmt.Params["limit_bucket"] = limitBucket
	stmt.Params["max_num"] = limit

	ret := make([]*trillian.LogLeaf, 0, limit)
	rows := tx.stx.Query(ctx, stmt)
	if err := rows.Do(func(r *spanner.Row) error {
		var l trillian.LogLeaf
		var qe QueuedEntry
		if err := r.Columns(&qe.bucket, &qe.timestamp, &l.MerkleLeafHash, &l.LeafIdentityHash); err != nil {
			return err
		}

		var err error
		l.QueueTimestamp, err = ptypes.TimestampProto(time.Unix(0, qe.timestamp))
		if err != nil {
			return fmt.Errorf("got invalid queue timestamp: %v", err)
		}
		k := string(l.LeafIdentityHash)
		if tx.dequeued[k] != nil {
			// dupe, user probably called DequeueLeaves more than once.
			return nil
		}

		// Remember the queue entry so UpdateSequencedLeaves can rebuild the
		// key of the Unsequenced row for this leaf.
		ret = append(ret, &l)
		qe.leaf = &l
		tx.dequeued[k] = &qe
		return nil
	}); err != nil {
		return nil, err
	}

	return ret, nil
}
   516  
   517  // UpdateSequencedLeaves stores the sequence numbers assigned to the leaves,
   518  // and integrates them into the tree.
   519  func (tx *logTX) UpdateSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf) error {
   520  	stx, ok := tx.stx.(*spanner.ReadWriteTransaction)
   521  	if !ok {
   522  		return ErrWrongTXType
   523  	}
   524  	// We need the latest root to know what the next sequence number to use below is.
   525  	currentSTH, err := tx.currentSTH(ctx)
   526  	if err != nil {
   527  		return err
   528  	}
   529  
   530  	for _, l := range leaves {
   531  		if got, want := l.LeafIndex, currentSTH.TreeSize+tx.numSequenced; got != want {
   532  			return fmt.Errorf("attempting to assign non-sequential leaf with sequence %d, want %d", got, want)
   533  		}
   534  
   535  		qe, ok := tx.dequeued[string(l.LeafIdentityHash)]
   536  		if !ok {
   537  			return fmt.Errorf("attempting to assign unknown merkleleafhash %v", l.MerkleLeafHash)
   538  		}
   539  
   540  		iTimestamp, err := ptypes.Timestamp(l.IntegrateTimestamp)
   541  		if err != nil {
   542  			return fmt.Errorf("got invalid integrate timestamp: %v", err)
   543  		}
   544  
   545  		// Add the sequence mapping...
   546  		m1 := spanner.Insert(seqDataTbl,
   547  			[]string{colTreeID, colSequenceNumber, colLeafIdentityHash, colMerkleLeafHash, colIntegrateTimestampNanos},
   548  			[]interface{}{tx.treeID, l.LeafIndex, l.LeafIdentityHash, l.MerkleLeafHash, iTimestamp.UnixNano()})
   549  
   550  		m2 := spanner.Delete(unseqTable, spanner.Key{tx.treeID, qe.bucket, qe.timestamp, l.MerkleLeafHash})
   551  
   552  		tx.numSequenced++
   553  		if err := stx.BufferWrite([]*spanner.Mutation{m1, m2}); err != nil {
   554  			return fmt.Errorf("bufferwrite(): %v", err)
   555  		}
   556  	}
   557  
   558  	return nil
   559  }
   560  
   561  // GetSequencedLeafCount returns the number of leaves integrated into the tree
   562  // at the time the transaction was started.
   563  func (tx *logTX) GetSequencedLeafCount(ctx context.Context) (int64, error) {
   564  	currentSTH, err := tx.currentSTH(ctx)
   565  	if err != nil {
   566  		return -1, err
   567  	}
   568  
   569  	return currentSTH.TreeSize, nil
   570  }
   571  
   572  // leafmap is a map of LogLeaf by sequence number which knows how to populate
   573  // itself directly from Spanner Rows.
   574  type leafmap map[int64]*trillian.LogLeaf
   575  
   576  // addFullRow appends the leaf data in row to the array
   577  func (l leafmap) addFullRow(r *spanner.Row) error {
   578  	var (
   579  		merkleLeafHash, leafValue, extraData []byte
   580  		sequenceNumber                       int64
   581  		leafIDHash                           []byte
   582  		qTimestamp                           int64
   583  		iTimestamp                           int64
   584  	)
   585  
   586  	//`SELECT sd.MerkleLeafHash, ld.LeafValue, ld.ExtraData, sd.SequenceNumber, ld.LeafIdentityHash, ld.QueueTimestampNanos, sd.IntegrateTimestampNanos
   587  	if err := r.Columns(&merkleLeafHash, &leafValue, &extraData, &sequenceNumber, &leafIDHash, &qTimestamp, &iTimestamp); err != nil {
   588  		return err
   589  	}
   590  	leaf := &trillian.LogLeaf{
   591  		MerkleLeafHash:   merkleLeafHash,
   592  		LeafValue:        leafValue,
   593  		ExtraData:        extraData,
   594  		LeafIndex:        sequenceNumber,
   595  		LeafIdentityHash: leafIDHash,
   596  	}
   597  	var err error
   598  	leaf.QueueTimestamp, err = ptypes.TimestampProto(time.Unix(0, qTimestamp))
   599  	if err != nil {
   600  		return fmt.Errorf("got invalid queue timestamp %v", err)
   601  	}
   602  	leaf.IntegrateTimestamp, err = ptypes.TimestampProto(time.Unix(0, iTimestamp))
   603  	if err != nil {
   604  		return fmt.Errorf("got invalid integrate timestamp %v", err)
   605  	}
   606  
   607  	l[sequenceNumber] = leaf
   608  	return nil
   609  }
   610  
   611  // leavesByHash is a map of []LogLeaf (keyed by value hash) which knows how to
   612  // populate itself from Spanner Rows.
   613  type leavesByHash map[string][]*trillian.LogLeaf
   614  
   615  // addRow adds the contents of the Spanner Row to this map.
   616  func (b leavesByHash) addRow(r *spanner.Row) error {
   617  	var h []byte
   618  	var v []byte
   619  	var ed []byte
   620  	var qTimestamp int64
   621  	if err := r.Columns(&h, &v, &ed, &qTimestamp); err != nil {
   622  		return err
   623  	}
   624  	queueTimestamp, err := ptypes.TimestampProto(time.Unix(0, qTimestamp))
   625  	if err != nil {
   626  		return fmt.Errorf("got invalid queue timestamp: %v", err)
   627  	}
   628  
   629  	leaves, ok := b[string(h)]
   630  	if !ok {
   631  		return fmt.Errorf("inconsistency: unexpected leafValueHash %v", h)
   632  	}
   633  	for i := range leaves {
   634  		if got, want := leaves[i].LeafIdentityHash, h; !bytes.Equal(got, want) {
   635  			return fmt.Errorf("inconsistency: unexpected leafvaluehash %v, want %v", got, want)
   636  		}
   637  		leaves[i].LeafValue = v
   638  		leaves[i].ExtraData = ed
   639  		leaves[i].QueueTimestamp = queueTimestamp
   640  	}
   641  	return nil
   642  }
   643  
   644  // populateLeafData populates the partial LogLeaf structs held in the passed in
   645  // map of LeafIdentityHash to []LogLeaf by reading the remaining LogLeaf data from
   646  // Spanner.
   647  // The value of byHash is an []LogLeaf because the underlying leaf data could
   648  // be sequenced into multiple tree leaves if the log allows duplication.
   649  func (tx *logTX) populateLeafData(ctx context.Context, byHash leavesByHash) error {
   650  	keySet := make([]spanner.KeySet, 0, len(byHash))
   651  	for k := range byHash {
   652  		keySet = append(keySet, spanner.Key{tx.treeID, []byte(k)})
   653  	}
   654  	cols := []string{colLeafIdentityHash, colLeafValue, colExtraData, colQueueTimestampNanos}
   655  	rows := tx.stx.Read(ctx, leafDataTbl, spanner.KeySets(keySet...), cols)
   656  	return rows.Do(byHash.addRow)
   657  }
   658  
   659  // validateIndices ensures that all indices are between 0 and treeSize-1.
   660  func validateIndices(indices []int64, treeSize int64) error {
   661  	maxIndex := treeSize - 1
   662  	for _, i := range indices {
   663  		if i < 0 {
   664  			return status.Errorf(codes.InvalidArgument, "index %d is < 0", i)
   665  		}
   666  		if i > maxIndex {
   667  			return status.Errorf(codes.OutOfRange, "index %d is > highest index in current tree %d", i, maxIndex)
   668  		}
   669  	}
   670  	return nil
   671  }
   672  
   673  // GetLeavesByIndex returns the leaves corresponding to the given indices.
   674  func (tx *logTX) GetLeavesByIndex(ctx context.Context, indices []int64) ([]*trillian.LogLeaf, error) {
   675  	// We need the latest root to validate the indices are within range.
   676  	currentSTH, err := tx.currentSTH(ctx)
   677  	if err != nil {
   678  		return nil, err
   679  	}
   680  
   681  	if err := validateIndices(indices, currentSTH.TreeSize); err != nil {
   682  		return nil, err
   683  	}
   684  
   685  	leaves := make(leafmap)
   686  	stmt := spanner.NewStatement(
   687  		`SELECT sd.MerkleLeafHash, ld.LeafValue, ld.ExtraData, sd.SequenceNumber, ld.LeafIdentityHash, ld.QueueTimestampNanos, sd.IntegrateTimestampNanos
   688  FROM SequencedLeafData as sd
   689  INNER JOIN LeafData as ld
   690  ON sd.TreeID = ld.TreeID AND sd.LeafIdentityHash = ld.LeafIdentityHash
   691  WHERE sd.TreeID = @tree_id and sd.SequenceNumber IN UNNEST(@seq_nums)`)
   692  	stmt.Params["tree_id"] = tx.treeID
   693  	stmt.Params["seq_nums"] = indices
   694  
   695  	rows := tx.stx.Query(ctx, stmt)
   696  	if err := rows.Do(leaves.addFullRow); err != nil {
   697  		return nil, err
   698  	}
   699  
   700  	// Sanity check that we got everything we wanted
   701  	if got, want := len(leaves), len(indices); got != want {
   702  		return nil, fmt.Errorf("inconsistency: got %d leaves, want %d", got, want)
   703  	}
   704  
   705  	// Sort the leaves so they are in the same order as the indices.
   706  	ret := make([]*trillian.LogLeaf, 0, len(indices))
   707  	for _, i := range indices {
   708  		l, ok := leaves[i]
   709  		if !ok {
   710  			return nil, fmt.Errorf("inconsistency: missing data for index %d", i)
   711  		}
   712  		ret = append(ret, l)
   713  	}
   714  
   715  	return ret, nil
   716  }
   717  
   718  func validateRange(start, count, treeSize int64) error {
   719  	if count <= 0 {
   720  		return status.Errorf(codes.InvalidArgument, "invalid count %d", count)
   721  	}
   722  	if start < 0 {
   723  		return status.Errorf(codes.InvalidArgument, "invalid start %d", start)
   724  	}
   725  	if start >= treeSize {
   726  		return status.Errorf(codes.OutOfRange, "start index %d beyond tree size %d", start, treeSize)
   727  	}
   728  	return nil
   729  }
   730  
   731  // GetLeavesByRange returns the leaves corresponding to the given index range.
   732  func (tx *logTX) GetLeavesByRange(ctx context.Context, start, count int64) ([]*trillian.LogLeaf, error) {
   733  	// We need the latest root to validate the indices are within range.
   734  	currentSTH, err := tx.currentSTH(ctx)
   735  	if err != nil {
   736  		return nil, err
   737  	}
   738  
   739  	if err := validateRange(start, count, currentSTH.TreeSize); err != nil {
   740  		return nil, err
   741  	}
   742  
   743  	stmt := spanner.NewStatement(
   744  		`SELECT sd.MerkleLeafHash, ld.LeafValue, ld.ExtraData, sd.SequenceNumber, ld.LeafIdentityHash, ld.QueueTimestampNanos, sd.IntegrateTimestampNanos
   745  FROM SequencedLeafData as sd
   746  INNER JOIN LeafData as ld
   747  ON sd.TreeID = ld.TreeID AND sd.LeafIdentityHash = ld.LeafIdentityHash
   748  WHERE sd.TreeID = @tree_id AND sd.SequenceNumber >= @start AND sd.SequenceNumber < @xend`)
   749  	stmt.Params["tree_id"] = tx.treeID
   750  	stmt.Params["start"] = start
   751  	xend := start + count
   752  	if xend > currentSTH.TreeSize {
   753  		xend = currentSTH.TreeSize
   754  		count = xend - start
   755  	}
   756  	stmt.Params["xend"] = xend
   757  
   758  	// Results need to be returned in order [start, end), all of which should be
   759  	// available (as we restricted xend/count to TreeSize).
   760  	leaves := make(leafmap)
   761  	rows := tx.stx.Query(ctx, stmt)
   762  	if err := rows.Do(leaves.addFullRow); err != nil {
   763  		return nil, err
   764  	}
   765  	ret := make([]*trillian.LogLeaf, 0, count)
   766  	for i := start; i < (start + count); i++ {
   767  		l, ok := leaves[i]
   768  		if !ok {
   769  			return nil, fmt.Errorf("inconsistency: missing data for index %d", i)
   770  		}
   771  		ret = append(ret, l)
   772  		delete(leaves, i)
   773  	}
   774  	if len(leaves) > 0 {
   775  		return nil, fmt.Errorf("inconsistency: unexpected extra data outside range %d, +%d", start, count)
   776  	}
   777  
   778  	return ret, nil
   779  }
   780  
   781  // leafSlice is a slice of LogLeaf which knows how to populate itself from
   782  // Spanner Rows.
   783  type leafSlice []*trillian.LogLeaf
   784  
   785  // addRow appends the leaf data in Row to the array.
   786  func (l *leafSlice) addRow(r *spanner.Row) error {
   787  	var (
   788  		s      int64
   789  		mh, lh []byte
   790  	)
   791  
   792  	if err := r.Columns(&s, &mh, &lh); err != nil {
   793  		return err
   794  	}
   795  	leaf := trillian.LogLeaf{
   796  		LeafIndex:        s,
   797  		MerkleLeafHash:   mh,
   798  		LeafIdentityHash: lh,
   799  	}
   800  	*l = append(*l, &leaf)
   801  	return nil
   802  }
   803  
   804  // getUsingIndex returns a slice containing the LogLeaf structs corresponding
   805  // to the requested keys.
   806  // The entries in key are used in constructing a primary key (treeID, keyElem)
   807  // for the specified Spanner index.
   808  // If bySeq is true, the returned slice will be order by LogLeaf.LeafIndex.
   809  func (tx *logTX) getUsingIndex(ctx context.Context, idx string, keys [][]byte, bySeq bool) ([]*trillian.LogLeaf, error) {
   810  	keySet := make([]spanner.KeySet, 0, len(keys))
   811  	for _, k := range keys {
   812  		keySet = append(keySet, spanner.Key{tx.treeID, k})
   813  	}
   814  
   815  	leaves := make(leafSlice, 0, len(keys))
   816  	cols := []string{colSequenceNumber, colMerkleLeafHash, colLeafIdentityHash}
   817  	rows := tx.stx.ReadUsingIndex(ctx, seqDataTbl, idx, spanner.KeySets(keySet...), cols)
   818  	if err := rows.Do(leaves.addRow); err != nil {
   819  		return nil, err
   820  	}
   821  
   822  	byHash := make(leavesByHash)
   823  	for i := range leaves {
   824  		k := string(leaves[i].LeafIdentityHash)
   825  		byHash[k] = append(byHash[k], leaves[i])
   826  	}
   827  
   828  	// Now we can fetch & combine the actual leaf data:
   829  	if err := tx.populateLeafData(ctx, byHash); err != nil {
   830  		return nil, err
   831  	}
   832  
   833  	if bySeq {
   834  		sort.Sort(byIndex(leaves))
   835  	}
   836  
   837  	return leaves, nil
   838  }
   839  
   840  // GetLeavesByHash returns the leaves corresponding to the given merkle hashes.
   841  // Any unknown hashes will simply be ignored, and the caller should inspect the
   842  // returned leaves to determine whether this has occurred.
   843  // TODO(al): Currently, this method does not populate the IntegrateTimestamp
   844  //   member of the returned leaves. We should convert this method to use SQL
   845  //   rather than denormalising IntegrateTimestampNanos into the index too.
   846  func (tx *logTX) GetLeavesByHash(ctx context.Context, hashes [][]byte, bySeq bool) ([]*trillian.LogLeaf, error) {
   847  	return tx.getUsingIndex(ctx, seqDataByMerkleHashIdx, hashes, bySeq)
   848  }
   849  
// QueuedEntry represents a leaf which was dequeued.
// It's used to store some extra info which is necessary for rebuilding the
// leaf's primary key when it's passed back in to UpdateSequencedLeaves.
type QueuedEntry struct {
	// leaf is partially populated with the Merkle and LeafValue hashes only.
	leaf      *trillian.LogLeaf
	// bucket and timestamp are the extra info kept for rebuilding the primary key.
	// NOTE(review): presumably these mirror key columns of the row the leaf was
	// dequeued from — confirm against the dequeue code.
	bucket    int64
	timestamp int64
}
   859  
// readOnlyLogTX implements storage.ReadOnlyLogTX.
// It has no state of its own: everything its methods use (such as the mu lock
// and the stx transaction handle) is promoted from the embedded snapshotTX.
type readOnlyLogTX struct {
	*snapshotTX
}
   864  
   865  func (tx *readOnlyLogTX) GetActiveLogIDs(ctx context.Context) ([]int64, error) {
   866  	tx.mu.RLock()
   867  	defer tx.mu.RUnlock()
   868  	if tx.stx == nil {
   869  		return nil, ErrTransactionClosed
   870  	}
   871  
   872  	ids := []int64{}
   873  	// We have to use SQL as Read() doesn't work against an index.
   874  	stmt := spanner.NewStatement(getActiveLogIDsSQL)
   875  	rows := tx.stx.Query(ctx, stmt)
   876  	if err := rows.Do(func(r *spanner.Row) error {
   877  		var id int64
   878  		if err := r.Columns(&id); err != nil {
   879  			return err
   880  		}
   881  		ids = append(ids, id)
   882  		return nil
   883  	}); err != nil {
   884  		glog.Warning("GetActiveLogIDs: %v", err)
   885  		return nil, fmt.Errorf("problem executing getActiveLogIDsSQL: %v", err)
   886  	}
   887  	return ids, nil
   888  }
   889  
   890  func (tx *readOnlyLogTX) GetUnsequencedCounts(ctx context.Context) (storage.CountByLogID, error) {
   891  	stmt := spanner.NewStatement(unsequencedCountSQL)
   892  	ret := make(storage.CountByLogID)
   893  	rows := tx.stx.Query(ctx, stmt)
   894  	if err := rows.Do(func(r *spanner.Row) error {
   895  		var id, c int64
   896  		if err := r.Columns(&id, &c); err != nil {
   897  			return err
   898  		}
   899  		ret[id] = c
   900  		return nil
   901  	}); err != nil {
   902  		return nil, fmt.Errorf("problem executing unsequencedCountSQL: %v", err)
   903  	}
   904  	return ret, nil
   905  }
   906  
   907  // LogLeaf sorting boilerplate below.
   908  
   909  type byIndex []*trillian.LogLeaf
   910  
   911  func (b byIndex) Len() int { return len(b) }
   912  
   913  func (b byIndex) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
   914  
   915  func (b byIndex) Less(i, j int) bool { return b[i].LeafIndex < b[j].LeafIndex }