github.com/bartle-stripe/trillian@v1.2.1/storage/mysql/log_storage.go

     1  // Copyright 2016 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mysql
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"database/sql"
    21  	"errors"
    22  	"fmt"
    23  	"sort"
    24  	"strconv"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/go-sql-driver/mysql"
    29  	"github.com/golang/glog"
    30  	"github.com/golang/protobuf/ptypes"
    31  	"github.com/google/trillian"
    32  	"github.com/google/trillian/merkle/hashers"
    33  	"github.com/google/trillian/monitoring"
    34  	"github.com/google/trillian/storage"
    35  	"github.com/google/trillian/storage/cache"
    36  	"github.com/google/trillian/types"
    37  	"google.golang.org/grpc/codes"
    38  	"google.golang.org/grpc/status"
    39  )
    40  
    41  const (
    42  	valuesPlaceholder5 = "(?,?,?,?,?)"
    43  
    44  	insertLeafDataSQL      = "INSERT INTO LeafData(TreeId,LeafIdentityHash,LeafValue,ExtraData,QueueTimestampNanos) VALUES" + valuesPlaceholder5
    45  	insertSequencedLeafSQL = "INSERT INTO SequencedLeafData(TreeId,LeafIdentityHash,MerkleLeafHash,SequenceNumber,IntegrateTimestampNanos) VALUES"
    46  
    47  	selectNonDeletedTreeIDByTypeAndStateSQL = `
    48  		SELECT TreeId FROM Trees
    49  		  WHERE TreeType IN(?,?)
    50  		  AND TreeState IN(?,?)
    51  		  AND (Deleted IS NULL OR Deleted = 'false')`
    52  
    53  	selectSequencedLeafCountSQL   = "SELECT COUNT(*) FROM SequencedLeafData WHERE TreeId=?"
    54  	selectUnsequencedLeafCountSQL = "SELECT TreeId, COUNT(1) FROM Unsequenced GROUP BY TreeId"
    55  	selectLatestSignedLogRootSQL  = `SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature
    56  			FROM TreeHead WHERE TreeId=?
    57  			ORDER BY TreeHeadTimestamp DESC LIMIT 1`
    58  
    59  	selectLeavesByRangeSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos
    60  			FROM LeafData l,SequencedLeafData s
    61  			WHERE l.LeafIdentityHash = s.LeafIdentityHash
    62  			AND s.SequenceNumber >= ? AND s.SequenceNumber < ? AND l.TreeId = ? AND s.TreeId = l.TreeId` + orderBySequenceNumberSQL
    63  
    64  	// These statements need to be expanded to provide the correct number of parameter placeholders.
    65  	selectLeavesByIndexSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos
    66  			FROM LeafData l,SequencedLeafData s
    67  			WHERE l.LeafIdentityHash = s.LeafIdentityHash
    68  			AND s.SequenceNumber IN (` + placeholderSQL + `) AND l.TreeId = ? AND s.TreeId = l.TreeId`
    69  	selectLeavesByMerkleHashSQL = `SELECT s.MerkleLeafHash,l.LeafIdentityHash,l.LeafValue,s.SequenceNumber,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos
    70  			FROM LeafData l,SequencedLeafData s
    71  			WHERE l.LeafIdentityHash = s.LeafIdentityHash
    72  			AND s.MerkleLeafHash IN (` + placeholderSQL + `) AND l.TreeId = ? AND s.TreeId = l.TreeId`
    73  	// TODO(drysdale): rework the code so the dummy hash isn't needed (e.g. this assumes hash size is 32)
    74  	dummyMerkleLeafHash = "00000000000000000000000000000000"
    75  	// This statement returns a dummy Merkle leaf hash value (which must be
    76  	// of the right size) so that its signature matches that of the other
    77  	// leaf-selection statements.
    78  	selectLeavesByLeafIdentityHashSQL = `SELECT '` + dummyMerkleLeafHash + `',l.LeafIdentityHash,l.LeafValue,-1,l.ExtraData,l.QueueTimestampNanos,s.IntegrateTimestampNanos
    79  			FROM LeafData l LEFT JOIN SequencedLeafData s ON (l.LeafIdentityHash = s.LeafIdentityHash AND l.TreeID = s.TreeID)
    80  			WHERE l.LeafIdentityHash IN (` + placeholderSQL + `) AND l.TreeId = ?`
    81  
    82  	// Same as above, except with leaves ordered by sequence number so we only incur this cost when necessary.
    83  	orderBySequenceNumberSQL                     = " ORDER BY s.SequenceNumber"
    84  	selectLeavesByMerkleHashOrderedBySequenceSQL = selectLeavesByMerkleHashSQL + orderBySequenceNumberSQL
    85  
    86  	// Error code returned by driver when inserting a duplicate row
    87  	errNumDuplicate = 1062
    88  
    89  	logIDLabel = "logid"
    90  )
    91  
    92  var (
    93  	defaultLogStrata = []int{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}
    94  
    95  	once             sync.Once
    96  	queuedCounter    monitoring.Counter
    97  	queuedDupCounter monitoring.Counter
    98  	dequeuedCounter  monitoring.Counter
    99  
   100  	queueLatency            monitoring.Histogram
   101  	queueInsertLatency      monitoring.Histogram
   102  	queueReadLatency        monitoring.Histogram
   103  	queueInsertLeafLatency  monitoring.Histogram
   104  	queueInsertEntryLatency monitoring.Histogram
   105  	dequeueLatency          monitoring.Histogram
   106  	dequeueSelectLatency    monitoring.Histogram
   107  	dequeueRemoveLatency    monitoring.Histogram
   108  )
   109  
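        // createMetrics initialises the package counters and histograms using mf.
        // It is invoked at most once, via sync.Once, from beginInternal.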
   110  func createMetrics(mf monitoring.MetricFactory) {
   111  	queuedCounter = mf.NewCounter("mysql_queued_leaves", "Number of leaves queued", logIDLabel)
   112  	queuedDupCounter = mf.NewCounter("mysql_queued_dup_leaves", "Number of duplicate leaves queued", logIDLabel)
   113  	dequeuedCounter = mf.NewCounter("mysql_dequeued_leaves", "Number of leaves dequeued", logIDLabel)
   114  
   115  	queueLatency = mf.NewHistogram("mysql_queue_leaves_latency", "Latency of queue leaves operation in seconds", logIDLabel)
   116  	queueInsertLatency = mf.NewHistogram("mysql_queue_leaves_latency_insert", "Latency of insertion part of queue leaves operation in seconds", logIDLabel)
   117  	queueReadLatency = mf.NewHistogram("mysql_queue_leaves_latency_read_dups", "Latency of read-duplicates part of queue leaves operation in seconds", logIDLabel)
   118  	queueInsertLeafLatency = mf.NewHistogram("mysql_queue_leaf_latency_leaf", "Latency of insert-leaf part of queue (single) leaf operation in seconds", logIDLabel)
   119  	queueInsertEntryLatency = mf.NewHistogram("mysql_queue_leaf_latency_entry", "Latency of insert-entry part of queue (single) leaf operation in seconds", logIDLabel)
   120  
   121  	dequeueLatency = mf.NewHistogram("mysql_dequeue_leaves_latency", "Latency of dequeue leaves operation in seconds", logIDLabel)
   122  	dequeueSelectLatency = mf.NewHistogram("mysql_dequeue_leaves_latency_select", "Latency of selection part of dequeue leaves operation in seconds", logIDLabel)
   123  	dequeueRemoveLatency = mf.NewHistogram("mysql_dequeue_leaves_latency_remove", "Latency of removal part of dequeue leaves operation in seconds", logIDLabel)
   124  }
   125  
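        // labelForTX returns the transaction's tree ID as a decimal string, for use
        // as the logid metric label.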
   126  func labelForTX(t *logTreeTX) string {
   127  	return strconv.FormatInt(t.treeID, 10)
   128  }
   129  
   130  func observe(hist monitoring.Histogram, duration time.Duration, label string) {
   131  	hist.Observe(duration.Seconds(), label)
   132  }
   133  
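        // mySQLLogStorage is the MySQL implementation of storage.LogStorage.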
   134  type mySQLLogStorage struct {
   135  	*mySQLTreeStorage
   136  	admin         storage.AdminStorage
   137  	metricFactory monitoring.MetricFactory
   138  }
   139  
   140  // NewLogStorage creates a storage.LogStorage instance for the specified MySQL URL.
   141  // It assumes storage.AdminStorage is backed by the same MySQL database as well.
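        //
        // A minimal construction sketch (assumes a reachable MySQL DSN held in dsn):
        //
        //	db, err := sql.Open("mysql", dsn)
        //	if err != nil {
        //		// handle the error
        //	}
        //	ls := NewLogStorage(db, nil) // nil selects monitoring.InertMetricFactory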
   142  func NewLogStorage(db *sql.DB, mf monitoring.MetricFactory) storage.LogStorage {
   143  	if mf == nil {
   144  		mf = monitoring.InertMetricFactory{}
   145  	}
   146  	return &mySQLLogStorage{
   147  		admin:            NewAdminStorage(db),
   148  		mySQLTreeStorage: newTreeStorage(db),
   149  		metricFactory:    mf,
   150  	}
   151  }
   152  
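        // CheckDatabaseAccessible pings the underlying database to confirm that it is reachable.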
   153  func (m *mySQLLogStorage) CheckDatabaseAccessible(ctx context.Context) error {
   154  	return m.db.PingContext(ctx)
   155  }
   156  
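        // getLeavesByIndexStmt returns selectLeavesByIndexSQL prepared with num
        // IN-clause placeholders.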
   157  func (m *mySQLLogStorage) getLeavesByIndexStmt(ctx context.Context, num int) (*sql.Stmt, error) {
   158  	return m.getStmt(ctx, selectLeavesByIndexSQL, num, "?", "?")
   159  }
   160  
   161  func (m *mySQLLogStorage) getLeavesByMerkleHashStmt(ctx context.Context, num int, orderBySequence bool) (*sql.Stmt, error) {
   162  	if orderBySequence {
   163  		return m.getStmt(ctx, selectLeavesByMerkleHashOrderedBySequenceSQL, num, "?", "?")
   164  	}
   165  
   166  	return m.getStmt(ctx, selectLeavesByMerkleHashSQL, num, "?", "?")
   167  }
   168  
   169  func (m *mySQLLogStorage) getLeavesByLeafIdentityHashStmt(ctx context.Context, num int) (*sql.Stmt, error) {
   170  	return m.getStmt(ctx, selectLeavesByLeafIdentityHashSQL, num, "?", "?")
   171  }
   172  
   173  // readOnlyLogTX implements storage.ReadOnlyLogTX
   174  type readOnlyLogTX struct {
   175  	ls *mySQLLogStorage
   176  	tx *sql.Tx
   177  }
   178  
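        // Snapshot starts a read-only transaction that is not tied to a particular
        // tree; it backs GetActiveLogIDs and GetUnsequencedCounts.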
   179  func (m *mySQLLogStorage) Snapshot(ctx context.Context) (storage.ReadOnlyLogTX, error) {
   180  	tx, err := m.db.BeginTx(ctx, nil /* opts */)
   181  	if err != nil {
   182  		glog.Warningf("Could not start ReadOnlyLogTX: %s", err)
   183  		return nil, err
   184  	}
   185  	return &readOnlyLogTX{m, tx}, nil
   186  }
   187  
   188  func (t *readOnlyLogTX) Commit() error {
   189  	return t.tx.Commit()
   190  }
   191  
   192  func (t *readOnlyLogTX) Rollback() error {
   193  	return t.tx.Rollback()
   194  }
   195  
   196  func (t *readOnlyLogTX) Close() error {
   197  	if err := t.Rollback(); err != nil && err != sql.ErrTxDone {
   198  		glog.Warningf("Rollback error on Close(): %v", err)
   199  		return err
   200  	}
   201  	return nil
   202  }
   203  
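        // GetActiveLogIDs returns the IDs of all non-deleted LOG and PREORDERED_LOG
        // trees that are in the ACTIVE or DRAINING state.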
   204  func (t *readOnlyLogTX) GetActiveLogIDs(ctx context.Context) ([]int64, error) {
   205  	// Include logs that are DRAINING in the active list as we're still
   206  	// integrating leaves into them.
   207  	rows, err := t.tx.QueryContext(
   208  		ctx, selectNonDeletedTreeIDByTypeAndStateSQL,
   209  		trillian.TreeType_LOG.String(), trillian.TreeType_PREORDERED_LOG.String(),
   210  		trillian.TreeState_ACTIVE.String(), trillian.TreeState_DRAINING.String())
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  	defer rows.Close()
   215  	ids := []int64{}
   216  	for rows.Next() {
   217  		var treeID int64
   218  		if err := rows.Scan(&treeID); err != nil {
   219  			return nil, err
   220  		}
   221  		ids = append(ids, treeID)
   222  	}
   223  	return ids, rows.Err()
   224  }
   225  
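        // beginInternal starts a logTreeTX for the given tree and loads its latest
        // signed log root, from which the read and write revisions are derived. For a
        // tree with no root yet, the transaction is returned together with
        // storage.ErrTreeNeedsInit.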
   226  func (m *mySQLLogStorage) beginInternal(ctx context.Context, tree *trillian.Tree) (storage.LogTreeTX, error) {
   227  	once.Do(func() {
   228  		createMetrics(m.metricFactory)
   229  	})
   230  	hasher, err := hashers.NewLogHasher(tree.HashStrategy)
   231  	if err != nil {
   232  		return nil, err
   233  	}
   234  
   235  	stCache := cache.NewLogSubtreeCache(defaultLogStrata, hasher)
   236  	ttx, err := m.beginTreeTx(ctx, tree, hasher.Size(), stCache)
   237  	if err != nil && err != storage.ErrTreeNeedsInit {
   238  		return nil, err
   239  	}
   240  
   241  	ltx := &logTreeTX{
   242  		treeTX: ttx,
   243  		ls:     m,
   244  	}
   245  	ltx.slr, err = ltx.fetchLatestRoot(ctx)
   246  	if err == storage.ErrTreeNeedsInit {
   247  		return ltx, err
   248  	} else if err != nil {
   249  		ttx.Rollback()
   250  		return nil, err
   251  	}
   252  
   253  	if err := ltx.root.UnmarshalBinary(ltx.slr.LogRoot); err != nil {
   254  		ttx.Rollback()
   255  		return nil, err
   256  	}
   257  
   258  	ltx.treeTX.writeRevision = int64(ltx.root.Revision) + 1
   259  	return ltx, nil
   260  }
   261  
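        // ReadWriteTransaction runs f inside a fresh log tree transaction and commits
        // it if f returns nil. An uninitialised tree is still passed to f so that its
        // first root can be stored.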
   262  func (m *mySQLLogStorage) ReadWriteTransaction(ctx context.Context, tree *trillian.Tree, f storage.LogTXFunc) error {
   263  	tx, err := m.beginInternal(ctx, tree)
   264  	if err != nil && err != storage.ErrTreeNeedsInit {
   265  		return err
   266  	}
   267  	defer tx.Close()
   268  	if err := f(ctx, tx); err != nil {
   269  		return err
   270  	}
   271  	return tx.Commit()
   272  }
   273  
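        // AddSequencedLeaves stores the given pre-sequenced leaves in a single
        // committed transaction and returns the per-leaf statuses.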
   274  func (m *mySQLLogStorage) AddSequencedLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) {
   275  	tx, err := m.beginInternal(ctx, tree)
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	res, err := tx.AddSequencedLeaves(ctx, leaves, timestamp)
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  	if err := tx.Commit(); err != nil {
   284  		return nil, err
   285  	}
   286  	return res, nil
   287  }
   288  
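        // SnapshotForTree returns a read-only transaction for the given tree. For an
        // uninitialised tree the transaction is returned along with
        // storage.ErrTreeNeedsInit.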
   289  func (m *mySQLLogStorage) SnapshotForTree(ctx context.Context, tree *trillian.Tree) (storage.ReadOnlyLogTreeTX, error) {
   290  	tx, err := m.beginInternal(ctx, tree)
   291  	if err != nil && err != storage.ErrTreeNeedsInit {
   292  		return nil, err
   293  	}
   294  	return tx, err
   295  }
   296  
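        // QueueLeaves queues the given leaves for sequencing in a single committed
        // transaction. The result has one entry per input leaf; duplicates are
        // reported with an AlreadyExists status carrying the previously stored leaf.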
   297  func (m *mySQLLogStorage) QueueLeaves(ctx context.Context, tree *trillian.Tree, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.QueuedLogLeaf, error) {
   298  	tx, err := m.beginInternal(ctx, tree)
   299  	if err != nil {
   300  		return nil, err
   301  	}
   302  	existing, err := tx.QueueLeaves(ctx, leaves, queueTimestamp)
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  
   307  	if err := tx.Commit(); err != nil {
   308  		return nil, err
   309  	}
   310  
   311  	ret := make([]*trillian.QueuedLogLeaf, len(leaves))
   312  	for i, e := range existing {
   313  		if e != nil {
   314  			ret[i] = &trillian.QueuedLogLeaf{
   315  				Leaf:   e,
   316  				Status: status.Newf(codes.AlreadyExists, "leaf already exists: %v", e.LeafIdentityHash).Proto(),
   317  			}
   318  			continue
   319  		}
   320  		ret[i] = &trillian.QueuedLogLeaf{Leaf: leaves[i]}
   321  	}
   322  	return ret, nil
   323  }
   324  
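        // logTreeTX is the transaction type behind log tree reads and writes. root
        // and slr cache the latest (unpacked and signed) log root read when the
        // transaction began.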
   325  type logTreeTX struct {
   326  	treeTX
   327  	ls   *mySQLLogStorage
   328  	root types.LogRootV1
   329  	slr  trillian.SignedLogRoot
   330  }
   331  
   332  func (t *logTreeTX) ReadRevision() int64 {
   333  	return int64(t.root.Revision)
   334  }
   335  
   336  func (t *logTreeTX) WriteRevision() int64 {
   337  	return t.treeTX.writeRevision
   338  }
   339  
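        // DequeueLeaves returns up to limit leaves queued no later than cutoffTime,
        // and deletes their Unsequenced entries so the removal takes effect when this
        // transaction commits.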
   340  func (t *logTreeTX) DequeueLeaves(ctx context.Context, limit int, cutoffTime time.Time) ([]*trillian.LogLeaf, error) {
   341  	start := time.Now()
   342  	stx, err := t.tx.PrepareContext(ctx, selectQueuedLeavesSQL)
   343  	if err != nil {
   344  		glog.Warningf("Failed to prepare dequeue select: %s", err)
   345  		return nil, err
   346  	}
   347  	defer stx.Close()
   348  
   349  	leaves := make([]*trillian.LogLeaf, 0, limit)
   350  	dq := make([]dequeuedLeaf, 0, limit)
   351  	rows, err := stx.QueryContext(ctx, t.treeID, cutoffTime.UnixNano(), limit)
   352  	if err != nil {
   353  		glog.Warningf("Failed to select rows for work: %s", err)
   354  		return nil, err
   355  	}
   356  	defer rows.Close()
   357  
   358  	for rows.Next() {
   359  		leaf, dqInfo, err := t.dequeueLeaf(rows)
   360  		if err != nil {
   361  			glog.Warningf("Error dequeuing leaf: %v", err)
   362  			return nil, err
   363  		}
   364  
   365  		if len(leaf.LeafIdentityHash) != t.hashSizeBytes {
   366  			return nil, errors.New("dequeued a leaf with incorrect hash size")
   367  		}
   368  
   369  		leaves = append(leaves, leaf)
   370  		dq = append(dq, dqInfo)
   371  	}
   372  
   373  	if rows.Err() != nil {
   374  		return nil, rows.Err()
   375  	}
   376  	label := labelForTX(t)
   377  	selectDuration := time.Since(start)
   378  	observe(dequeueSelectLatency, selectDuration, label)
   379  
    380  	// The convention is that if leaf processing succeeds (by committing this tx),
    381  	// then the corresponding unsequenced entries are removed.
   382  	if len(leaves) > 0 {
   383  		err = t.removeSequencedLeaves(ctx, dq)
   384  	}
   385  
   386  	if err != nil {
   387  		return nil, err
   388  	}
   389  
   390  	totalDuration := time.Since(start)
   391  	removeDuration := totalDuration - selectDuration
   392  	observe(dequeueRemoveLatency, removeDuration, label)
   393  	observe(dequeueLatency, totalDuration, label)
   394  	dequeuedCounter.Add(float64(len(leaves)), label)
   395  
   396  	return leaves, nil
   397  }
   398  
   399  // sortLeavesForInsert returns a slice containing the passed in leaves sorted
   400  // by LeafIdentityHash, and paired with their original positions.
   401  // QueueLeaves and AddSequencedLeaves use this to make the order that LeafData
   402  // row locks are acquired deterministic and reduce the chance of deadlocks.
   403  func sortLeavesForInsert(leaves []*trillian.LogLeaf) []leafAndPosition {
   404  	ordLeaves := make([]leafAndPosition, len(leaves))
   405  	for i, leaf := range leaves {
   406  		ordLeaves[i] = leafAndPosition{leaf: leaf, idx: i}
   407  	}
   408  	sort.Sort(byLeafIdentityHashWithPosition(ordLeaves))
   409  	return ordLeaves
   410  }
   411  
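        // QueueLeaves inserts the leaves into LeafData and the Unsequenced work
        // queue, processing them in LeafIdentityHash order so row locks are acquired
        // deterministically. The returned slice is indexed like the input and holds
        // the previously stored leaf wherever a duplicate was detected, nil otherwise.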
   412  func (t *logTreeTX) QueueLeaves(ctx context.Context, leaves []*trillian.LogLeaf, queueTimestamp time.Time) ([]*trillian.LogLeaf, error) {
   413  	// Don't accept batches if any of the leaves are invalid.
   414  	for _, leaf := range leaves {
   415  		if len(leaf.LeafIdentityHash) != t.hashSizeBytes {
   416  			return nil, fmt.Errorf("queued leaf must have a leaf ID hash of length %d", t.hashSizeBytes)
   417  		}
   418  		var err error
   419  		leaf.QueueTimestamp, err = ptypes.TimestampProto(queueTimestamp)
   420  		if err != nil {
   421  			return nil, fmt.Errorf("got invalid queue timestamp: %v", err)
   422  		}
   423  	}
   424  	start := time.Now()
   425  	label := labelForTX(t)
   426  
   427  	ordLeaves := sortLeavesForInsert(leaves)
   428  	existingCount := 0
   429  	existingLeaves := make([]*trillian.LogLeaf, len(leaves))
   430  
   431  	for _, ol := range ordLeaves {
   432  		i, leaf := ol.idx, ol.leaf
   433  
   434  		leafStart := time.Now()
   435  		qTimestamp, err := ptypes.Timestamp(leaf.QueueTimestamp)
   436  		if err != nil {
   437  			return nil, fmt.Errorf("got invalid queue timestamp: %v", err)
   438  		}
   439  		_, err = t.tx.ExecContext(ctx, insertLeafDataSQL, t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, qTimestamp.UnixNano())
   440  		insertDuration := time.Since(leafStart)
   441  		observe(queueInsertLeafLatency, insertDuration, label)
   442  		if isDuplicateErr(err) {
   443  			// Remember the duplicate leaf, using the requested leaf for now.
   444  			existingLeaves[i] = leaf
   445  			existingCount++
   446  			queuedDupCounter.Inc(label)
   447  			continue
   448  		}
   449  		if err != nil {
   450  			glog.Warningf("Error inserting %d into LeafData: %s", i, err)
   451  			return nil, err
   452  		}
   453  
   454  		// Create the work queue entry
   455  		args := []interface{}{
   456  			t.treeID,
   457  			leaf.LeafIdentityHash,
   458  			leaf.MerkleLeafHash,
   459  		}
   460  		queueTimestamp, err := ptypes.Timestamp(leaf.QueueTimestamp)
   461  		if err != nil {
   462  			return nil, fmt.Errorf("got invalid queue timestamp: %v", err)
   463  		}
   464  		args = append(args, queueArgs(t.treeID, leaf.LeafIdentityHash, queueTimestamp)...)
   465  		_, err = t.tx.ExecContext(
   466  			ctx,
   467  			insertUnsequencedEntrySQL,
   468  			args...,
   469  		)
   470  		if err != nil {
   471  			glog.Warningf("Error inserting into Unsequenced: %s", err)
   472  			return nil, fmt.Errorf("Unsequenced: %v", err)
   473  		}
   474  		leafDuration := time.Since(leafStart)
   475  		observe(queueInsertEntryLatency, (leafDuration - insertDuration), label)
   476  	}
   477  	insertDuration := time.Since(start)
   478  	observe(queueInsertLatency, insertDuration, label)
   479  	queuedCounter.Add(float64(len(leaves)), label)
   480  
   481  	if existingCount == 0 {
   482  		return existingLeaves, nil
   483  	}
   484  
   485  	// For existing leaves, we need to retrieve the contents.  First collate the desired LeafIdentityHash values.
   486  	var toRetrieve [][]byte
   487  	for _, existing := range existingLeaves {
   488  		if existing != nil {
   489  			toRetrieve = append(toRetrieve, existing.LeafIdentityHash)
   490  		}
   491  	}
   492  	results, err := t.getLeafDataByIdentityHash(ctx, toRetrieve)
   493  	if err != nil {
   494  		return nil, fmt.Errorf("failed to retrieve existing leaves: %v", err)
   495  	}
   496  	if len(results) != len(toRetrieve) {
   497  		return nil, fmt.Errorf("failed to retrieve all existing leaves: got %d, want %d", len(results), len(toRetrieve))
   498  	}
   499  	// Replace the requested leaves with the actual leaves.
   500  	for i, requested := range existingLeaves {
   501  		if requested == nil {
   502  			continue
   503  		}
   504  		found := false
   505  		for _, result := range results {
   506  			if bytes.Equal(result.LeafIdentityHash, requested.LeafIdentityHash) {
   507  				existingLeaves[i] = result
   508  				found = true
   509  				break
   510  			}
   511  		}
   512  		if !found {
   513  			return nil, fmt.Errorf("failed to find existing leaf for hash %x", requested.LeafIdentityHash)
   514  		}
   515  	}
   516  	totalDuration := time.Since(start)
   517  	readDuration := totalDuration - insertDuration
   518  	observe(queueReadLatency, readDuration, label)
   519  	observe(queueLatency, totalDuration, label)
   520  
   521  	return existingLeaves, nil
   522  }
   523  
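        // AddSequencedLeaves inserts the leaves into LeafData and SequencedLeafData
        // at their pre-assigned indices, using a savepoint to undo the LeafData insert
        // if the SequencedLeafData insert hits a conflicting LeafIndex.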
   524  func (t *logTreeTX) AddSequencedLeaves(ctx context.Context, leaves []*trillian.LogLeaf, timestamp time.Time) ([]*trillian.QueuedLogLeaf, error) {
   525  	res := make([]*trillian.QueuedLogLeaf, len(leaves))
   526  	ok := status.New(codes.OK, "OK").Proto()
   527  
   528  	// Leaves in this transaction are inserted in two tables. For each leaf, if
   529  	// one of the two inserts fails, we remove the side effect by rolling back to
   530  	// a savepoint installed before the first insert of the two.
   531  	const savepoint = "SAVEPOINT AddSequencedLeaves"
   532  	if _, err := t.tx.ExecContext(ctx, savepoint); err != nil {
   533  		glog.Errorf("Error adding savepoint: %s", err)
   534  		return nil, err
   535  	}
   536  	// TODO(pavelkalinnikov): Consider performance implication of executing this
   537  	// extra SAVEPOINT, especially for 1-entry batches. Optimize if necessary.
   538  
   539  	// Note: LeafData inserts are presumably protected from deadlocks due to
   540  	// sorting, but the order of the corresponding SequencedLeafData inserts
   541  	// becomes indeterministic. However, in a typical case when leaves are
   542  	// supplied in contiguous non-intersecting batches, the chance of having
   543  	// circular dependencies between transactions is significantly lower.
   544  	ordLeaves := sortLeavesForInsert(leaves)
   545  	for _, ol := range ordLeaves {
   546  		i, leaf := ol.idx, ol.leaf
   547  
   548  		// This should fail on insert, but catch it early.
   549  		if got, want := len(leaf.LeafIdentityHash), t.hashSizeBytes; got != want {
   550  			return nil, status.Errorf(codes.FailedPrecondition, "leaves[%d] has incorrect hash size %d, want %d", i, got, want)
   551  		}
   552  
   553  		if _, err := t.tx.ExecContext(ctx, savepoint); err != nil {
   554  			glog.Errorf("Error updating savepoint: %s", err)
   555  			return nil, err
   556  		}
   557  
   558  		res[i] = &trillian.QueuedLogLeaf{Status: ok}
   559  
   560  		// TODO(pavelkalinnikov): Measure latencies.
   561  		_, err := t.tx.ExecContext(ctx, insertLeafDataSQL,
   562  			t.treeID, leaf.LeafIdentityHash, leaf.LeafValue, leaf.ExtraData, timestamp.UnixNano())
   563  		// TODO(pavelkalinnikov): Detach PREORDERED_LOG integration latency metric.
   564  
   565  		// TODO(pavelkalinnikov): Support opting out from duplicates detection.
   566  		if isDuplicateErr(err) {
   567  			res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIdentityHash").Proto()
   568  			// Note: No rolling back to savepoint because there is no side effect.
   569  			continue
   570  		} else if err != nil {
   571  			glog.Errorf("Error inserting leaves[%d] into LeafData: %s", i, err)
   572  			return nil, err
   573  		}
   574  
   575  		_, err = t.tx.ExecContext(ctx, insertSequencedLeafSQL+valuesPlaceholder5,
   576  			t.treeID, leaf.LeafIdentityHash, leaf.MerkleLeafHash, leaf.LeafIndex, 0)
   577  		// TODO(pavelkalinnikov): Update IntegrateTimestamp on integrating the leaf.
   578  
   579  		if isDuplicateErr(err) {
   580  			res[i].Status = status.New(codes.FailedPrecondition, "conflicting LeafIndex").Proto()
   581  			if _, err := t.tx.ExecContext(ctx, "ROLLBACK TO "+savepoint); err != nil {
   582  				glog.Errorf("Error rolling back to savepoint: %s", err)
   583  				return nil, err
   584  			}
   585  		} else if err != nil {
   586  			glog.Errorf("Error inserting leaves[%d] into SequencedLeafData: %s", i, err)
   587  			return nil, err
   588  		}
   589  
   590  		// TODO(pavelkalinnikov): Load LeafData for conflicting entries.
   591  	}
   592  
   593  	if _, err := t.tx.ExecContext(ctx, "RELEASE "+savepoint); err != nil {
   594  		glog.Errorf("Error releasing savepoint: %s", err)
   595  		return nil, err
   596  	}
   597  
   598  	return res, nil
   599  }
   600  
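        // GetSequencedLeafCount returns the number of sequenced leaves stored for this tree.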
   601  func (t *logTreeTX) GetSequencedLeafCount(ctx context.Context) (int64, error) {
   602  	var sequencedLeafCount int64
   603  
   604  	err := t.tx.QueryRowContext(ctx, selectSequencedLeafCountSQL, t.treeID).Scan(&sequencedLeafCount)
   605  	if err != nil {
   606  		glog.Warningf("Error getting sequenced leaf count: %s", err)
   607  	}
   608  
   609  	return sequencedLeafCount, err
   610  }
   611  
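        // GetLeavesByIndex returns the leaves at the given sequence numbers. For LOG
        // trees the indices are first validated against the current tree size.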
   612  func (t *logTreeTX) GetLeavesByIndex(ctx context.Context, leaves []int64) ([]*trillian.LogLeaf, error) {
   613  	if t.treeType == trillian.TreeType_LOG {
   614  		treeSize := int64(t.root.TreeSize)
   615  		for _, leaf := range leaves {
   616  			if leaf < 0 {
   617  				return nil, status.Errorf(codes.InvalidArgument, "index %d is < 0", leaf)
   618  			}
   619  			if leaf >= treeSize {
   620  				return nil, status.Errorf(codes.OutOfRange, "invalid leaf index %d, want < TreeSize(%d)", leaf, treeSize)
   621  			}
   622  		}
   623  	}
   624  	tmpl, err := t.ls.getLeavesByIndexStmt(ctx, len(leaves))
   625  	if err != nil {
   626  		return nil, err
   627  	}
   628  	stx := t.tx.StmtContext(ctx, tmpl)
   629  	defer stx.Close()
   630  
   631  	var args []interface{}
   632  	for _, nodeID := range leaves {
   633  		args = append(args, interface{}(int64(nodeID)))
   634  	}
   635  	args = append(args, interface{}(t.treeID))
   636  	rows, err := stx.QueryContext(ctx, args...)
   637  	if err != nil {
   638  		glog.Warningf("Failed to get leaves by idx: %s", err)
   639  		return nil, err
   640  	}
   641  	defer rows.Close()
   642  
   643  	ret := make([]*trillian.LogLeaf, 0, len(leaves))
   644  	for rows.Next() {
   645  		leaf := &trillian.LogLeaf{}
   646  		var qTimestamp, iTimestamp int64
   647  		if err := rows.Scan(
   648  			&leaf.MerkleLeafHash,
   649  			&leaf.LeafIdentityHash,
   650  			&leaf.LeafValue,
   651  			&leaf.LeafIndex,
   652  			&leaf.ExtraData,
   653  			&qTimestamp,
   654  			&iTimestamp); err != nil {
   655  			glog.Warningf("Failed to scan merkle leaves: %s", err)
   656  			return nil, err
   657  		}
   658  		var err error
   659  		leaf.QueueTimestamp, err = ptypes.TimestampProto(time.Unix(0, qTimestamp))
   660  		if err != nil {
   661  			return nil, fmt.Errorf("got invalid queue timestamp: %v", err)
   662  		}
   663  		leaf.IntegrateTimestamp, err = ptypes.TimestampProto(time.Unix(0, iTimestamp))
   664  		if err != nil {
   665  			return nil, fmt.Errorf("got invalid integrate timestamp: %v", err)
   666  		}
   667  		ret = append(ret, leaf)
   668  	}
   669  
   670  	if got, want := len(ret), len(leaves); got != want {
   671  		return nil, status.Errorf(codes.Internal, "len(ret): %d, want %d", got, want)
   672  	}
   673  	return ret, nil
   674  }
   675  
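        // GetLeavesByRange returns up to count leaves starting at index start. For
        // LOG trees the requested range is clipped to the current tree size.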
   676  func (t *logTreeTX) GetLeavesByRange(ctx context.Context, start, count int64) ([]*trillian.LogLeaf, error) {
   677  	if count <= 0 {
   678  		return nil, status.Errorf(codes.InvalidArgument, "invalid count %d, want > 0", count)
   679  	}
   680  	if start < 0 {
   681  		return nil, status.Errorf(codes.InvalidArgument, "invalid start %d, want >= 0", start)
   682  	}
   683  
   684  	if t.treeType == trillian.TreeType_LOG {
   685  		treeSize := int64(t.root.TreeSize)
   686  		if treeSize <= 0 {
   687  			return nil, status.Errorf(codes.OutOfRange, "empty tree")
   688  		} else if start >= treeSize {
   689  			return nil, status.Errorf(codes.OutOfRange, "invalid start %d, want < TreeSize(%d)", start, treeSize)
   690  		}
   691  		// Ensure no entries queried/returned beyond the tree.
   692  		if maxCount := treeSize - start; count > maxCount {
   693  			count = maxCount
   694  		}
   695  	}
   696  	// TODO(pavelkalinnikov): Further clip `count` to a safe upper bound like 64k.
   697  
   698  	args := []interface{}{start, start + count, t.treeID}
   699  	rows, err := t.tx.QueryContext(ctx, selectLeavesByRangeSQL, args...)
   700  	if err != nil {
   701  		glog.Warningf("Failed to get leaves by range: %s", err)
   702  		return nil, err
   703  	}
   704  	defer rows.Close()
   705  
   706  	ret := make([]*trillian.LogLeaf, 0, count)
   707  	for wantIndex := start; rows.Next(); wantIndex++ {
   708  		leaf := &trillian.LogLeaf{}
   709  		var qTimestamp, iTimestamp int64
   710  		if err := rows.Scan(
   711  			&leaf.MerkleLeafHash,
   712  			&leaf.LeafIdentityHash,
   713  			&leaf.LeafValue,
   714  			&leaf.LeafIndex,
   715  			&leaf.ExtraData,
   716  			&qTimestamp,
   717  			&iTimestamp); err != nil {
   718  			glog.Warningf("Failed to scan merkle leaves: %s", err)
   719  			return nil, err
   720  		}
   721  		if leaf.LeafIndex != wantIndex {
   722  			if wantIndex < int64(t.root.TreeSize) {
   723  				return nil, fmt.Errorf("got unexpected index %d, want %d", leaf.LeafIndex, wantIndex)
   724  			}
   725  			break
   726  		}
   727  		var err error
   728  		leaf.QueueTimestamp, err = ptypes.TimestampProto(time.Unix(0, qTimestamp))
   729  		if err != nil {
   730  			return nil, fmt.Errorf("got invalid queue timestamp: %v", err)
   731  		}
   732  		leaf.IntegrateTimestamp, err = ptypes.TimestampProto(time.Unix(0, iTimestamp))
   733  		if err != nil {
   734  			return nil, fmt.Errorf("got invalid integrate timestamp: %v", err)
   735  		}
   736  		ret = append(ret, leaf)
   737  	}
   738  
   739  	return ret, nil
   740  }
   741  
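        // GetLeavesByHash returns the leaves whose MerkleLeafHash matches one of
        // leafHashes, optionally ordered by sequence number. Duplicate leaves yield
        // multiple results.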
   742  func (t *logTreeTX) GetLeavesByHash(ctx context.Context, leafHashes [][]byte, orderBySequence bool) ([]*trillian.LogLeaf, error) {
   743  	tmpl, err := t.ls.getLeavesByMerkleHashStmt(ctx, len(leafHashes), orderBySequence)
   744  	if err != nil {
   745  		return nil, err
   746  	}
   747  
   748  	return t.getLeavesByHashInternal(ctx, leafHashes, tmpl, "merkle")
   749  }
   750  
   751  // getLeafDataByIdentityHash retrieves leaf data by LeafIdentityHash, returned
   752  // as a slice of LogLeaf objects for convenience.  However, note that the
   753  // returned LogLeaf objects will not have a valid MerkleLeafHash, LeafIndex, or IntegrateTimestamp.
   754  func (t *logTreeTX) getLeafDataByIdentityHash(ctx context.Context, leafHashes [][]byte) ([]*trillian.LogLeaf, error) {
   755  	tmpl, err := t.ls.getLeavesByLeafIdentityHashStmt(ctx, len(leafHashes))
   756  	if err != nil {
   757  		return nil, err
   758  	}
   759  	return t.getLeavesByHashInternal(ctx, leafHashes, tmpl, "leaf-identity")
   760  }
   761  
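        // LatestSignedLogRoot returns the signed log root that was read when this
        // transaction began.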
   762  func (t *logTreeTX) LatestSignedLogRoot(ctx context.Context) (trillian.SignedLogRoot, error) {
   763  	return t.slr, nil
   764  }
   765  
   766  // fetchLatestRoot reads the latest SignedLogRoot from the DB and returns it.
   767  func (t *logTreeTX) fetchLatestRoot(ctx context.Context) (trillian.SignedLogRoot, error) {
   768  	var timestamp, treeSize, treeRevision int64
   769  	var rootHash, rootSignatureBytes []byte
   770  	if err := t.tx.QueryRowContext(
   771  		ctx, selectLatestSignedLogRootSQL, t.treeID).Scan(
   772  		&timestamp, &treeSize, &rootHash, &treeRevision, &rootSignatureBytes,
    773  	); err != nil {
         		if err == sql.ErrNoRows {
    774  			// It's possible there are no roots for this tree yet
    775  			return trillian.SignedLogRoot{}, storage.ErrTreeNeedsInit
         		}
         		return trillian.SignedLogRoot{}, err
    776  	}
   777  
   778  	// Put logRoot back together. Fortunately LogRoot has a deterministic serialization.
   779  	logRoot, err := (&types.LogRootV1{
   780  		RootHash:       rootHash,
   781  		TimestampNanos: uint64(timestamp),
   782  		Revision:       uint64(treeRevision),
   783  		TreeSize:       uint64(treeSize),
   784  	}).MarshalBinary()
   785  	if err != nil {
   786  		return trillian.SignedLogRoot{}, err
   787  	}
   788  
   789  	return trillian.SignedLogRoot{
   790  		KeyHint:          types.SerializeKeyHint(t.treeID),
   791  		LogRoot:          logRoot,
   792  		LogRootSignature: rootSignatureBytes,
   793  		// TODO(gbelvin): Remove deprecated fields
   794  		TimestampNanos: timestamp,
   795  		RootHash:       rootHash,
   796  		TreeSize:       treeSize,
   797  		TreeRevision:   treeRevision,
   798  	}, nil
   799  }
   800  
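        // StoreSignedLogRoot writes a new entry to the TreeHead table. Roots carrying
        // metadata are rejected because this storage implementation does not support
        // them.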
   801  func (t *logTreeTX) StoreSignedLogRoot(ctx context.Context, root trillian.SignedLogRoot) error {
   802  	var logRoot types.LogRootV1
   803  	if err := logRoot.UnmarshalBinary(root.LogRoot); err != nil {
   804  		glog.Warningf("Failed to parse log root: %x %v", root.LogRoot, err)
   805  		return err
   806  	}
   807  	if len(logRoot.Metadata) != 0 {
   808  		return fmt.Errorf("unimplemented: mysql storage does not support log root metadata")
   810  	}
   811  
   812  	res, err := t.tx.ExecContext(
   813  		ctx,
   814  		insertTreeHeadSQL,
   815  		t.treeID,
   816  		logRoot.TimestampNanos,
   817  		logRoot.TreeSize,
   818  		logRoot.RootHash,
   819  		logRoot.Revision,
   820  		root.LogRootSignature)
   821  	if err != nil {
   822  		glog.Warningf("Failed to store signed root: %s", err)
   823  	}
   824  
   825  	return checkResultOkAndRowCountIs(res, err, 1)
   826  }
   827  
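        // getLeavesByHashInternal runs the given by-hash statement over leafHashes
        // and scans the results into LogLeaf objects; desc is used only in log
        // messages.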
   828  func (t *logTreeTX) getLeavesByHashInternal(ctx context.Context, leafHashes [][]byte, tmpl *sql.Stmt, desc string) ([]*trillian.LogLeaf, error) {
   829  	stx := t.tx.StmtContext(ctx, tmpl)
   830  	defer stx.Close()
   831  
   832  	var args []interface{}
   833  	for _, hash := range leafHashes {
   834  		args = append(args, interface{}([]byte(hash)))
   835  	}
   836  	args = append(args, interface{}(t.treeID))
   837  	rows, err := stx.QueryContext(ctx, args...)
   838  	if err != nil {
   839  		glog.Warningf("Query() %s hash = %v", desc, err)
   840  		return nil, err
   841  	}
   842  	defer rows.Close()
   843  
   844  	// The tree could include duplicates so we don't know how many results will be returned
   845  	var ret []*trillian.LogLeaf
   846  	for rows.Next() {
   847  		leaf := &trillian.LogLeaf{}
    848  		// We might be using a LEFT JOIN in our statement, so leaves which are
    849  		// queued but not yet integrated will have a NULL IntegrateTimestamp
    850  		// because there is no corresponding entry in SequencedLeafData. The
    851  		// table definition itself forbids NULL, so we use a nullable type here
    852  		// and check its validity below.
   853  		var integrateTS sql.NullInt64
   854  		var queueTS int64
   855  
   856  		if err := rows.Scan(&leaf.MerkleLeafHash, &leaf.LeafIdentityHash, &leaf.LeafValue, &leaf.LeafIndex, &leaf.ExtraData, &queueTS, &integrateTS); err != nil {
   857  			glog.Warningf("LogID: %d Scan() %s = %s", t.treeID, desc, err)
   858  			return nil, err
   859  		}
   860  		var err error
   861  		leaf.QueueTimestamp, err = ptypes.TimestampProto(time.Unix(0, queueTS))
   862  		if err != nil {
   863  			return nil, fmt.Errorf("got invalid queue timestamp: %v", err)
   864  		}
   865  		if integrateTS.Valid {
   866  			leaf.IntegrateTimestamp, err = ptypes.TimestampProto(time.Unix(0, integrateTS.Int64))
   867  			if err != nil {
   868  				return nil, fmt.Errorf("got invalid integrate timestamp: %v", err)
   869  			}
   870  		}
   871  
   872  		if got, want := len(leaf.MerkleLeafHash), t.hashSizeBytes; got != want {
   873  			return nil, fmt.Errorf("LogID: %d Scanned leaf %s does not have hash length %d, got %d", t.treeID, desc, want, got)
   874  		}
   875  
   876  		ret = append(ret, leaf)
   877  	}
   878  
   879  	return ret, nil
   880  }
   881  
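        // GetUnsequencedCounts returns the number of queued-but-not-yet-integrated
        // leaves, keyed by tree ID.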
   882  func (t *readOnlyLogTX) GetUnsequencedCounts(ctx context.Context) (storage.CountByLogID, error) {
   883  	stx, err := t.tx.PrepareContext(ctx, selectUnsequencedLeafCountSQL)
   884  	if err != nil {
   885  		glog.Warningf("Failed to prep unsequenced leaf count statement: %v", err)
   886  		return nil, err
   887  	}
   888  	defer stx.Close()
   889  
   890  	rows, err := stx.QueryContext(ctx)
   891  	if err != nil {
   892  		return nil, err
   893  	}
   894  	defer rows.Close()
   895  
   896  	ret := make(map[int64]int64)
   897  	for rows.Next() {
   898  		var logID, count int64
   899  		if err := rows.Scan(&logID, &count); err != nil {
   900  			return nil, fmt.Errorf("failed to scan row from unsequenced counts: %v", err)
   901  		}
   902  		ret[logID] = count
   903  	}
   904  	return ret, nil
   905  }
   906  
   907  // leafAndPosition records original position before sort.
   908  type leafAndPosition struct {
   909  	leaf *trillian.LogLeaf
   910  	idx  int
   911  }
   912  
   913  // byLeafIdentityHashWithPosition allows sorting (as above), but where we need
   914  // to remember the original position
   915  type byLeafIdentityHashWithPosition []leafAndPosition
   916  
   917  func (l byLeafIdentityHashWithPosition) Len() int {
   918  	return len(l)
   919  }
   920  func (l byLeafIdentityHashWithPosition) Swap(i, j int) {
   921  	l[i], l[j] = l[j], l[i]
   922  }
   923  func (l byLeafIdentityHashWithPosition) Less(i, j int) bool {
   924  	return bytes.Compare(l[i].leaf.LeafIdentityHash, l[j].leaf.LeafIdentityHash) == -1
   925  }
   926  
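        // isDuplicateErr reports whether err is the MySQL duplicate-entry error
        // (errNumDuplicate, 1062).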
   927  func isDuplicateErr(err error) bool {
   928  	switch err := err.(type) {
   929  	case *mysql.MySQLError:
   930  		return err.Number == errNumDuplicate
   931  	default:
   932  		return false
   933  	}
   934  }