github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/cmd/sumdbverify/verify.go (about)

     1  // Copyright 2023 Google LLC. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // verify checks that a cloned SumDB log does not contain any conflicting entries.
    16  package main
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"regexp"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/google/trillian-examples/clone/logdb"
    28  	"github.com/transparency-dev/formats/log"
    29  	"github.com/transparency-dev/merkle/compact"
    30  	"github.com/transparency-dev/merkle/rfc6962"
    31  	"golang.org/x/mod/sumdb/note"
    32  	"k8s.io/klog/v2"
    33  
    34  	_ "github.com/go-sql-driver/mysql"
    35  )
    36  
    37  var (
    38  	mysqlURI     = flag.String("mysql_uri", "", "URI of the MySQL database containing the log.")
    39  	pollInterval = flag.Duration("poll_interval", 0, "How often to re-verify the contents of the DB. Set to 0 to exit after first verification.")
    40  
    41  	// Example leaf:
    42  	// golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
    43  	// golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
    44  	//
    45  	line0RE = regexp.MustCompile(`(.*) (.*) h1:(.*)`)
    46  	line1RE = regexp.MustCompile(`(.*) (.*)/go.mod h1:(.*)`)
    47  )
    48  
// dataSource is the subset of log database behaviour that sumdbVerifier relies on.
// In main it is satisfied by the value returned from logdb.NewDatabase.
type dataSource interface {
	// GetLatestCheckpoint returns a checkpoint from the data source as raw bytes,
	// together with a size and a compact range. verifyLeaves uses only the raw
	// checkpoint bytes and the error, and treats logdb.ErrNoDataFound as meaning
	// that no checkpoint has been stored yet.
	// NOTE(review): presumably this is the most recently stored checkpoint, as the
	// name suggests; confirm against the logdb implementation.
	GetLatestCheckpoint(ctx context.Context) (size uint64, checkpoint []byte, compactRange [][]byte, err error)
	// StreamLeaves delivers leaves (or an error) on out. verifyLeaves runs it in
	// its own goroutine with start=0 and end set to the checkpoint size, and
	// ranges over out until the channel is closed.
	// NOTE(review): this implies implementations must close out when done, and
	// that the range is presumably [start, end); confirm against logdb.
	StreamLeaves(ctx context.Context, start, end uint64, out chan<- logdb.StreamResult)
}
    53  
    54  func main() {
    55  	flag.Parse()
    56  	ctx := context.Background()
    57  
    58  	if len(*mysqlURI) == 0 {
    59  		klog.Exit("Missing required parameter 'mysql_uri'")
    60  	}
    61  	db, err := logdb.NewDatabase(*mysqlURI)
    62  	if err != nil {
    63  		klog.Exitf("Failed to connect to database: %q", err)
    64  	}
    65  
    66  	verifier, err := note.NewVerifier("sum.golang.org+033de0ae+Ac4zctda0e5eza+HJyk9SxEdh+s3Ux18htTTAD8OuAn8")
    67  	if err != nil {
    68  		klog.Exitf("Failed to construct verifier: %v", err)
    69  	}
    70  
    71  	logVerifier := sumdbVerifier{
    72  		db:       db,
    73  		origin:   "go.sum database tree",
    74  		verifier: verifier,
    75  	}
    76  	doit := func() {
    77  		size, err := logVerifier.verifyLeaves(ctx)
    78  		if err != nil {
    79  			klog.Exitf("Failed verification: %v", err)
    80  		}
    81  		klog.Infof("No conflicting hashes found after verifying %d leaves", size)
    82  	}
    83  	doit()
    84  	if *pollInterval == 0 {
    85  		return
    86  	}
    87  	ticker := time.NewTicker(*pollInterval)
    88  	for {
    89  		select {
    90  		case <-ticker.C:
    91  			doit()
    92  		case <-ctx.Done():
    93  			klog.Exit(ctx.Err())
    94  		}
    95  	}
    96  }
    97  
// sumdbVerifier bundles everything verifyLeaves needs in order to check the
// contents of a cloned log.
type sumdbVerifier struct {
	// db supplies the latest checkpoint and the stream of leaves to verify.
	db dataSource
	// origin is the expected checkpoint origin; it is passed to log.ParseCheckpoint.
	origin string
	// verifier is the note verifier passed to log.ParseCheckpoint along with origin.
	verifier note.Verifier
}
   103  
   104  func (v sumdbVerifier) verifyLeaves(ctx context.Context) (uint64, error) {
   105  	leaves := make(chan logdb.StreamResult, 1)
   106  
   107  	// Get the raw data representing the latest checkpoint from the database.
   108  	_, cpRaw, _, err := v.db.GetLatestCheckpoint(ctx)
   109  	if err != nil {
   110  		if err == logdb.ErrNoDataFound {
   111  			klog.Warning("No checkpoint found in the log. Try again when the clone tool has completed.")
   112  			return 0, nil
   113  		}
   114  		return 0, fmt.Errorf("GetLatestCheckpoint(): %v", err)
   115  	}
   116  	// Parse the checkpoint to ensure it is from the expected log.
   117  	cp, _, _, err := log.ParseCheckpoint(cpRaw, v.origin, v.verifier)
   118  	if err != nil {
   119  		return 0, fmt.Errorf("ParseCheckpoint(): %v", err)
   120  	}
   121  
   122  	// Start streaming the leaves from the database, in order, from the beginning.
   123  	go v.db.StreamLeaves(ctx, 0, cp.Size, leaves)
   124  
   125  	// modVerToHashes is a map used to perform the core claim verification.
   126  	// Two entries in the log being mapped to the same key means that the log has
   127  	// the same module+version occurring more than once. This is only OK if both of
   128  	// the leaf entries commit to the same hashes for this key.
   129  	modVerToHashes := make(map[string]hashesAtIndex)
   130  
   131  	// Construct a compact range, which is essentially an efficient in-memory Merkle Tree
   132  	// calculator as we use it here. Every time we process a leaf we will append it to the
   133  	// compact range, and then at the end we must check that the calculated Merkle Tree
   134  	// root hash is the same as that in the checkpoint we parsed above.
   135  	rf := compact.RangeFactory{
   136  		Hash: rfc6962.DefaultHasher.HashChildren,
   137  	}
   138  	cr := rf.NewEmptyRange(0)
   139  
   140  	// Now loop over each of the leaves, checking:
   141  	// 1. Each leaf is correctly formatted (syntax)
   142  	// 2. Each leaf is semantically valid in isolation
   143  	// 3. That any previous declaration for the module+version is consistent with this leaf
   144  	var resErr error
   145  	var index uint64
   146  	for leaf := range leaves {
   147  		if leaf.Err != nil {
   148  			return 0, fmt.Errorf("failed to get leaves from DB: %w", leaf.Err)
   149  		}
   150  		data := leaf.Leaf
   151  		if err := cr.Append(rfc6962.DefaultHasher.HashLeaf(data), nil); err != nil {
   152  			return 0, err
   153  		}
   154  
   155  		lines := strings.Split(string(data), "\n")
   156  
   157  		line0Parts := line0RE.FindStringSubmatch(lines[0])
   158  		line0Module, line0Version, line0Hash := line0Parts[1], line0Parts[2], line0Parts[3]
   159  
   160  		line1Parts := line1RE.FindStringSubmatch(lines[1])
   161  		line1Module, line1Version, line1Hash := line1Parts[1], line1Parts[2], line1Parts[3]
   162  
   163  		if line0Module != line1Module {
   164  			return 0, fmt.Errorf("mismatched module names at %d: (%s, %s)", index, line0Module, line1Module)
   165  		}
   166  		if line0Version != line1Version {
   167  			return 0, fmt.Errorf("mismatched version names at %d: (%s, %s)", index, line0Version, line1Version)
   168  		}
   169  
   170  		modVer := fmt.Sprintf("%s %s", line0Module, line0Version)
   171  		hashes := hashesAtIndex{
   172  			line0Hash: line0Hash,
   173  			line1Hash: line1Hash,
   174  			index:     index,
   175  		}
   176  
   177  		if existing, found := modVerToHashes[modVer]; found {
   178  			klog.V(1).Infof("Found existing hash for %q", modVer)
   179  			if !existing.hashEq(hashes) {
   180  				resErr = fmt.Errorf("module and version %q has conflicting hashes!\n%q != %q", modVer, existing, hashes)
   181  				klog.Error(resErr)
   182  			}
   183  		}
   184  		modVerToHashes[modVer] = hashes
   185  		index++
   186  	}
   187  	if resErr != nil {
   188  		return 0, resErr
   189  	}
   190  
   191  	// Use the compact range to calculate the root hash and ensure it matches the checkpoint
   192  	rootHash, err := cr.GetRootHash(nil)
   193  	if err != nil {
   194  		return 0, fmt.Errorf("GetRootHash(): %v", err)
   195  	}
   196  	if !bytes.Equal(rootHash, cp.Hash) {
   197  		return 0, fmt.Errorf("Data corruption: checkpoint from DB has hash %x but calculated hash %x from leaves", cp.Hash, rootHash)
   198  	}
   199  	return index, nil
   200  }
   201  
   202  type hashesAtIndex struct {
   203  	line0Hash string
   204  	line1Hash string
   205  	index     uint64
   206  }
   207  
   208  func (h hashesAtIndex) String() string {
   209  	return fmt.Sprintf("index=%d, line0Hash=%s line1Hash=%s", h.index, h.line0Hash, h.line1Hash)
   210  }
   211  
   212  func (h hashesAtIndex) hashEq(other hashesAtIndex) bool {
   213  	return h.line0Hash == other.line0Hash && h.line1Hash == other.line1Hash
   214  }