github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/cmd/sumdbverify/verify.go (about) 1 // Copyright 2023 Google LLC. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // verify checks that a cloned SumDB log does not contain any conflicting entries. 16 package main 17 18 import ( 19 "bytes" 20 "context" 21 "flag" 22 "fmt" 23 "regexp" 24 "strings" 25 "time" 26 27 "github.com/google/trillian-examples/clone/logdb" 28 "github.com/transparency-dev/formats/log" 29 "github.com/transparency-dev/merkle/compact" 30 "github.com/transparency-dev/merkle/rfc6962" 31 "golang.org/x/mod/sumdb/note" 32 "k8s.io/klog/v2" 33 34 _ "github.com/go-sql-driver/mysql" 35 ) 36 37 var ( 38 mysqlURI = flag.String("mysql_uri", "", "URI of the MySQL database containing the log.") 39 pollInterval = flag.Duration("poll_interval", 0, "How often to re-verify the contents of the DB. Set to 0 to exit after first verification.") 40 41 // Example leaf: 42 // golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 43 // golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 44 // 45 line0RE = regexp.MustCompile(`(.*) (.*) h1:(.*)`) 46 line1RE = regexp.MustCompile(`(.*) (.*)/go.mod h1:(.*)`) 47 ) 48 49 type dataSource interface { 50 GetLatestCheckpoint(ctx context.Context) (size uint64, checkpoint []byte, compactRange [][]byte, err error) 51 StreamLeaves(ctx context.Context, start, end uint64, out chan<- logdb.StreamResult) 52 } 53 54 func main() { 55 flag.Parse() 56 ctx := context.Background() 57 58 if len(*mysqlURI) == 0 { 59 klog.Exit("Missing required parameter 'mysql_uri'") 60 } 61 db, err := logdb.NewDatabase(*mysqlURI) 62 if err != nil { 63 klog.Exitf("Failed to connect to database: %q", err) 64 } 65 66 verifier, err := note.NewVerifier("sum.golang.org+033de0ae+Ac4zctda0e5eza+HJyk9SxEdh+s3Ux18htTTAD8OuAn8") 67 if err != nil { 68 klog.Exitf("Failed to construct verifier: %v", err) 69 } 70 71 logVerifier := sumdbVerifier{ 72 db: db, 73 origin: "go.sum database tree", 74 verifier: verifier, 75 } 76 doit := func() { 77 size, err := logVerifier.verifyLeaves(ctx) 78 if err != nil { 79 klog.Exitf("Failed verification: %v", err) 80 } 81 klog.Infof("No conflicting hashes found after verifying %d leaves", size) 82 } 83 doit() 84 if *pollInterval == 0 { 85 return 86 } 87 ticker := time.NewTicker(*pollInterval) 88 for { 89 select { 90 case <-ticker.C: 91 doit() 92 case <-ctx.Done(): 93 klog.Exit(ctx.Err()) 94 } 95 } 96 } 97 98 type sumdbVerifier struct { 99 db dataSource 100 origin string 101 verifier note.Verifier 102 } 103 104 func (v sumdbVerifier) verifyLeaves(ctx context.Context) (uint64, error) { 105 leaves := make(chan logdb.StreamResult, 1) 106 107 // Get the raw data representing the latest checkpoint from the database. 108 _, cpRaw, _, err := v.db.GetLatestCheckpoint(ctx) 109 if err != nil { 110 if err == logdb.ErrNoDataFound { 111 klog.Warning("No checkpoint found in the log. Try again when the clone tool has completed.") 112 return 0, nil 113 } 114 return 0, fmt.Errorf("GetLatestCheckpoint(): %v", err) 115 } 116 // Parse the checkpoint to ensure it is from the expected log. 117 cp, _, _, err := log.ParseCheckpoint(cpRaw, v.origin, v.verifier) 118 if err != nil { 119 return 0, fmt.Errorf("ParseCheckpoint(): %v", err) 120 } 121 122 // Start streaming the leaves from the database, in order, from the beginning. 123 go v.db.StreamLeaves(ctx, 0, cp.Size, leaves) 124 125 // modVerToHashes is a map used to perform the core claim verification. 126 // Two entries in the log being mapped to the same key means that the log has 127 // the same module+version occurring more than once. This is only OK if both of 128 // the leaf entries commit to the same hashes for this key. 129 modVerToHashes := make(map[string]hashesAtIndex) 130 131 // Construct a compact range, which is essentially an efficient in-memory Merkle Tree 132 // calculator as we use it here. Every time we process a leaf we will append it to the 133 // compact range, and then at the end we must check that the calculated Merkle Tree 134 // root hash is the same as that in the checkpoint we parsed above. 135 rf := compact.RangeFactory{ 136 Hash: rfc6962.DefaultHasher.HashChildren, 137 } 138 cr := rf.NewEmptyRange(0) 139 140 // Now loop over each of the leaves, checking: 141 // 1. Each leaf is correctly formatted (syntax) 142 // 2. Each leaf is semantically valid in isolation 143 // 3. That any previous declaration for the module+version is consistent with this leaf 144 var resErr error 145 var index uint64 146 for leaf := range leaves { 147 if leaf.Err != nil { 148 return 0, fmt.Errorf("failed to get leaves from DB: %w", leaf.Err) 149 } 150 data := leaf.Leaf 151 if err := cr.Append(rfc6962.DefaultHasher.HashLeaf(data), nil); err != nil { 152 return 0, err 153 } 154 155 lines := strings.Split(string(data), "\n") 156 157 line0Parts := line0RE.FindStringSubmatch(lines[0]) 158 line0Module, line0Version, line0Hash := line0Parts[1], line0Parts[2], line0Parts[3] 159 160 line1Parts := line1RE.FindStringSubmatch(lines[1]) 161 line1Module, line1Version, line1Hash := line1Parts[1], line1Parts[2], line1Parts[3] 162 163 if line0Module != line1Module { 164 return 0, fmt.Errorf("mismatched module names at %d: (%s, %s)", index, line0Module, line1Module) 165 } 166 if line0Version != line1Version { 167 return 0, fmt.Errorf("mismatched version names at %d: (%s, %s)", index, line0Version, line1Version) 168 } 169 170 modVer := fmt.Sprintf("%s %s", line0Module, line0Version) 171 hashes := hashesAtIndex{ 172 line0Hash: line0Hash, 173 line1Hash: line1Hash, 174 index: index, 175 } 176 177 if existing, found := modVerToHashes[modVer]; found { 178 klog.V(1).Infof("Found existing hash for %q", modVer) 179 if !existing.hashEq(hashes) { 180 resErr = fmt.Errorf("module and version %q has conflicting hashes!\n%q != %q", modVer, existing, hashes) 181 klog.Error(resErr) 182 } 183 } 184 modVerToHashes[modVer] = hashes 185 index++ 186 } 187 if resErr != nil { 188 return 0, resErr 189 } 190 191 // Use the compact range to calculate the root hash and ensure it matches the checkpoint 192 rootHash, err := cr.GetRootHash(nil) 193 if err != nil { 194 return 0, fmt.Errorf("GetRootHash(): %v", err) 195 } 196 if !bytes.Equal(rootHash, cp.Hash) { 197 return 0, fmt.Errorf("Data corruption: checkpoint from DB has hash %x but calculated hash %x from leaves", cp.Hash, rootHash) 198 } 199 return index, nil 200 } 201 202 type hashesAtIndex struct { 203 line0Hash string 204 line1Hash string 205 index uint64 206 } 207 208 func (h hashesAtIndex) String() string { 209 return fmt.Sprintf("index=%d, line0Hash=%s line1Hash=%s", h.index, h.line0Hash, h.line1Hash) 210 } 211 212 func (h hashesAtIndex) hashEq(other hashesAtIndex) bool { 213 return h.line0Hash == other.line0Hash && h.line1Hash == other.line1Hash 214 }