github.com/zmap/zcrypto@v0.0.0-20240512203510-0fef58d9a9db/ct/scanner/scanner.go (about) 1 package scanner 2 3 import ( 4 "container/list" 5 "fmt" 6 "math/big" 7 "regexp" 8 "sync" 9 "sync/atomic" 10 "time" 11 12 log "github.com/sirupsen/logrus" 13 "github.com/zmap/zcrypto/ct" 14 "github.com/zmap/zcrypto/ct/client" 15 "github.com/zmap/zcrypto/ct/x509" 16 ) 17 18 // Clients wishing to implement their own Matchers should implement this interface: 19 type Matcher interface { 20 // CertificateMatches is called by the scanner for each X509 Certificate found in the log. 21 // The implementation should return |true| if the passed Certificate is interesting, and |false| otherwise. 22 CertificateMatches(*x509.Certificate) bool 23 24 // PrecertificateMatches is called by the scanner for each CT Precertificate found in the log. 25 // The implementation should return |true| if the passed Precertificate is interesting, and |false| otherwise. 26 PrecertificateMatches(*ct.Precertificate) bool 27 } 28 29 // MatchAll is a Matcher which will match every possible Certificate and Precertificate. 30 type MatchAll struct{} 31 32 func (m MatchAll) CertificateMatches(_ *x509.Certificate) bool { 33 return true 34 } 35 36 func (m MatchAll) PrecertificateMatches(_ *ct.Precertificate) bool { 37 return true 38 } 39 40 // MatchNone is a Matcher which will never match any Certificate or Precertificate. 41 type MatchNone struct{} 42 43 func (m MatchNone) CertificateMatches(_ *x509.Certificate) bool { 44 return false 45 } 46 47 func (m MatchNone) PrecertificateMatches(_ *ct.Precertificate) bool { 48 return false 49 } 50 51 type MatchSerialNumber struct { 52 SerialNumber big.Int 53 } 54 55 func (m MatchSerialNumber) CertificateMatches(c *x509.Certificate) bool { 56 return c.SerialNumber.String() == m.SerialNumber.String() 57 } 58 59 func (m MatchSerialNumber) PrecertificateMatches(p *ct.Precertificate) bool { 60 return p.TBSCertificate.SerialNumber.String() == m.SerialNumber.String() 61 } 62 63 // MatchSubjectRegex is a Matcher which will use |CertificateSubjectRegex| and |PrecertificateSubjectRegex| 64 // to determine whether Certificates and Precertificates are interesting. 65 // The two regexes are tested against Subject Common Name as well as all 66 // Subject Alternative Names 67 type MatchSubjectRegex struct { 68 CertificateSubjectRegex *regexp.Regexp 69 PrecertificateSubjectRegex *regexp.Regexp 70 } 71 72 // Returns true if either CN or any SAN of |c| matches |CertificateSubjectRegex|. 73 func (m MatchSubjectRegex) CertificateMatches(c *x509.Certificate) bool { 74 if m.CertificateSubjectRegex.FindStringIndex(c.Subject.CommonName) != nil { 75 return true 76 } 77 for _, alt := range c.DNSNames { 78 if m.CertificateSubjectRegex.FindStringIndex(alt) != nil { 79 return true 80 } 81 } 82 return false 83 } 84 85 // Returns true if either CN or any SAN of |p| matches |PrecertificatesubjectRegex|. 86 func (m MatchSubjectRegex) PrecertificateMatches(p *ct.Precertificate) bool { 87 if m.PrecertificateSubjectRegex.FindStringIndex(p.TBSCertificate.Subject.CommonName) != nil { 88 return true 89 } 90 for _, alt := range p.TBSCertificate.DNSNames { 91 if m.PrecertificateSubjectRegex.FindStringIndex(alt) != nil { 92 return true 93 } 94 } 95 return false 96 } 97 98 // Matches on issuer cn by regex 99 type MatchIssuerRegex struct { 100 CertificateIssuerRegex *regexp.Regexp 101 PrecertificateIssuerRegex *regexp.Regexp 102 } 103 104 func (m MatchIssuerRegex) CertificateMatches(c *x509.Certificate) bool { 105 return m.CertificateIssuerRegex.FindStringIndex(c.Issuer.CommonName) != nil 106 } 107 108 func (m MatchIssuerRegex) PrecertificateMatches(p *ct.Precertificate) bool { 109 return m.PrecertificateIssuerRegex.FindStringIndex(p.TBSCertificate.Issuer.CommonName) != nil 110 } 111 112 // ScannerOptions holds configuration options for the Scanner 113 type ScannerOptions struct { 114 // Custom matcher for x509 Certificates, functor will be called for each 115 // Certificate found during scanning. 116 Matcher Matcher 117 118 // Match precerts only (Matcher still applies to precerts) 119 PrecertOnly bool 120 121 // Number of entries to request in one batch from the Log 122 BatchSize int64 123 124 // Number of concurrent matchers to run 125 NumWorkers int 126 127 // Number of concurrent fethers to run 128 ParallelFetch int 129 130 // Log entry index to start fetching & matching at 131 StartIndex int64 132 133 // Don't print any status messages to stdout 134 Quiet bool 135 136 // The name of the CT server we're pulling certs from 137 Name string 138 139 MaximumIndex int64 140 } 141 142 // Creates a new ScannerOptions struct with sensible defaults 143 func DefaultScannerOptions() *ScannerOptions { 144 return &ScannerOptions{ 145 Matcher: &MatchAll{}, 146 PrecertOnly: false, 147 BatchSize: 1000, 148 NumWorkers: 1, 149 ParallelFetch: 1, 150 StartIndex: 0, 151 Quiet: false, 152 Name: "https://ct.googleapis.com/rocketeer", 153 MaximumIndex: 0, 154 } 155 } 156 157 // Scanner is a tool to scan all the entries in a CT Log. 158 type Scanner struct { 159 // Client used to talk to the CT log instance 160 logClient *client.LogClient 161 162 // Configuration options for this Scanner instance 163 opts ScannerOptions 164 165 // Counter of the number of certificates scanned 166 certsProcessed int64 167 168 // Counter of the number of precertificates encountered during the scan. 169 precertsSeen int64 170 171 unparsableEntries int64 172 entriesWithNonFatalErrors int64 173 174 logger *log.Logger 175 } 176 177 // matcherJob represents the context for an individual matcher job. 178 type matcherJob struct { 179 // The log entry returned by the log server 180 entry ct.LogEntry 181 // The index of the entry containing the LeafInput in the log 182 index int64 183 } 184 185 // fetchRange represents a range of certs to fetch from a CT log 186 type fetchRange struct { 187 start int64 188 end int64 189 } 190 191 // Takes the error returned by either x509.ParseCertificate() or 192 // x509.ParseTBSCertificate() and determines if it's non-fatal or otherwise. 193 // In the case of non-fatal errors, the error will be logged, 194 // entriesWithNonFatalErrors will be incremented, and the return value will be 195 // nil. 196 // Fatal errors will be logged, unparsableEntires will be incremented, and the 197 // fatal error itself will be returned. 198 // When |err| is nil, this method does nothing. 199 func (s *Scanner) handleParseEntryError(err error, entryType ct.LogEntryType, index int64) error { 200 if err == nil { 201 // No error to handle 202 return nil 203 } 204 switch err.(type) { 205 case x509.NonFatalErrors: 206 s.entriesWithNonFatalErrors++ 207 // We'll make a note, but continue. 208 s.logger.Warnf("Non-fatal error in %+v at index %d of log at %s: %s", entryType, index, s.logClient.Uri, err) 209 default: 210 s.unparsableEntries++ 211 s.logger.Warnf("Failed to parse in %+v at index %d of log at %s: %s", entryType, index, s.logClient.Uri, err) 212 return err 213 } 214 return nil 215 } 216 217 // Processes the given |entry| in the specified log. 218 func (s *Scanner) processEntry(entry ct.LogEntry, foundCert func(*ct.LogEntry, string), foundPrecert func(*ct.LogEntry, string)) { 219 atomic.AddInt64(&s.certsProcessed, 1) 220 switch entry.Leaf.TimestampedEntry.EntryType { 221 case ct.X509LogEntryType: 222 if s.opts.PrecertOnly { 223 // Only interested in precerts and this is an X.509 cert, early-out. 224 return 225 } 226 cert, err := x509.ParseCertificate(entry.Leaf.TimestampedEntry.X509Entry) 227 if err = s.handleParseEntryError(err, entry.Leaf.TimestampedEntry.EntryType, entry.Index); err != nil { 228 // We hit an unparseable entry, already logged inside handleParseEntryError() 229 return 230 } 231 if s.opts.Matcher.CertificateMatches(cert) { 232 entry.X509Cert = cert 233 foundCert(&entry, s.opts.Name) 234 } 235 case ct.PrecertLogEntryType: 236 c, err := x509.ParseTBSCertificate(entry.Leaf.TimestampedEntry.PrecertEntry.TBSCertificate) 237 if err = s.handleParseEntryError(err, entry.Leaf.TimestampedEntry.EntryType, entry.Index); err != nil { 238 // We hit an unparseable entry, already logged inside handleParseEntryError() 239 return 240 } 241 precert := &ct.Precertificate{ 242 Raw: entry.Chain[0], 243 TBSCertificate: *c, 244 IssuerKeyHash: entry.Leaf.TimestampedEntry.PrecertEntry.IssuerKeyHash} 245 if s.opts.Matcher.PrecertificateMatches(precert) { 246 entry.Precert = precert 247 foundPrecert(&entry, s.opts.Name) 248 } 249 s.precertsSeen++ 250 } 251 } 252 253 // Worker function to match certs. 254 // Accepts MatcherJobs over the |entries| channel, and processes them. 255 // Returns true over the |done| channel when the |entries| channel is closed. 256 func (s *Scanner) matcherJob(id int, entries <-chan matcherJob, foundCert func(*ct.LogEntry, string), foundPrecert func(*ct.LogEntry, string), wg *sync.WaitGroup) { 257 for e := range entries { 258 s.processEntry(e.entry, foundCert, foundPrecert) 259 } 260 s.logger.Debugf("Matcher %d finished", id) 261 wg.Done() 262 } 263 264 // Worker function for fetcher jobs. 265 // Accepts cert ranges to fetch over the |ranges| channel, and if the fetch is 266 // successful sends the individual LeafInputs out (as MatcherJobs) into the 267 // |entries| channel for the matchers to chew on. 268 // Will retry failed attempts to retrieve ranges indefinitely. 269 // Sends true over the |done| channel when the |ranges| channel is closed. 270 func (s *Scanner) fetcherJob(id int, ranges <-chan fetchRange, entries chan<- matcherJob, wg *sync.WaitGroup) { 271 for r := range ranges { 272 success := false 273 // TODO(alcutter): give up after a while: 274 for !success { 275 logEntries, err := s.logClient.GetEntries(r.start, r.end) 276 if err != nil { 277 s.logger.Infof("Problem fetching from log: %s", err) 278 if err.Error() == "HTTP error: 500 Internal Server Error" { 279 time.Sleep(500 * time.Millisecond) 280 } 281 continue 282 } 283 if len(logEntries) == 0 { 284 s.logger.Debugf("Log %s gave empty slice of certificates for range %d-%d", s.logClient.Uri, r.start, r.end) 285 time.Sleep(500 * time.Millisecond) 286 continue 287 } 288 for _, logEntry := range logEntries { 289 logEntry.Index = r.start 290 entries <- matcherJob{logEntry, r.start} 291 r.start++ 292 } 293 if r.start > r.end { 294 // Only complete if we actually got all the leaves we were 295 // expecting -- Logs MAY return fewer than the number of 296 // leaves requested. 297 success = true 298 } 299 } 300 } 301 s.logger.Debugf("Fetcher %d finished", id) 302 wg.Done() 303 } 304 305 // Returns the smaller of |a| and |b| 306 func min(a int64, b int64) int64 { 307 if a < b { 308 return a 309 } else { 310 return b 311 } 312 } 313 314 // Returns the larger of |a| and |b| 315 func max(a int64, b int64) int64 { 316 if a > b { 317 return a 318 } else { 319 return b 320 } 321 } 322 323 // Pretty prints the passed in number of |seconds| into a more human readable 324 // string. 325 func humanTime(seconds int) string { 326 nanos := time.Duration(seconds) * time.Second 327 hours := int(nanos / (time.Hour)) 328 nanos %= time.Hour 329 minutes := int(nanos / time.Minute) 330 nanos %= time.Minute 331 seconds = int(nanos / time.Second) 332 s := "" 333 if hours > 0 { 334 s += fmt.Sprintf("%d hours ", hours) 335 } 336 if minutes > 0 { 337 s += fmt.Sprintf("%d minutes ", minutes) 338 } 339 if seconds > 0 { 340 s += fmt.Sprintf("%d seconds ", seconds) 341 } 342 return s 343 } 344 345 // Performs a scan against the Log. 346 // For each x509 certificate found, |foundCert| will be called with the 347 // index of the entry and certificate itself as arguments. For each precert 348 // found, |foundPrecert| will be called with the index of the entry and the raw 349 // precert string as the arguments. 350 // 351 // This method blocks until the scan is complete. 352 func (s *Scanner) Scan(foundCert func(*ct.LogEntry, string), 353 foundPrecert func(*ct.LogEntry, string), updater chan int64) (int64, error) { 354 s.logger.Info("Starting up...\n") 355 s.certsProcessed = 0 356 s.precertsSeen = 0 357 s.unparsableEntries = 0 358 s.entriesWithNonFatalErrors = 0 359 360 latestSth, err := s.logClient.GetSTH() 361 if err != nil { 362 return 0, err 363 } 364 s.logger.Infof("Got %s STH with %d certs", s.opts.Name, latestSth.TreeSize) 365 366 stopIndex := s.opts.MaximumIndex 367 if s.opts.MaximumIndex == 0 { 368 stopIndex = int64(latestSth.TreeSize) 369 } 370 371 ticker := time.NewTicker(time.Second) 372 startTime := time.Now() 373 fetches := make(chan fetchRange, 1000) 374 jobs := make(chan matcherJob, 100000) 375 //done := make(chan bool) 376 go func() { 377 //oldProc := int64(0) 378 for range ticker.C { 379 380 throughput := float64(s.certsProcessed) / time.Since(startTime).Seconds() 381 remainingCerts := int64(stopIndex) - int64(s.opts.StartIndex) - s.certsProcessed 382 383 if remainingCerts == 0 { 384 updater <- int64(stopIndex) 385 return 386 } 387 388 remainingSeconds := int(float64(remainingCerts) / throughput) 389 remainingString := humanTime(remainingSeconds) 390 s.logger.Infof("Processed: %d %s certs (to index %d). Throughput: %3.2f ETA: %s\n", s.certsProcessed, s.opts.Name, 391 s.opts.StartIndex+int64(s.certsProcessed), throughput, remainingString) 392 393 updater <- int64(stopIndex) - remainingCerts 394 } 395 }() 396 397 var ranges list.List 398 for start := s.opts.StartIndex; start < int64(stopIndex); { 399 end := min(start+int64(s.opts.BatchSize), int64(stopIndex)) - 1 400 ranges.PushBack(fetchRange{start, end}) 401 start = end + 1 402 } 403 var fetcherWG sync.WaitGroup 404 var matcherWG sync.WaitGroup 405 // Start matcher workers 406 for w := 0; w < s.opts.NumWorkers; w++ { 407 matcherWG.Add(1) 408 go s.matcherJob(w, jobs, foundCert, foundPrecert, &matcherWG) 409 } 410 // Start fetcher workers 411 for w := 0; w < s.opts.ParallelFetch; w++ { 412 fetcherWG.Add(1) 413 go s.fetcherJob(w, fetches, jobs, &fetcherWG) 414 } 415 for r := ranges.Front(); r != nil; r = r.Next() { 416 fetches <- r.Value.(fetchRange) 417 } 418 close(fetches) 419 fetcherWG.Wait() 420 close(jobs) 421 matcherWG.Wait() 422 ticker.Stop() 423 424 s.logger.Infof("Completed %d %s certs in %s", s.certsProcessed, s.opts.Name, humanTime(int(time.Since(startTime).Seconds()))) 425 s.logger.Infof("Saw %d precerts", s.precertsSeen) 426 s.logger.Infof("%d unparsable entries, %d non-fatal errors", s.unparsableEntries, s.entriesWithNonFatalErrors) 427 return int64(s.opts.StartIndex) + s.certsProcessed, nil 428 } 429 430 // Creates a new Scanner instance using |client| to talk to the log, and taking 431 // configuration options from |opts|. 432 func NewScanner(client *client.LogClient, opts ScannerOptions, logger *log.Logger) *Scanner { 433 var scanner Scanner 434 scanner.logClient = client 435 // Set a default match-everything regex if none was provided: 436 if opts.Matcher == nil { 437 opts.Matcher = &MatchAll{} 438 } 439 scanner.opts = opts 440 scanner.logger = logger 441 return &scanner 442 }