github.com/zmap/zcrypto@v0.0.0-20240512203510-0fef58d9a9db/ct/scanner/scanner.go (about)

     1  package scanner
     2  
     3  import (
     4  	"container/list"
     5  	"fmt"
     6  	"math/big"
     7  	"regexp"
     8  	"sync"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	log "github.com/sirupsen/logrus"
    13  	"github.com/zmap/zcrypto/ct"
    14  	"github.com/zmap/zcrypto/ct/client"
    15  	"github.com/zmap/zcrypto/ct/x509"
    16  )
    17  
// Matcher decides which log entries the scanner reports to its callbacks.
// Clients wishing to implement their own Matchers should implement this interface:
type Matcher interface {
	// CertificateMatches is called by the scanner for each X509 Certificate found in the log.
	// The implementation should return |true| if the passed Certificate is interesting, and |false| otherwise.
	CertificateMatches(*x509.Certificate) bool

	// PrecertificateMatches is called by the scanner for each CT Precertificate found in the log.
	// The implementation should return |true| if the passed Precertificate is interesting, and |false| otherwise.
	PrecertificateMatches(*ct.Precertificate) bool
}
    28  
    29  // MatchAll is a Matcher which will match every possible Certificate and Precertificate.
    30  type MatchAll struct{}
    31  
    32  func (m MatchAll) CertificateMatches(_ *x509.Certificate) bool {
    33  	return true
    34  }
    35  
    36  func (m MatchAll) PrecertificateMatches(_ *ct.Precertificate) bool {
    37  	return true
    38  }
    39  
    40  // MatchNone is a Matcher which will never match any Certificate or Precertificate.
    41  type MatchNone struct{}
    42  
    43  func (m MatchNone) CertificateMatches(_ *x509.Certificate) bool {
    44  	return false
    45  }
    46  
    47  func (m MatchNone) PrecertificateMatches(_ *ct.Precertificate) bool {
    48  	return false
    49  }
    50  
    51  type MatchSerialNumber struct {
    52  	SerialNumber big.Int
    53  }
    54  
    55  func (m MatchSerialNumber) CertificateMatches(c *x509.Certificate) bool {
    56  	return c.SerialNumber.String() == m.SerialNumber.String()
    57  }
    58  
    59  func (m MatchSerialNumber) PrecertificateMatches(p *ct.Precertificate) bool {
    60  	return p.TBSCertificate.SerialNumber.String() == m.SerialNumber.String()
    61  }
    62  
    63  // MatchSubjectRegex is a Matcher which will use |CertificateSubjectRegex| and |PrecertificateSubjectRegex|
    64  // to determine whether Certificates and Precertificates are interesting.
    65  // The two regexes are tested against Subject Common Name as well as all
    66  // Subject Alternative Names
    67  type MatchSubjectRegex struct {
    68  	CertificateSubjectRegex    *regexp.Regexp
    69  	PrecertificateSubjectRegex *regexp.Regexp
    70  }
    71  
    72  // Returns true if either CN or any SAN of |c| matches |CertificateSubjectRegex|.
    73  func (m MatchSubjectRegex) CertificateMatches(c *x509.Certificate) bool {
    74  	if m.CertificateSubjectRegex.FindStringIndex(c.Subject.CommonName) != nil {
    75  		return true
    76  	}
    77  	for _, alt := range c.DNSNames {
    78  		if m.CertificateSubjectRegex.FindStringIndex(alt) != nil {
    79  			return true
    80  		}
    81  	}
    82  	return false
    83  }
    84  
    85  // Returns true if either CN or any SAN of |p| matches |PrecertificatesubjectRegex|.
    86  func (m MatchSubjectRegex) PrecertificateMatches(p *ct.Precertificate) bool {
    87  	if m.PrecertificateSubjectRegex.FindStringIndex(p.TBSCertificate.Subject.CommonName) != nil {
    88  		return true
    89  	}
    90  	for _, alt := range p.TBSCertificate.DNSNames {
    91  		if m.PrecertificateSubjectRegex.FindStringIndex(alt) != nil {
    92  			return true
    93  		}
    94  	}
    95  	return false
    96  }
    97  
    98  // Matches on issuer cn by regex
    99  type MatchIssuerRegex struct {
   100  	CertificateIssuerRegex    *regexp.Regexp
   101  	PrecertificateIssuerRegex *regexp.Regexp
   102  }
   103  
   104  func (m MatchIssuerRegex) CertificateMatches(c *x509.Certificate) bool {
   105  	return m.CertificateIssuerRegex.FindStringIndex(c.Issuer.CommonName) != nil
   106  }
   107  
   108  func (m MatchIssuerRegex) PrecertificateMatches(p *ct.Precertificate) bool {
   109  	return m.PrecertificateIssuerRegex.FindStringIndex(p.TBSCertificate.Issuer.CommonName) != nil
   110  }
   111  
// ScannerOptions holds configuration options for the Scanner
type ScannerOptions struct {
	// Custom matcher for x509 Certificates, functor will be called for each
	// Certificate found during scanning.
	Matcher Matcher

	// Match precerts only (Matcher still applies to precerts)
	PrecertOnly bool

	// Number of entries to request in one batch from the Log
	BatchSize int64

	// Number of concurrent matchers to run
	NumWorkers int

	// Number of concurrent fetchers to run
	ParallelFetch int

	// Log entry index to start fetching & matching at
	StartIndex int64

	// Don't print any status messages to stdout
	Quiet bool

	// The name of the CT server we're pulling certs from
	Name string

	// Log entry index at which Scan stops (exclusive). When 0, Scan uses the
	// tree size reported by the log's latest STH instead.
	MaximumIndex int64
}
   141  
   142  // Creates a new ScannerOptions struct with sensible defaults
   143  func DefaultScannerOptions() *ScannerOptions {
   144  	return &ScannerOptions{
   145  		Matcher:       &MatchAll{},
   146  		PrecertOnly:   false,
   147  		BatchSize:     1000,
   148  		NumWorkers:    1,
   149  		ParallelFetch: 1,
   150  		StartIndex:    0,
   151  		Quiet:         false,
   152  		Name:          "https://ct.googleapis.com/rocketeer",
   153  		MaximumIndex:  0,
   154  	}
   155  }
   156  
// Scanner is a tool to scan all the entries in a CT Log.
type Scanner struct {
	// Client used to talk to the CT log instance
	logClient *client.LogClient

	// Configuration options for this Scanner instance
	opts ScannerOptions

	// Counter of the number of log entries handed to processEntry
	// (certificates and precertificates alike); reset at the start of Scan.
	certsProcessed int64

	// Counter of the number of precertificates encountered during the scan.
	precertsSeen int64

	// Counter of entries whose parse failed with a fatal error.
	unparsableEntries int64
	// Counter of entries whose parse reported only non-fatal errors.
	entriesWithNonFatalErrors int64

	// Logger used for status and diagnostic messages.
	logger *log.Logger
}
   176  
// matcherJob represents the context for an individual matcher job.
// Fetchers create one per log entry and hand it to the matcher workers.
type matcherJob struct {
	// The log entry returned by the log server
	entry ct.LogEntry
	// The index of the entry containing the LeafInput in the log
	index int64
}
   184  
// fetchRange represents a range of certs to fetch from a CT log.
// Both bounds are inclusive log entry indices.
type fetchRange struct {
	// Index of the first entry to fetch.
	start int64
	// Index of the last entry to fetch (inclusive).
	end   int64
}
   190  
   191  // Takes the error returned by either x509.ParseCertificate() or
   192  // x509.ParseTBSCertificate() and determines if it's non-fatal or otherwise.
   193  // In the case of non-fatal errors, the error will be logged,
   194  // entriesWithNonFatalErrors will be incremented, and the return value will be
   195  // nil.
   196  // Fatal errors will be logged, unparsableEntires will be incremented, and the
   197  // fatal error itself will be returned.
   198  // When |err| is nil, this method does nothing.
   199  func (s *Scanner) handleParseEntryError(err error, entryType ct.LogEntryType, index int64) error {
   200  	if err == nil {
   201  		// No error to handle
   202  		return nil
   203  	}
   204  	switch err.(type) {
   205  	case x509.NonFatalErrors:
   206  		s.entriesWithNonFatalErrors++
   207  		// We'll make a note, but continue.
   208  		s.logger.Warnf("Non-fatal error in %+v at index %d of log at %s: %s", entryType, index, s.logClient.Uri, err)
   209  	default:
   210  		s.unparsableEntries++
   211  		s.logger.Warnf("Failed to parse in %+v at index %d of log at %s: %s", entryType, index, s.logClient.Uri, err)
   212  		return err
   213  	}
   214  	return nil
   215  }
   216  
   217  // Processes the given |entry| in the specified log.
   218  func (s *Scanner) processEntry(entry ct.LogEntry, foundCert func(*ct.LogEntry, string), foundPrecert func(*ct.LogEntry, string)) {
   219  	atomic.AddInt64(&s.certsProcessed, 1)
   220  	switch entry.Leaf.TimestampedEntry.EntryType {
   221  	case ct.X509LogEntryType:
   222  		if s.opts.PrecertOnly {
   223  			// Only interested in precerts and this is an X.509 cert, early-out.
   224  			return
   225  		}
   226  		cert, err := x509.ParseCertificate(entry.Leaf.TimestampedEntry.X509Entry)
   227  		if err = s.handleParseEntryError(err, entry.Leaf.TimestampedEntry.EntryType, entry.Index); err != nil {
   228  			// We hit an unparseable entry, already logged inside handleParseEntryError()
   229  			return
   230  		}
   231  		if s.opts.Matcher.CertificateMatches(cert) {
   232  			entry.X509Cert = cert
   233  			foundCert(&entry, s.opts.Name)
   234  		}
   235  	case ct.PrecertLogEntryType:
   236  		c, err := x509.ParseTBSCertificate(entry.Leaf.TimestampedEntry.PrecertEntry.TBSCertificate)
   237  		if err = s.handleParseEntryError(err, entry.Leaf.TimestampedEntry.EntryType, entry.Index); err != nil {
   238  			// We hit an unparseable entry, already logged inside handleParseEntryError()
   239  			return
   240  		}
   241  		precert := &ct.Precertificate{
   242  			Raw:            entry.Chain[0],
   243  			TBSCertificate: *c,
   244  			IssuerKeyHash:  entry.Leaf.TimestampedEntry.PrecertEntry.IssuerKeyHash}
   245  		if s.opts.Matcher.PrecertificateMatches(precert) {
   246  			entry.Precert = precert
   247  			foundPrecert(&entry, s.opts.Name)
   248  		}
   249  		s.precertsSeen++
   250  	}
   251  }
   252  
   253  // Worker function to match certs.
   254  // Accepts MatcherJobs over the |entries| channel, and processes them.
   255  // Returns true over the |done| channel when the |entries| channel is closed.
   256  func (s *Scanner) matcherJob(id int, entries <-chan matcherJob, foundCert func(*ct.LogEntry, string), foundPrecert func(*ct.LogEntry, string), wg *sync.WaitGroup) {
   257  	for e := range entries {
   258  		s.processEntry(e.entry, foundCert, foundPrecert)
   259  	}
   260  	s.logger.Debugf("Matcher %d finished", id)
   261  	wg.Done()
   262  }
   263  
   264  // Worker function for fetcher jobs.
   265  // Accepts cert ranges to fetch over the |ranges| channel, and if the fetch is
   266  // successful sends the individual LeafInputs out (as MatcherJobs) into the
   267  // |entries| channel for the matchers to chew on.
   268  // Will retry failed attempts to retrieve ranges indefinitely.
   269  // Sends true over the |done| channel when the |ranges| channel is closed.
   270  func (s *Scanner) fetcherJob(id int, ranges <-chan fetchRange, entries chan<- matcherJob, wg *sync.WaitGroup) {
   271  	for r := range ranges {
   272  		success := false
   273  		// TODO(alcutter): give up after a while:
   274  		for !success {
   275  			logEntries, err := s.logClient.GetEntries(r.start, r.end)
   276  			if err != nil {
   277  				s.logger.Infof("Problem fetching from log: %s", err)
   278  				if err.Error() == "HTTP error: 500 Internal Server Error" {
   279  					time.Sleep(500 * time.Millisecond)
   280  				}
   281  				continue
   282  			}
   283  			if len(logEntries) == 0 {
   284  				s.logger.Debugf("Log %s gave empty slice of certificates for range %d-%d", s.logClient.Uri, r.start, r.end)
   285  				time.Sleep(500 * time.Millisecond)
   286  				continue
   287  			}
   288  			for _, logEntry := range logEntries {
   289  				logEntry.Index = r.start
   290  				entries <- matcherJob{logEntry, r.start}
   291  				r.start++
   292  			}
   293  			if r.start > r.end {
   294  				// Only complete if we actually got all the leaves we were
   295  				// expecting -- Logs MAY return fewer than the number of
   296  				// leaves requested.
   297  				success = true
   298  			}
   299  		}
   300  	}
   301  	s.logger.Debugf("Fetcher %d finished", id)
   302  	wg.Done()
   303  }
   304  
   305  // Returns the smaller of |a| and |b|
   306  func min(a int64, b int64) int64 {
   307  	if a < b {
   308  		return a
   309  	} else {
   310  		return b
   311  	}
   312  }
   313  
   314  // Returns the larger of |a| and |b|
   315  func max(a int64, b int64) int64 {
   316  	if a > b {
   317  		return a
   318  	} else {
   319  		return b
   320  	}
   321  }
   322  
   323  // Pretty prints the passed in number of |seconds| into a more human readable
   324  // string.
   325  func humanTime(seconds int) string {
   326  	nanos := time.Duration(seconds) * time.Second
   327  	hours := int(nanos / (time.Hour))
   328  	nanos %= time.Hour
   329  	minutes := int(nanos / time.Minute)
   330  	nanos %= time.Minute
   331  	seconds = int(nanos / time.Second)
   332  	s := ""
   333  	if hours > 0 {
   334  		s += fmt.Sprintf("%d hours ", hours)
   335  	}
   336  	if minutes > 0 {
   337  		s += fmt.Sprintf("%d minutes ", minutes)
   338  	}
   339  	if seconds > 0 {
   340  		s += fmt.Sprintf("%d seconds ", seconds)
   341  	}
   342  	return s
   343  }
   344  
   345  // Performs a scan against the Log.
   346  // For each x509 certificate found, |foundCert| will be called with the
   347  // index of the entry and certificate itself as arguments.  For each precert
   348  // found, |foundPrecert| will be called with the index of the entry and the raw
   349  // precert string as the arguments.
   350  //
   351  // This method blocks until the scan is complete.
   352  func (s *Scanner) Scan(foundCert func(*ct.LogEntry, string),
   353  	foundPrecert func(*ct.LogEntry, string), updater chan int64) (int64, error) {
   354  	s.logger.Info("Starting up...\n")
   355  	s.certsProcessed = 0
   356  	s.precertsSeen = 0
   357  	s.unparsableEntries = 0
   358  	s.entriesWithNonFatalErrors = 0
   359  
   360  	latestSth, err := s.logClient.GetSTH()
   361  	if err != nil {
   362  		return 0, err
   363  	}
   364  	s.logger.Infof("Got %s STH with %d certs", s.opts.Name, latestSth.TreeSize)
   365  
   366  	stopIndex := s.opts.MaximumIndex
   367  	if s.opts.MaximumIndex == 0 {
   368  		stopIndex = int64(latestSth.TreeSize)
   369  	}
   370  
   371  	ticker := time.NewTicker(time.Second)
   372  	startTime := time.Now()
   373  	fetches := make(chan fetchRange, 1000)
   374  	jobs := make(chan matcherJob, 100000)
   375  	//done := make(chan bool)
   376  	go func() {
   377  		//oldProc := int64(0)
   378  		for range ticker.C {
   379  
   380  			throughput := float64(s.certsProcessed) / time.Since(startTime).Seconds()
   381  			remainingCerts := int64(stopIndex) - int64(s.opts.StartIndex) - s.certsProcessed
   382  
   383  			if remainingCerts == 0 {
   384  				updater <- int64(stopIndex)
   385  				return
   386  			}
   387  
   388  			remainingSeconds := int(float64(remainingCerts) / throughput)
   389  			remainingString := humanTime(remainingSeconds)
   390  			s.logger.Infof("Processed: %d %s certs (to index %d). Throughput: %3.2f ETA: %s\n", s.certsProcessed, s.opts.Name,
   391  				s.opts.StartIndex+int64(s.certsProcessed), throughput, remainingString)
   392  
   393  			updater <- int64(stopIndex) - remainingCerts
   394  		}
   395  	}()
   396  
   397  	var ranges list.List
   398  	for start := s.opts.StartIndex; start < int64(stopIndex); {
   399  		end := min(start+int64(s.opts.BatchSize), int64(stopIndex)) - 1
   400  		ranges.PushBack(fetchRange{start, end})
   401  		start = end + 1
   402  	}
   403  	var fetcherWG sync.WaitGroup
   404  	var matcherWG sync.WaitGroup
   405  	// Start matcher workers
   406  	for w := 0; w < s.opts.NumWorkers; w++ {
   407  		matcherWG.Add(1)
   408  		go s.matcherJob(w, jobs, foundCert, foundPrecert, &matcherWG)
   409  	}
   410  	// Start fetcher workers
   411  	for w := 0; w < s.opts.ParallelFetch; w++ {
   412  		fetcherWG.Add(1)
   413  		go s.fetcherJob(w, fetches, jobs, &fetcherWG)
   414  	}
   415  	for r := ranges.Front(); r != nil; r = r.Next() {
   416  		fetches <- r.Value.(fetchRange)
   417  	}
   418  	close(fetches)
   419  	fetcherWG.Wait()
   420  	close(jobs)
   421  	matcherWG.Wait()
   422  	ticker.Stop()
   423  
   424  	s.logger.Infof("Completed %d %s certs in %s", s.certsProcessed, s.opts.Name, humanTime(int(time.Since(startTime).Seconds())))
   425  	s.logger.Infof("Saw %d precerts", s.precertsSeen)
   426  	s.logger.Infof("%d unparsable entries, %d non-fatal errors", s.unparsableEntries, s.entriesWithNonFatalErrors)
   427  	return int64(s.opts.StartIndex) + s.certsProcessed, nil
   428  }
   429  
   430  // Creates a new Scanner instance using |client| to talk to the log, and taking
   431  // configuration options from |opts|.
   432  func NewScanner(client *client.LogClient, opts ScannerOptions, logger *log.Logger) *Scanner {
   433  	var scanner Scanner
   434  	scanner.logClient = client
   435  	// Set a default match-everything regex if none was provided:
   436  	if opts.Matcher == nil {
   437  		opts.Matcher = &MatchAll{}
   438  	}
   439  	scanner.opts = opts
   440  	scanner.logger = logger
   441  	return &scanner
   442  }