github.com/letsencrypt/boulder@v0.20251208.0/cmd/bad-key-revoker/main.go (about) 1 package notmain 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "os" 8 "time" 9 10 "github.com/jmhodges/clock" 11 "github.com/prometheus/client_golang/prometheus" 12 "github.com/prometheus/client_golang/prometheus/promauto" 13 "google.golang.org/grpc" 14 "google.golang.org/protobuf/types/known/emptypb" 15 16 "github.com/letsencrypt/boulder/cmd" 17 "github.com/letsencrypt/boulder/config" 18 "github.com/letsencrypt/boulder/core" 19 "github.com/letsencrypt/boulder/db" 20 bgrpc "github.com/letsencrypt/boulder/grpc" 21 blog "github.com/letsencrypt/boulder/log" 22 rapb "github.com/letsencrypt/boulder/ra/proto" 23 "github.com/letsencrypt/boulder/revocation" 24 "github.com/letsencrypt/boulder/sa" 25 ) 26 27 const blockedKeysGaugeLimit = 1000 28 29 // revoker is an interface used to reduce the scope of a RA gRPC client 30 // to only the single method we need to use, this makes testing significantly 31 // simpler 32 type revoker interface { 33 AdministrativelyRevokeCertificate(ctx context.Context, in *rapb.AdministrativelyRevokeCertificateRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) 34 } 35 36 type badKeyRevoker struct { 37 dbMap *db.WrappedMap 38 maxRevocations int 39 serialBatchSize int 40 raClient revoker 41 logger blog.Logger 42 clk clock.Clock 43 backoffIntervalBase time.Duration 44 backoffIntervalMax time.Duration 45 backoffFactor float64 46 backoffTicker int 47 maxExpectedReplicationLag time.Duration 48 keysToProcess prometheus.Gauge 49 keysProcessed *prometheus.CounterVec 50 certsRevoked prometheus.Counter 51 } 52 53 // uncheckedBlockedKey represents a row in the blockedKeys table 54 type uncheckedBlockedKey struct { 55 KeyHash []byte 56 RevokedBy int64 57 } 58 59 func (ubk uncheckedBlockedKey) String() string { 60 return fmt.Sprintf("[revokedBy: %d, keyHash: %x]", 61 ubk.RevokedBy, ubk.KeyHash) 62 } 63 64 func (bkr *badKeyRevoker) countUncheckedKeys(ctx context.Context) (int, error) { 65 var count int 66 err := bkr.dbMap.SelectOne( 67 ctx, 68 &count, 69 `SELECT COUNT(*) 70 FROM (SELECT 1 FROM blockedKeys 71 WHERE extantCertificatesChecked = false AND added < ? - INTERVAL ? SECOND 72 LIMIT ?) AS a`, 73 bkr.clk.Now(), 74 bkr.maxExpectedReplicationLag.Seconds(), 75 blockedKeysGaugeLimit, 76 ) 77 return count, err 78 } 79 80 func (bkr *badKeyRevoker) selectUncheckedKey(ctx context.Context) (uncheckedBlockedKey, error) { 81 var row uncheckedBlockedKey 82 err := bkr.dbMap.SelectOne( 83 ctx, 84 &row, 85 `SELECT keyHash, revokedBy 86 FROM blockedKeys 87 WHERE extantCertificatesChecked = false AND added < ? - INTERVAL ? SECOND 88 LIMIT 1`, 89 bkr.clk.Now(), 90 bkr.maxExpectedReplicationLag.Seconds(), 91 ) 92 return row, err 93 } 94 95 // unrevokedCertificate represents a yet to be revoked certificate 96 type unrevokedCertificate struct { 97 ID int 98 Serial string 99 DER []byte 100 RegistrationID int64 101 Status core.OCSPStatus 102 IsExpired bool 103 } 104 105 func (uc unrevokedCertificate) String() string { 106 return fmt.Sprintf("id=%d serial=%s regID=%d status=%s expired=%t", 107 uc.ID, uc.Serial, uc.RegistrationID, uc.Status, uc.IsExpired) 108 } 109 110 // findUnrevoked looks for all unexpired, currently valid certificates which have a specific SPKI hash, 111 // by looking first at the keyHashToSerial table and then the certificateStatus and certificates tables. 112 // If the number of certificates it finds is larger than bkr.maxRevocations it'll error out. 113 func (bkr *badKeyRevoker) findUnrevoked(ctx context.Context, unchecked uncheckedBlockedKey) ([]unrevokedCertificate, error) { 114 var unrevokedCerts []unrevokedCertificate 115 initialID := 0 116 for { 117 var batch []struct { 118 ID int 119 CertSerial string 120 } 121 _, err := bkr.dbMap.Select( 122 ctx, 123 &batch, 124 "SELECT id, certSerial FROM keyHashToSerial WHERE keyHash = ? AND id > ? AND certNotAfter > ? ORDER BY id LIMIT ?", 125 unchecked.KeyHash, 126 initialID, 127 bkr.clk.Now(), 128 bkr.serialBatchSize, 129 ) 130 if err != nil { 131 return nil, err 132 } 133 if len(batch) == 0 { 134 break 135 } 136 initialID = batch[len(batch)-1].ID 137 for _, serial := range batch { 138 var unrevokedCert unrevokedCertificate 139 // NOTE: This has a `LIMIT 1` because the certificateStatus and precertificates 140 // tables do not have a UNIQUE KEY on serial (for partitioning reasons). So it's 141 // possible we could get multiple results for a single serial number, but they 142 // would be duplicates. 143 err = bkr.dbMap.SelectOne( 144 ctx, 145 &unrevokedCert, 146 `SELECT cs.id, cs.serial, c.registrationID, c.der, cs.status, cs.isExpired 147 FROM certificateStatus AS cs 148 JOIN precertificates AS c 149 ON cs.serial = c.serial 150 WHERE cs.serial = ? 151 LIMIT 1`, 152 serial.CertSerial, 153 ) 154 if err != nil { 155 return nil, err 156 } 157 if unrevokedCert.IsExpired || unrevokedCert.Status == core.OCSPStatusRevoked { 158 continue 159 } 160 unrevokedCerts = append(unrevokedCerts, unrevokedCert) 161 } 162 } 163 if len(unrevokedCerts) > bkr.maxRevocations { 164 return nil, fmt.Errorf("too many certificates to revoke associated with %x: got %d, max %d", unchecked.KeyHash, len(unrevokedCerts), bkr.maxRevocations) 165 } 166 return unrevokedCerts, nil 167 } 168 169 // markRowChecked updates a row in the blockedKeys table to mark a keyHash 170 // as having been checked for extant unrevoked certificates. 171 func (bkr *badKeyRevoker) markRowChecked(ctx context.Context, unchecked uncheckedBlockedKey) error { 172 _, err := bkr.dbMap.ExecContext(ctx, "UPDATE blockedKeys SET extantCertificatesChecked = true WHERE keyHash = ?", unchecked.KeyHash) 173 return err 174 } 175 176 // revokeCerts revokes all the provided certificates. It uses reason 177 // keyCompromise and includes note indicating that they were revoked by 178 // bad-key-revoker. 179 func (bkr *badKeyRevoker) revokeCerts(certs []unrevokedCertificate) error { 180 for _, cert := range certs { 181 _, err := bkr.raClient.AdministrativelyRevokeCertificate(context.Background(), &rapb.AdministrativelyRevokeCertificateRequest{ 182 Cert: cert.DER, 183 Serial: cert.Serial, 184 Code: int64(revocation.KeyCompromise), 185 AdminName: "bad-key-revoker", 186 }) 187 if err != nil { 188 return err 189 } 190 bkr.certsRevoked.Inc() 191 } 192 return nil 193 } 194 195 // invoke exits early and returns true if there is no work to be done. 196 // Otherwise, it processes a single key in the blockedKeys table and returns false. 197 func (bkr *badKeyRevoker) invoke(ctx context.Context) (bool, error) { 198 // Gather a count of rows to be processed. 199 uncheckedCount, err := bkr.countUncheckedKeys(ctx) 200 if err != nil { 201 return false, err 202 } 203 204 // Set the gauge to the number of rows to be processed (max: 205 // blockedKeysGaugeLimit). 206 bkr.keysToProcess.Set(float64(uncheckedCount)) 207 208 if uncheckedCount >= blockedKeysGaugeLimit { 209 bkr.logger.AuditInfof("found >= %d unchecked blocked keys left to process", uncheckedCount) 210 } else { 211 bkr.logger.AuditInfof("found %d unchecked blocked keys left to process", uncheckedCount) 212 } 213 214 // select a row to process 215 unchecked, err := bkr.selectUncheckedKey(ctx) 216 if err != nil { 217 if db.IsNoRows(err) { 218 return true, nil 219 } 220 return false, err 221 } 222 bkr.logger.AuditInfo(fmt.Sprintf("found unchecked block key to work on: %s", unchecked)) 223 224 // select all unrevoked, unexpired serials associated with the blocked key hash 225 unrevokedCerts, err := bkr.findUnrevoked(ctx, unchecked) 226 if err != nil { 227 bkr.logger.AuditInfo(fmt.Sprintf("finding unrevoked certificates related to %s: %s", 228 unchecked, err)) 229 return false, err 230 } 231 if len(unrevokedCerts) == 0 { 232 bkr.logger.AuditInfo(fmt.Sprintf("found no certificates that need revoking related to %s, marking row as checked", unchecked)) 233 // mark row as checked 234 err = bkr.markRowChecked(ctx, unchecked) 235 if err != nil { 236 return false, err 237 } 238 return false, nil 239 } 240 241 var serials []string 242 for _, cert := range unrevokedCerts { 243 serials = append(serials, cert.Serial) 244 } 245 bkr.logger.AuditInfo(fmt.Sprintf("revoking serials %v for key with hash %x", serials, unchecked.KeyHash)) 246 247 // revoke each certificate 248 err = bkr.revokeCerts(unrevokedCerts) 249 if err != nil { 250 return false, err 251 } 252 253 // mark the key as checked 254 err = bkr.markRowChecked(ctx, unchecked) 255 if err != nil { 256 return false, err 257 } 258 return false, nil 259 } 260 261 type Config struct { 262 BadKeyRevoker struct { 263 DB cmd.DBConfig 264 DebugAddr string `validate:"omitempty,hostname_port"` 265 266 TLS cmd.TLSConfig 267 RAService *cmd.GRPCClientConfig 268 269 // MaximumRevocations specifies the maximum number of certificates associated with 270 // a key hash that bad-key-revoker will attempt to revoke. If the number of certificates 271 // is higher than MaximumRevocations bad-key-revoker will error out and refuse to 272 // progress until this is addressed. 273 MaximumRevocations int `validate:"gte=0"` 274 275 // FindCertificatesBatchSize specifies the maximum number of serials to select from the 276 // keyHashToSerial table at once 277 FindCertificatesBatchSize int `validate:"required"` 278 279 // Interval specifies the minimum duration bad-key-revoker 280 // should sleep between attempting to find blockedKeys rows to 281 // process when there is an error or no work to do. 282 Interval config.Duration `validate:"-"` 283 284 // BackoffIntervalMax specifies a maximum duration the backoff 285 // algorithm will wait before retrying in the event of error 286 // or no work to do. 287 BackoffIntervalMax config.Duration `validate:"-"` 288 289 // MaxExpectedReplicationLag specifies the minimum duration 290 // bad-key-revoker should wait before searching for certificates 291 // matching a blockedKeys row. This should be just slightly greater than 292 // the database's maximum replication lag, and always well under 24 293 // hours. 294 MaxExpectedReplicationLag config.Duration `validate:"-"` 295 } 296 297 Syslog cmd.SyslogConfig 298 OpenTelemetry cmd.OpenTelemetryConfig 299 } 300 301 func main() { 302 debugAddr := flag.String("debug-addr", "", "Debug server address override") 303 configPath := flag.String("config", "", "File path to the configuration file for this service") 304 flag.Parse() 305 306 if *configPath == "" { 307 flag.Usage() 308 os.Exit(1) 309 } 310 var config Config 311 err := cmd.ReadConfigFile(*configPath, &config) 312 cmd.FailOnError(err, "Failed reading config file") 313 314 if *debugAddr != "" { 315 config.BadKeyRevoker.DebugAddr = *debugAddr 316 } 317 318 stats, logger, oTelShutdown := cmd.StatsAndLogging(config.Syslog, config.OpenTelemetry, config.BadKeyRevoker.DebugAddr) 319 defer oTelShutdown(context.Background()) 320 logger.Info(cmd.VersionString()) 321 clk := clock.New() 322 323 keysToProcess := promauto.With(stats).NewGauge(prometheus.GaugeOpts{ 324 Name: "bad_keys_to_process", 325 Help: fmt.Sprintf("A gauge of blockedKeys rows to process (max: %d)", blockedKeysGaugeLimit), 326 }) 327 keysProcessed := promauto.With(stats).NewCounterVec(prometheus.CounterOpts{ 328 Name: "bad_keys_processed", 329 Help: "A counter of blockedKeys rows processed labelled by processing state", 330 }, []string{"state"}) 331 certsRevoked := promauto.With(stats).NewCounter(prometheus.CounterOpts{ 332 Name: "bad_keys_certs_revoked", 333 Help: "A counter of certificates associated with rows in blockedKeys that have been revoked", 334 }) 335 336 dbMap, err := sa.InitWrappedDb(config.BadKeyRevoker.DB, stats, logger) 337 cmd.FailOnError(err, "While initializing dbMap") 338 339 tlsConfig, err := config.BadKeyRevoker.TLS.Load(stats) 340 cmd.FailOnError(err, "TLS config") 341 342 conn, err := bgrpc.ClientSetup(config.BadKeyRevoker.RAService, tlsConfig, stats, clk) 343 cmd.FailOnError(err, "Failed to load credentials and create gRPC connection to RA") 344 rac := rapb.NewRegistrationAuthorityClient(conn) 345 346 bkr := &badKeyRevoker{ 347 dbMap: dbMap, 348 maxRevocations: config.BadKeyRevoker.MaximumRevocations, 349 serialBatchSize: config.BadKeyRevoker.FindCertificatesBatchSize, 350 raClient: rac, 351 logger: logger, 352 clk: clk, 353 backoffIntervalMax: config.BadKeyRevoker.BackoffIntervalMax.Duration, 354 backoffIntervalBase: config.BadKeyRevoker.Interval.Duration, 355 backoffFactor: 1.3, 356 maxExpectedReplicationLag: config.BadKeyRevoker.MaxExpectedReplicationLag.Duration, 357 keysToProcess: keysToProcess, 358 keysProcessed: keysProcessed, 359 certsRevoked: certsRevoked, 360 } 361 362 // If `BackoffIntervalMax` was not set via the config, set it to 60 363 // seconds. This will avoid a tight loop on error but not be an 364 // excessive delay if the config value was not deliberately set. 365 if bkr.backoffIntervalMax == 0 { 366 bkr.backoffIntervalMax = time.Second * 60 367 } 368 369 // If `Interval` was not set via the config then set 370 // `bkr.backoffIntervalBase` to a default 1 second. 371 if bkr.backoffIntervalBase == 0 { 372 bkr.backoffIntervalBase = time.Second 373 } 374 375 // If `MaxExpectedReplicationLag` was not set via the config, then set 376 // `bkr.maxExpectedReplicationLag` to a default 22 seconds. This is based on 377 // ProxySQL's max_replication_lag for bad-key-revoker (10s), times two, plus 378 // two seconds. 379 if bkr.maxExpectedReplicationLag == 0 { 380 bkr.maxExpectedReplicationLag = time.Second * 22 381 } 382 383 // Run bad-key-revoker in a loop. Backoff if no work or errors. 384 for { 385 noWork, err := bkr.invoke(context.Background()) 386 if err != nil { 387 keysProcessed.WithLabelValues("error").Inc() 388 logger.AuditErrf("failed to process blockedKeys row: %s", err) 389 // Calculate and sleep for a backoff interval 390 bkr.backoff() 391 continue 392 } 393 if noWork { 394 logger.Info("no work to do") 395 // Calculate and sleep for a backoff interval 396 bkr.backoff() 397 } else { 398 keysProcessed.WithLabelValues("success").Inc() 399 // Successfully processed, reset backoff. 400 bkr.backoffReset() 401 } 402 } 403 } 404 405 // backoff increments the backoffTicker, calls core.RetryBackoff to 406 // calculate a new backoff duration, then logs the backoff and sleeps for 407 // the calculated duration. 408 func (bkr *badKeyRevoker) backoff() { 409 bkr.backoffTicker++ 410 backoffDur := core.RetryBackoff( 411 bkr.backoffTicker, 412 bkr.backoffIntervalBase, 413 bkr.backoffIntervalMax, 414 bkr.backoffFactor, 415 ) 416 bkr.logger.Infof("backoff trying again in %.2f seconds", backoffDur.Seconds()) 417 bkr.clk.Sleep(backoffDur) 418 } 419 420 // reset sets the backoff ticker and duration to zero. 421 func (bkr *badKeyRevoker) backoffReset() { 422 bkr.backoffTicker = 0 423 } 424 425 func init() { 426 cmd.RegisterCommand("bad-key-revoker", main, &cmd.ConfigValidator{Config: &Config{}}) 427 }