github.com/letsencrypt/boulder@v0.20251208.0/cmd/shell.go (about) 1 // Package cmd provides utilities that underlie the specific commands. 2 package cmd 3 4 import ( 5 "context" 6 "encoding/json" 7 "errors" 8 "expvar" 9 "fmt" 10 "io" 11 "log" 12 "log/syslog" 13 "net/http" 14 "net/http/pprof" 15 "os" 16 "os/signal" 17 "runtime" 18 "runtime/debug" 19 "strings" 20 "syscall" 21 "time" 22 23 "github.com/go-logr/stdr" 24 "github.com/go-sql-driver/mysql" 25 "github.com/prometheus/client_golang/prometheus" 26 "github.com/prometheus/client_golang/prometheus/collectors" 27 "github.com/prometheus/client_golang/prometheus/collectors/version" 28 "github.com/prometheus/client_golang/prometheus/promhttp" 29 "github.com/redis/go-redis/v9" 30 "go.opentelemetry.io/otel" 31 "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" 32 "go.opentelemetry.io/otel/propagation" 33 "go.opentelemetry.io/otel/sdk/resource" 34 "go.opentelemetry.io/otel/sdk/trace" 35 semconv "go.opentelemetry.io/otel/semconv/v1.30.0" 36 "google.golang.org/grpc/grpclog" 37 38 "github.com/letsencrypt/boulder/config" 39 "github.com/letsencrypt/boulder/core" 40 blog "github.com/letsencrypt/boulder/log" 41 "github.com/letsencrypt/boulder/strictyaml" 42 "github.com/letsencrypt/validator/v10" 43 ) 44 45 // Because we don't know when this init will be called with respect to 46 // flag.Parse() and other flag definitions, we can't rely on the regular 47 // flag mechanism. But this one is fine. 48 func init() { 49 for _, v := range os.Args { 50 if v == "--version" || v == "-version" { 51 fmt.Println(VersionString()) 52 os.Exit(0) 53 } 54 } 55 } 56 57 // mysqlLogger implements the mysql.Logger interface. 58 type mysqlLogger struct { 59 blog.Logger 60 } 61 62 func (m mysqlLogger) Print(v ...any) { 63 m.AuditErrf("[mysql] %s", fmt.Sprint(v...)) 64 } 65 66 // grpcLogger implements the grpclog.LoggerV2 interface. 67 type grpcLogger struct { 68 blog.Logger 69 } 70 71 // Ensure that fatal logs exit, because we use neither the gRPC default logger 72 // nor the stdlib default logger, both of which would call os.Exit(1) for us. 73 func (log grpcLogger) Fatal(args ...any) { 74 log.Error(args...) 75 os.Exit(1) 76 } 77 func (log grpcLogger) Fatalf(format string, args ...any) { 78 log.Errorf(format, args...) 79 os.Exit(1) 80 } 81 func (log grpcLogger) Fatalln(args ...any) { 82 log.Errorln(args...) 83 os.Exit(1) 84 } 85 86 // Treat all gRPC error logs as potential audit events. 87 func (log grpcLogger) Error(args ...any) { 88 log.Logger.AuditErr(fmt.Sprint(args...)) 89 } 90 func (log grpcLogger) Errorf(format string, args ...any) { 91 log.Logger.AuditErrf(format, args...) 92 } 93 func (log grpcLogger) Errorln(args ...any) { 94 log.Logger.AuditErr(fmt.Sprintln(args...)) 95 } 96 97 // Pass through most Warnings, but filter out a few noisy ones. 98 func (log grpcLogger) Warning(args ...any) { 99 log.Logger.Warning(fmt.Sprint(args...)) 100 } 101 func (log grpcLogger) Warningf(format string, args ...any) { 102 log.Logger.Warningf(format, args...) 103 } 104 func (log grpcLogger) Warningln(args ...any) { 105 msg := fmt.Sprintln(args...) 106 // See https://github.com/letsencrypt/boulder/issues/4628 107 if strings.Contains(msg, `ccResolverWrapper: error parsing service config: no JSON service config provided`) { 108 return 109 } 110 // See https://github.com/letsencrypt/boulder/issues/4379 111 if strings.Contains(msg, `Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"`) { 112 return 113 } 114 // Since we've already formatted the message, just pass through to .Warning() 115 log.Logger.Warning(msg) 116 } 117 118 // Don't log any INFO-level gRPC stuff. In practice this is all noise, like 119 // failed TXT lookups for service discovery (we only use A records). 120 func (log grpcLogger) Info(args ...any) {} 121 func (log grpcLogger) Infof(format string, args ...any) {} 122 func (log grpcLogger) Infoln(args ...any) {} 123 124 // V returns true if the verbosity level l is less than the verbosity we want to 125 // log at. 126 func (log grpcLogger) V(l int) bool { 127 // We always return false. This causes gRPC to not log some things which are 128 // only logged conditionally if the logLevel is set below a certain value. 129 // TODO: Use the wrapped log.Logger.stdoutLevel and log.Logger.syslogLevel 130 // to determine a correct return value here. 131 return false 132 } 133 134 // promLogger implements the promhttp.Logger interface. 135 type promLogger struct { 136 blog.Logger 137 } 138 139 func (log promLogger) Println(args ...any) { 140 log.AuditErr(fmt.Sprint(args...)) 141 } 142 143 type redisLogger struct { 144 blog.Logger 145 } 146 147 func (rl redisLogger) Printf(ctx context.Context, format string, v ...any) { 148 rl.Infof(format, v...) 149 } 150 151 // logWriter implements the io.Writer interface. 152 type logWriter struct { 153 blog.Logger 154 } 155 156 func (lw logWriter) Write(p []byte) (n int, err error) { 157 // Lines received by logWriter will always have a trailing newline. 158 lw.Logger.Info(strings.Trim(string(p), "\n")) 159 return 160 } 161 162 // logOutput implements the log.Logger interface's Output method for use with logr 163 type logOutput struct { 164 blog.Logger 165 } 166 167 func (l logOutput) Output(calldepth int, logline string) error { 168 l.Logger.Info(logline) 169 return nil 170 } 171 172 // StatsAndLogging sets up an AuditLogger, Prometheus Registerer, and 173 // OpenTelemetry tracing. It returns the Registerer and AuditLogger, along 174 // with a graceful shutdown function to be deferred. 175 // 176 // It spawns off an HTTP server on the provided port to report the stats and 177 // provide pprof profiling handlers. 178 // 179 // The constructed AuditLogger as the default logger, and configures the mysql 180 // and grpc packages to use our logger. This must be called before any gRPC code 181 // is called, because gRPC's SetLogger doesn't use any locking. 182 // 183 // This function does not return an error, and will panic on problems. 184 func StatsAndLogging(logConf SyslogConfig, otConf OpenTelemetryConfig, addr string) (prometheus.Registerer, blog.Logger, func(context.Context)) { 185 logger := NewLogger(logConf) 186 187 shutdown := NewOpenTelemetry(otConf, logger) 188 189 return newStatsRegistry(addr, logger), logger, shutdown 190 } 191 192 // NewLogger creates a logger object with the provided settings, sets it as 193 // the global logger, and returns it. 194 // 195 // It also sets the logging systems for various packages we use to go through 196 // the created logger, and sets up a periodic log event for the current timestamp. 197 func NewLogger(logConf SyslogConfig) blog.Logger { 198 var logger blog.Logger 199 if logConf.SyslogLevel >= 0 { 200 syslogger, err := syslog.Dial( 201 "", 202 "", 203 syslog.LOG_INFO, // default, not actually used 204 core.Command()) 205 FailOnError(err, "Could not connect to Syslog") 206 syslogLevel := int(syslog.LOG_INFO) 207 if logConf.SyslogLevel != 0 { 208 syslogLevel = logConf.SyslogLevel 209 } 210 logger, err = blog.New(syslogger, logConf.StdoutLevel, syslogLevel) 211 FailOnError(err, "Could not connect to Syslog") 212 } else { 213 logger = blog.StdoutLogger(logConf.StdoutLevel) 214 } 215 216 _ = blog.Set(logger) 217 _ = mysql.SetLogger(mysqlLogger{logger}) 218 grpclog.SetLoggerV2(grpcLogger{logger}) 219 log.SetOutput(logWriter{logger}) 220 redis.SetLogger(redisLogger{logger}) 221 222 // Periodically log the current timestamp, to ensure syslog timestamps match 223 // Boulder's conception of time. 224 go func() { 225 for { 226 time.Sleep(time.Hour) 227 logger.Info(fmt.Sprintf("time=%s", time.Now().Format(time.RFC3339Nano))) 228 } 229 }() 230 return logger 231 } 232 233 func newVersionCollector() prometheus.Collector { 234 buildTime := core.Unspecified 235 if core.GetBuildTime() != core.Unspecified { 236 // core.BuildTime is set by our Makefile using the shell command 'date 237 // -u' which outputs in a consistent format across all POSIX systems. 238 bt, err := time.Parse(time.UnixDate, core.BuildTime) 239 if err != nil { 240 // Should never happen unless the Makefile is changed. 241 buildTime = "Unparsable" 242 } else { 243 buildTime = bt.Format(time.RFC3339) 244 } 245 } 246 return prometheus.NewGaugeFunc( 247 prometheus.GaugeOpts{ 248 Name: "version", 249 Help: fmt.Sprintf( 250 "A metric with a constant value of '1' labeled by the short commit-id (buildId), build timestamp in RFC3339 format (buildTime), and Go release tag like 'go1.3' (goVersion) from which %s was built.", 251 core.Command(), 252 ), 253 ConstLabels: prometheus.Labels{ 254 "buildId": core.GetBuildID(), 255 "buildTime": buildTime, 256 "goVersion": runtime.Version(), 257 }, 258 }, 259 func() float64 { return 1 }, 260 ) 261 } 262 263 func newStatsRegistry(addr string, logger blog.Logger) prometheus.Registerer { 264 registry := prometheus.NewRegistry() 265 266 if addr == "" { 267 logger.Info("No debug listen address specified") 268 return registry 269 } 270 271 registry.MustRegister(collectors.NewGoCollector()) 272 registry.MustRegister(collectors.NewProcessCollector( 273 collectors.ProcessCollectorOpts{})) 274 registry.MustRegister(newVersionCollector()) 275 registry.MustRegister(version.NewCollector("boulder")) 276 277 mux := http.NewServeMux() 278 // Register the available pprof handlers. These are all registered on 279 // DefaultServeMux just by importing pprof, but since we eschew 280 // DefaultServeMux, we need to explicitly register them on our own mux. 281 mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) 282 mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile)) 283 mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol)) 284 mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) 285 // These handlers are defined in runtime/pprof instead of net/http/pprof, and 286 // have to be accessed through net/http/pprof's Handler func. 287 mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine")) 288 mux.Handle("/debug/pprof/block", pprof.Handler("block")) 289 mux.Handle("/debug/pprof/heap", pprof.Handler("heap")) 290 mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex")) 291 mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate")) 292 293 mux.Handle("/debug/vars", expvar.Handler()) 294 mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{ 295 ErrorLog: promLogger{logger}, 296 })) 297 298 logger.Infof("Debug server listening on %s", addr) 299 300 server := http.Server{ 301 Addr: addr, 302 Handler: mux, 303 ReadTimeout: time.Minute, 304 } 305 go func() { 306 err := server.ListenAndServe() 307 if err != nil { 308 logger.Errf("unable to boot debug server on %s: %v", addr, err) 309 os.Exit(1) 310 } 311 }() 312 return registry 313 } 314 315 // NewOpenTelemetry sets up our OpenTelemetry tracing 316 // It returns a graceful shutdown function to be deferred. 317 func NewOpenTelemetry(config OpenTelemetryConfig, logger blog.Logger) func(ctx context.Context) { 318 otel.SetLogger(stdr.New(logOutput{logger})) 319 otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { logger.Errf("OpenTelemetry error: %v", err) })) 320 321 resources := resource.NewWithAttributes( 322 semconv.SchemaURL, 323 semconv.ServiceName(core.Command()), 324 semconv.ServiceVersion(core.GetBuildID()), 325 semconv.ProcessPID(os.Getpid()), 326 ) 327 328 opts := []trace.TracerProviderOption{ 329 trace.WithResource(resources), 330 // Use a ParentBased sampler to respect the sample decisions on incoming 331 // traces, and TraceIDRatioBased to randomly sample new traces. 332 trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRatio))), 333 } 334 335 if config.Endpoint != "" { 336 exporter, err := otlptracegrpc.New( 337 context.Background(), 338 otlptracegrpc.WithInsecure(), 339 otlptracegrpc.WithEndpoint(config.Endpoint)) 340 if err != nil { 341 FailOnError(err, "Could not create OpenTelemetry OTLP exporter") 342 } 343 344 opts = append(opts, trace.WithBatcher(exporter)) 345 } 346 347 tracerProvider := trace.NewTracerProvider(opts...) 348 otel.SetTracerProvider(tracerProvider) 349 otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{})) 350 351 return func(ctx context.Context) { 352 err := tracerProvider.Shutdown(ctx) 353 if err != nil { 354 logger.Errf("Error while shutting down OpenTelemetry: %v", err) 355 } 356 } 357 } 358 359 // AuditPanic catches and logs panics, then exits with exit code 1. 360 // This method should be called in a defer statement as early as possible. 361 func AuditPanic() { 362 err := recover() 363 // No panic, no problem 364 if err == nil { 365 return 366 } 367 // Get the global logger if it's initialized, or create a default one if not. 368 // We could wind up creating a default logger if we panic so early in a process' 369 // lifetime that we haven't yet parsed the config and created a logger. 370 log := blog.Get() 371 // For the special type `failure`, audit log the message and exit quietly 372 fail, ok := err.(failure) 373 if ok { 374 log.AuditErr(fail.msg) 375 } else { 376 // For all other values passed to `panic`, log them and a stack trace 377 log.AuditErrf("Panic caused by err: %s", err) 378 379 log.AuditErrf("Stack Trace (Current goroutine) %s", debug.Stack()) 380 } 381 // Because this function is deferred as early as possible, there's no further defers to run after this one 382 // So it is safe to os.Exit to set the exit code and exit without losing any defers we haven't executed. 383 os.Exit(1) 384 } 385 386 // failure is a sentinel type that `Fail` passes to `panic` so `AuditPanic` can exit 387 // quietly and print the msg. 388 type failure struct { 389 msg string 390 } 391 392 func (f failure) String() string { 393 return f.msg 394 } 395 396 // Fail raises a panic with a special type that causes `AuditPanic` to audit log the provided message 397 // and then exit nonzero (without printing a stack trace). 398 func Fail(msg string) { 399 panic(failure{msg}) 400 } 401 402 // FailOnError calls Fail if the provided error is non-nil. 403 // This is useful for one-line error handling in top-level executables, 404 // but should generally be avoided in libraries. The message argument is optional. 405 func FailOnError(err error, msg string) { 406 if err == nil { 407 return 408 } 409 if msg == "" { 410 Fail(err.Error()) 411 } else { 412 Fail(fmt.Sprintf("%s: %s", msg, err)) 413 } 414 } 415 416 func decodeJSONStrict(in io.Reader, out any) error { 417 decoder := json.NewDecoder(in) 418 decoder.DisallowUnknownFields() 419 420 return decoder.Decode(out) 421 } 422 423 // ReadConfigFile takes a file path as an argument and attempts to 424 // unmarshal the content of the file into a struct containing a 425 // configuration of a boulder component. Any config keys in the JSON 426 // file which do not correspond to expected keys in the config struct 427 // will result in errors. 428 func ReadConfigFile(filename string, out any) error { 429 file, err := os.Open(filename) 430 if err != nil { 431 return err 432 } 433 defer file.Close() 434 435 return decodeJSONStrict(file, out) 436 } 437 438 // ValidateJSONConfig takes a *ConfigValidator and an io.Reader containing a 439 // JSON representation of a config. The JSON data is unmarshaled into the 440 // *ConfigValidator's inner Config and then validated according to the 441 // 'validate' tags for on each field. Callers can use cmd.LookupConfigValidator 442 // to get a *ConfigValidator for a given Boulder component. This is exported for 443 // use in SRE CI tooling. 444 func ValidateJSONConfig(cv *ConfigValidator, in io.Reader) error { 445 if cv == nil { 446 return errors.New("config validator cannot be nil") 447 } 448 449 // Initialize the validator and load any custom tags. 450 validate := validator.New() 451 for tag, v := range cv.Validators { 452 err := validate.RegisterValidation(tag, v) 453 if err != nil { 454 return err 455 } 456 } 457 458 // Register custom types for use with existing validation tags. 459 validate.RegisterCustomTypeFunc(config.DurationCustomTypeFunc, config.Duration{}) 460 461 err := decodeJSONStrict(in, cv.Config) 462 if err != nil { 463 return err 464 } 465 err = validate.Struct(cv.Config) 466 if err != nil { 467 errs, ok := err.(validator.ValidationErrors) 468 if !ok { 469 // This should never happen. 470 return err 471 } 472 if len(errs) > 0 { 473 allErrs := []string{} 474 for _, e := range errs { 475 allErrs = append(allErrs, e.Error()) 476 } 477 return errors.New(strings.Join(allErrs, ", ")) 478 } 479 } 480 return nil 481 } 482 483 // ValidateYAMLConfig takes a *ConfigValidator and an io.Reader containing a 484 // YAML representation of a config. The YAML data is unmarshaled into the 485 // *ConfigValidator's inner Config and then validated according to the 486 // 'validate' tags for on each field. Callers can use cmd.LookupConfigValidator 487 // to get a *ConfigValidator for a given Boulder component. This is exported for 488 // use in SRE CI tooling. 489 func ValidateYAMLConfig(cv *ConfigValidator, in io.Reader) error { 490 if cv == nil { 491 return errors.New("config validator cannot be nil") 492 } 493 494 // Initialize the validator and load any custom tags. 495 validate := validator.New() 496 for tag, v := range cv.Validators { 497 err := validate.RegisterValidation(tag, v) 498 if err != nil { 499 return err 500 } 501 } 502 503 // Register custom types for use with existing validation tags. 504 validate.RegisterCustomTypeFunc(config.DurationCustomTypeFunc, config.Duration{}) 505 506 inBytes, err := io.ReadAll(in) 507 if err != nil { 508 return err 509 } 510 err = strictyaml.Unmarshal(inBytes, cv.Config) 511 if err != nil { 512 return err 513 } 514 err = validate.Struct(cv.Config) 515 if err != nil { 516 errs, ok := err.(validator.ValidationErrors) 517 if !ok { 518 // This should never happen. 519 return err 520 } 521 if len(errs) > 0 { 522 allErrs := []string{} 523 for _, e := range errs { 524 allErrs = append(allErrs, e.Error()) 525 } 526 return errors.New(strings.Join(allErrs, ", ")) 527 } 528 } 529 return nil 530 } 531 532 // VersionString produces a friendly Application version string. 533 func VersionString() string { 534 return fmt.Sprintf("Versions: %s=(%s %s) Golang=(%s) BuildHost=(%s)", core.Command(), core.GetBuildID(), core.GetBuildTime(), runtime.Version(), core.GetBuildHost()) 535 } 536 537 // CatchSignals blocks until a SIGTERM, SIGINT, or SIGHUP is received, then 538 // executes the given callback. The callback should not block, it should simply 539 // signal other goroutines (particularly the main goroutine) to clean themselves 540 // up and exit. This function is intended to be called in its own goroutine, 541 // while the main goroutine waits for an indication that the other goroutines 542 // have exited cleanly. 543 func CatchSignals(callback func()) { 544 WaitForSignal() 545 callback() 546 } 547 548 // WaitForSignal blocks until a SIGTERM, SIGINT, or SIGHUP is received. It then 549 // returns, allowing execution to resume, generally allowing a main() function 550 // to return and trigger and deferred cleanup functions. This function is 551 // intended to be called directly from the main goroutine, while a gRPC or HTTP 552 // server runs in a background goroutine. 553 func WaitForSignal() { 554 sigChan := make(chan os.Signal, 1) 555 signal.Notify(sigChan, syscall.SIGTERM) 556 signal.Notify(sigChan, syscall.SIGINT) 557 signal.Notify(sigChan, syscall.SIGHUP) 558 <-sigChan 559 }