github.com/letsencrypt/boulder@v0.20251208.0/cmd/shell.go (about)

     1  // Package cmd provides utilities that underlie the specific commands.
     2  package cmd
     3  
     4  import (
     5  	"context"
     6  	"encoding/json"
     7  	"errors"
     8  	"expvar"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"log/syslog"
    13  	"net/http"
    14  	"net/http/pprof"
    15  	"os"
    16  	"os/signal"
    17  	"runtime"
    18  	"runtime/debug"
    19  	"strings"
    20  	"syscall"
    21  	"time"
    22  
    23  	"github.com/go-logr/stdr"
    24  	"github.com/go-sql-driver/mysql"
    25  	"github.com/prometheus/client_golang/prometheus"
    26  	"github.com/prometheus/client_golang/prometheus/collectors"
    27  	"github.com/prometheus/client_golang/prometheus/collectors/version"
    28  	"github.com/prometheus/client_golang/prometheus/promhttp"
    29  	"github.com/redis/go-redis/v9"
    30  	"go.opentelemetry.io/otel"
    31  	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
    32  	"go.opentelemetry.io/otel/propagation"
    33  	"go.opentelemetry.io/otel/sdk/resource"
    34  	"go.opentelemetry.io/otel/sdk/trace"
    35  	semconv "go.opentelemetry.io/otel/semconv/v1.30.0"
    36  	"google.golang.org/grpc/grpclog"
    37  
    38  	"github.com/letsencrypt/boulder/config"
    39  	"github.com/letsencrypt/boulder/core"
    40  	blog "github.com/letsencrypt/boulder/log"
    41  	"github.com/letsencrypt/boulder/strictyaml"
    42  	"github.com/letsencrypt/validator/v10"
    43  )
    44  
    45  // Because we don't know when this init will be called with respect to
    46  // flag.Parse() and other flag definitions, we can't rely on the regular
    47  // flag mechanism. But this one is fine.
    48  func init() {
    49  	for _, v := range os.Args {
    50  		if v == "--version" || v == "-version" {
    51  			fmt.Println(VersionString())
    52  			os.Exit(0)
    53  		}
    54  	}
    55  }
    56  
    57  // mysqlLogger implements the mysql.Logger interface.
    58  type mysqlLogger struct {
    59  	blog.Logger
    60  }
    61  
    62  func (m mysqlLogger) Print(v ...any) {
    63  	m.AuditErrf("[mysql] %s", fmt.Sprint(v...))
    64  }
    65  
    66  // grpcLogger implements the grpclog.LoggerV2 interface.
    67  type grpcLogger struct {
    68  	blog.Logger
    69  }
    70  
    71  // Ensure that fatal logs exit, because we use neither the gRPC default logger
    72  // nor the stdlib default logger, both of which would call os.Exit(1) for us.
    73  func (log grpcLogger) Fatal(args ...any) {
    74  	log.Error(args...)
    75  	os.Exit(1)
    76  }
    77  func (log grpcLogger) Fatalf(format string, args ...any) {
    78  	log.Errorf(format, args...)
    79  	os.Exit(1)
    80  }
    81  func (log grpcLogger) Fatalln(args ...any) {
    82  	log.Errorln(args...)
    83  	os.Exit(1)
    84  }
    85  
    86  // Treat all gRPC error logs as potential audit events.
    87  func (log grpcLogger) Error(args ...any) {
    88  	log.Logger.AuditErr(fmt.Sprint(args...))
    89  }
    90  func (log grpcLogger) Errorf(format string, args ...any) {
    91  	log.Logger.AuditErrf(format, args...)
    92  }
    93  func (log grpcLogger) Errorln(args ...any) {
    94  	log.Logger.AuditErr(fmt.Sprintln(args...))
    95  }
    96  
    97  // Pass through most Warnings, but filter out a few noisy ones.
    98  func (log grpcLogger) Warning(args ...any) {
    99  	log.Logger.Warning(fmt.Sprint(args...))
   100  }
   101  func (log grpcLogger) Warningf(format string, args ...any) {
   102  	log.Logger.Warningf(format, args...)
   103  }
   104  func (log grpcLogger) Warningln(args ...any) {
   105  	msg := fmt.Sprintln(args...)
   106  	// See https://github.com/letsencrypt/boulder/issues/4628
   107  	if strings.Contains(msg, `ccResolverWrapper: error parsing service config: no JSON service config provided`) {
   108  		return
   109  	}
   110  	// See https://github.com/letsencrypt/boulder/issues/4379
   111  	if strings.Contains(msg, `Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"`) {
   112  		return
   113  	}
   114  	// Since we've already formatted the message, just pass through to .Warning()
   115  	log.Logger.Warning(msg)
   116  }
   117  
   118  // Don't log any INFO-level gRPC stuff. In practice this is all noise, like
   119  // failed TXT lookups for service discovery (we only use A records).
   120  func (log grpcLogger) Info(args ...any)                 {}
   121  func (log grpcLogger) Infof(format string, args ...any) {}
   122  func (log grpcLogger) Infoln(args ...any)               {}
   123  
   124  // V returns true if the verbosity level l is less than the verbosity we want to
   125  // log at.
   126  func (log grpcLogger) V(l int) bool {
   127  	// We always return false. This causes gRPC to not log some things which are
   128  	// only logged conditionally if the logLevel is set below a certain value.
   129  	// TODO: Use the wrapped log.Logger.stdoutLevel and log.Logger.syslogLevel
   130  	// to determine a correct return value here.
   131  	return false
   132  }
   133  
   134  // promLogger implements the promhttp.Logger interface.
   135  type promLogger struct {
   136  	blog.Logger
   137  }
   138  
   139  func (log promLogger) Println(args ...any) {
   140  	log.AuditErr(fmt.Sprint(args...))
   141  }
   142  
   143  type redisLogger struct {
   144  	blog.Logger
   145  }
   146  
   147  func (rl redisLogger) Printf(ctx context.Context, format string, v ...any) {
   148  	rl.Infof(format, v...)
   149  }
   150  
   151  // logWriter implements the io.Writer interface.
   152  type logWriter struct {
   153  	blog.Logger
   154  }
   155  
   156  func (lw logWriter) Write(p []byte) (n int, err error) {
   157  	// Lines received by logWriter will always have a trailing newline.
   158  	lw.Logger.Info(strings.Trim(string(p), "\n"))
   159  	return
   160  }
   161  
   162  // logOutput implements the log.Logger interface's Output method for use with logr
   163  type logOutput struct {
   164  	blog.Logger
   165  }
   166  
   167  func (l logOutput) Output(calldepth int, logline string) error {
   168  	l.Logger.Info(logline)
   169  	return nil
   170  }
   171  
   172  // StatsAndLogging sets up an AuditLogger, Prometheus Registerer, and
   173  // OpenTelemetry tracing.  It returns the Registerer and AuditLogger, along
   174  // with a graceful shutdown function to be deferred.
   175  //
   176  // It spawns off an HTTP server on the provided port to report the stats and
   177  // provide pprof profiling handlers.
   178  //
   179  // The constructed AuditLogger as the default logger, and configures the mysql
   180  // and grpc packages to use our logger. This must be called before any gRPC code
   181  // is called, because gRPC's SetLogger doesn't use any locking.
   182  //
   183  // This function does not return an error, and will panic on problems.
   184  func StatsAndLogging(logConf SyslogConfig, otConf OpenTelemetryConfig, addr string) (prometheus.Registerer, blog.Logger, func(context.Context)) {
   185  	logger := NewLogger(logConf)
   186  
   187  	shutdown := NewOpenTelemetry(otConf, logger)
   188  
   189  	return newStatsRegistry(addr, logger), logger, shutdown
   190  }
   191  
   192  // NewLogger creates a logger object with the provided settings, sets it as
   193  // the global logger, and returns it.
   194  //
   195  // It also sets the logging systems for various packages we use to go through
   196  // the created logger, and sets up a periodic log event for the current timestamp.
   197  func NewLogger(logConf SyslogConfig) blog.Logger {
   198  	var logger blog.Logger
   199  	if logConf.SyslogLevel >= 0 {
   200  		syslogger, err := syslog.Dial(
   201  			"",
   202  			"",
   203  			syslog.LOG_INFO, // default, not actually used
   204  			core.Command())
   205  		FailOnError(err, "Could not connect to Syslog")
   206  		syslogLevel := int(syslog.LOG_INFO)
   207  		if logConf.SyslogLevel != 0 {
   208  			syslogLevel = logConf.SyslogLevel
   209  		}
   210  		logger, err = blog.New(syslogger, logConf.StdoutLevel, syslogLevel)
   211  		FailOnError(err, "Could not connect to Syslog")
   212  	} else {
   213  		logger = blog.StdoutLogger(logConf.StdoutLevel)
   214  	}
   215  
   216  	_ = blog.Set(logger)
   217  	_ = mysql.SetLogger(mysqlLogger{logger})
   218  	grpclog.SetLoggerV2(grpcLogger{logger})
   219  	log.SetOutput(logWriter{logger})
   220  	redis.SetLogger(redisLogger{logger})
   221  
   222  	// Periodically log the current timestamp, to ensure syslog timestamps match
   223  	// Boulder's conception of time.
   224  	go func() {
   225  		for {
   226  			time.Sleep(time.Hour)
   227  			logger.Info(fmt.Sprintf("time=%s", time.Now().Format(time.RFC3339Nano)))
   228  		}
   229  	}()
   230  	return logger
   231  }
   232  
   233  func newVersionCollector() prometheus.Collector {
   234  	buildTime := core.Unspecified
   235  	if core.GetBuildTime() != core.Unspecified {
   236  		// core.BuildTime is set by our Makefile using the shell command 'date
   237  		// -u' which outputs in a consistent format across all POSIX systems.
   238  		bt, err := time.Parse(time.UnixDate, core.BuildTime)
   239  		if err != nil {
   240  			// Should never happen unless the Makefile is changed.
   241  			buildTime = "Unparsable"
   242  		} else {
   243  			buildTime = bt.Format(time.RFC3339)
   244  		}
   245  	}
   246  	return prometheus.NewGaugeFunc(
   247  		prometheus.GaugeOpts{
   248  			Name: "version",
   249  			Help: fmt.Sprintf(
   250  				"A metric with a constant value of '1' labeled by the short commit-id (buildId), build timestamp in RFC3339 format (buildTime), and Go release tag like 'go1.3' (goVersion) from which %s was built.",
   251  				core.Command(),
   252  			),
   253  			ConstLabels: prometheus.Labels{
   254  				"buildId":   core.GetBuildID(),
   255  				"buildTime": buildTime,
   256  				"goVersion": runtime.Version(),
   257  			},
   258  		},
   259  		func() float64 { return 1 },
   260  	)
   261  }
   262  
   263  func newStatsRegistry(addr string, logger blog.Logger) prometheus.Registerer {
   264  	registry := prometheus.NewRegistry()
   265  
   266  	if addr == "" {
   267  		logger.Info("No debug listen address specified")
   268  		return registry
   269  	}
   270  
   271  	registry.MustRegister(collectors.NewGoCollector())
   272  	registry.MustRegister(collectors.NewProcessCollector(
   273  		collectors.ProcessCollectorOpts{}))
   274  	registry.MustRegister(newVersionCollector())
   275  	registry.MustRegister(version.NewCollector("boulder"))
   276  
   277  	mux := http.NewServeMux()
   278  	// Register the available pprof handlers. These are all registered on
   279  	// DefaultServeMux just by importing pprof, but since we eschew
   280  	// DefaultServeMux, we need to explicitly register them on our own mux.
   281  	mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
   282  	mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
   283  	mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
   284  	mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
   285  	// These handlers are defined in runtime/pprof instead of net/http/pprof, and
   286  	// have to be accessed through net/http/pprof's Handler func.
   287  	mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
   288  	mux.Handle("/debug/pprof/block", pprof.Handler("block"))
   289  	mux.Handle("/debug/pprof/heap", pprof.Handler("heap"))
   290  	mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex"))
   291  	mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
   292  
   293  	mux.Handle("/debug/vars", expvar.Handler())
   294  	mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
   295  		ErrorLog: promLogger{logger},
   296  	}))
   297  
   298  	logger.Infof("Debug server listening on %s", addr)
   299  
   300  	server := http.Server{
   301  		Addr:        addr,
   302  		Handler:     mux,
   303  		ReadTimeout: time.Minute,
   304  	}
   305  	go func() {
   306  		err := server.ListenAndServe()
   307  		if err != nil {
   308  			logger.Errf("unable to boot debug server on %s: %v", addr, err)
   309  			os.Exit(1)
   310  		}
   311  	}()
   312  	return registry
   313  }
   314  
   315  // NewOpenTelemetry sets up our OpenTelemetry tracing
   316  // It returns a graceful shutdown function to be deferred.
   317  func NewOpenTelemetry(config OpenTelemetryConfig, logger blog.Logger) func(ctx context.Context) {
   318  	otel.SetLogger(stdr.New(logOutput{logger}))
   319  	otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { logger.Errf("OpenTelemetry error: %v", err) }))
   320  
   321  	resources := resource.NewWithAttributes(
   322  		semconv.SchemaURL,
   323  		semconv.ServiceName(core.Command()),
   324  		semconv.ServiceVersion(core.GetBuildID()),
   325  		semconv.ProcessPID(os.Getpid()),
   326  	)
   327  
   328  	opts := []trace.TracerProviderOption{
   329  		trace.WithResource(resources),
   330  		// Use a ParentBased sampler to respect the sample decisions on incoming
   331  		// traces, and TraceIDRatioBased to randomly sample new traces.
   332  		trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRatio))),
   333  	}
   334  
   335  	if config.Endpoint != "" {
   336  		exporter, err := otlptracegrpc.New(
   337  			context.Background(),
   338  			otlptracegrpc.WithInsecure(),
   339  			otlptracegrpc.WithEndpoint(config.Endpoint))
   340  		if err != nil {
   341  			FailOnError(err, "Could not create OpenTelemetry OTLP exporter")
   342  		}
   343  
   344  		opts = append(opts, trace.WithBatcher(exporter))
   345  	}
   346  
   347  	tracerProvider := trace.NewTracerProvider(opts...)
   348  	otel.SetTracerProvider(tracerProvider)
   349  	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{}))
   350  
   351  	return func(ctx context.Context) {
   352  		err := tracerProvider.Shutdown(ctx)
   353  		if err != nil {
   354  			logger.Errf("Error while shutting down OpenTelemetry: %v", err)
   355  		}
   356  	}
   357  }
   358  
   359  // AuditPanic catches and logs panics, then exits with exit code 1.
   360  // This method should be called in a defer statement as early as possible.
   361  func AuditPanic() {
   362  	err := recover()
   363  	// No panic, no problem
   364  	if err == nil {
   365  		return
   366  	}
   367  	// Get the global logger if it's initialized, or create a default one if not.
   368  	// We could wind up creating a default logger if we panic so early in a process'
   369  	// lifetime that we haven't yet parsed the config and created a logger.
   370  	log := blog.Get()
   371  	// For the special type `failure`, audit log the message and exit quietly
   372  	fail, ok := err.(failure)
   373  	if ok {
   374  		log.AuditErr(fail.msg)
   375  	} else {
   376  		// For all other values passed to `panic`, log them and a stack trace
   377  		log.AuditErrf("Panic caused by err: %s", err)
   378  
   379  		log.AuditErrf("Stack Trace (Current goroutine) %s", debug.Stack())
   380  	}
   381  	// Because this function is deferred as early as possible, there's no further defers to run after this one
   382  	// So it is safe to os.Exit to set the exit code and exit without losing any defers we haven't executed.
   383  	os.Exit(1)
   384  }
   385  
   386  // failure is a sentinel type that `Fail` passes to `panic` so `AuditPanic` can exit
   387  // quietly and print the msg.
   388  type failure struct {
   389  	msg string
   390  }
   391  
   392  func (f failure) String() string {
   393  	return f.msg
   394  }
   395  
   396  // Fail raises a panic with a special type that causes `AuditPanic` to audit log the provided message
   397  // and then exit nonzero (without printing a stack trace).
   398  func Fail(msg string) {
   399  	panic(failure{msg})
   400  }
   401  
   402  // FailOnError calls Fail if the provided error is non-nil.
   403  // This is useful for one-line error handling in top-level executables,
   404  // but should generally be avoided in libraries. The message argument is optional.
   405  func FailOnError(err error, msg string) {
   406  	if err == nil {
   407  		return
   408  	}
   409  	if msg == "" {
   410  		Fail(err.Error())
   411  	} else {
   412  		Fail(fmt.Sprintf("%s: %s", msg, err))
   413  	}
   414  }
   415  
   416  func decodeJSONStrict(in io.Reader, out any) error {
   417  	decoder := json.NewDecoder(in)
   418  	decoder.DisallowUnknownFields()
   419  
   420  	return decoder.Decode(out)
   421  }
   422  
   423  // ReadConfigFile takes a file path as an argument and attempts to
   424  // unmarshal the content of the file into a struct containing a
   425  // configuration of a boulder component. Any config keys in the JSON
   426  // file which do not correspond to expected keys in the config struct
   427  // will result in errors.
   428  func ReadConfigFile(filename string, out any) error {
   429  	file, err := os.Open(filename)
   430  	if err != nil {
   431  		return err
   432  	}
   433  	defer file.Close()
   434  
   435  	return decodeJSONStrict(file, out)
   436  }
   437  
   438  // ValidateJSONConfig takes a *ConfigValidator and an io.Reader containing a
   439  // JSON representation of a config. The JSON data is unmarshaled into the
   440  // *ConfigValidator's inner Config and then validated according to the
   441  // 'validate' tags for on each field. Callers can use cmd.LookupConfigValidator
   442  // to get a *ConfigValidator for a given Boulder component. This is exported for
   443  // use in SRE CI tooling.
   444  func ValidateJSONConfig(cv *ConfigValidator, in io.Reader) error {
   445  	if cv == nil {
   446  		return errors.New("config validator cannot be nil")
   447  	}
   448  
   449  	// Initialize the validator and load any custom tags.
   450  	validate := validator.New()
   451  	for tag, v := range cv.Validators {
   452  		err := validate.RegisterValidation(tag, v)
   453  		if err != nil {
   454  			return err
   455  		}
   456  	}
   457  
   458  	// Register custom types for use with existing validation tags.
   459  	validate.RegisterCustomTypeFunc(config.DurationCustomTypeFunc, config.Duration{})
   460  
   461  	err := decodeJSONStrict(in, cv.Config)
   462  	if err != nil {
   463  		return err
   464  	}
   465  	err = validate.Struct(cv.Config)
   466  	if err != nil {
   467  		errs, ok := err.(validator.ValidationErrors)
   468  		if !ok {
   469  			// This should never happen.
   470  			return err
   471  		}
   472  		if len(errs) > 0 {
   473  			allErrs := []string{}
   474  			for _, e := range errs {
   475  				allErrs = append(allErrs, e.Error())
   476  			}
   477  			return errors.New(strings.Join(allErrs, ", "))
   478  		}
   479  	}
   480  	return nil
   481  }
   482  
   483  // ValidateYAMLConfig takes a *ConfigValidator and an io.Reader containing a
   484  // YAML representation of a config. The YAML data is unmarshaled into the
   485  // *ConfigValidator's inner Config and then validated according to the
   486  // 'validate' tags for on each field. Callers can use cmd.LookupConfigValidator
   487  // to get a *ConfigValidator for a given Boulder component. This is exported for
   488  // use in SRE CI tooling.
   489  func ValidateYAMLConfig(cv *ConfigValidator, in io.Reader) error {
   490  	if cv == nil {
   491  		return errors.New("config validator cannot be nil")
   492  	}
   493  
   494  	// Initialize the validator and load any custom tags.
   495  	validate := validator.New()
   496  	for tag, v := range cv.Validators {
   497  		err := validate.RegisterValidation(tag, v)
   498  		if err != nil {
   499  			return err
   500  		}
   501  	}
   502  
   503  	// Register custom types for use with existing validation tags.
   504  	validate.RegisterCustomTypeFunc(config.DurationCustomTypeFunc, config.Duration{})
   505  
   506  	inBytes, err := io.ReadAll(in)
   507  	if err != nil {
   508  		return err
   509  	}
   510  	err = strictyaml.Unmarshal(inBytes, cv.Config)
   511  	if err != nil {
   512  		return err
   513  	}
   514  	err = validate.Struct(cv.Config)
   515  	if err != nil {
   516  		errs, ok := err.(validator.ValidationErrors)
   517  		if !ok {
   518  			// This should never happen.
   519  			return err
   520  		}
   521  		if len(errs) > 0 {
   522  			allErrs := []string{}
   523  			for _, e := range errs {
   524  				allErrs = append(allErrs, e.Error())
   525  			}
   526  			return errors.New(strings.Join(allErrs, ", "))
   527  		}
   528  	}
   529  	return nil
   530  }
   531  
   532  // VersionString produces a friendly Application version string.
   533  func VersionString() string {
   534  	return fmt.Sprintf("Versions: %s=(%s %s) Golang=(%s) BuildHost=(%s)", core.Command(), core.GetBuildID(), core.GetBuildTime(), runtime.Version(), core.GetBuildHost())
   535  }
   536  
   537  // CatchSignals blocks until a SIGTERM, SIGINT, or SIGHUP is received, then
   538  // executes the given callback. The callback should not block, it should simply
   539  // signal other goroutines (particularly the main goroutine) to clean themselves
   540  // up and exit. This function is intended to be called in its own goroutine,
   541  // while the main goroutine waits for an indication that the other goroutines
   542  // have exited cleanly.
   543  func CatchSignals(callback func()) {
   544  	WaitForSignal()
   545  	callback()
   546  }
   547  
   548  // WaitForSignal blocks until a SIGTERM, SIGINT, or SIGHUP is received. It then
   549  // returns, allowing execution to resume, generally allowing a main() function
   550  // to return and trigger and deferred cleanup functions. This function is
   551  // intended to be called directly from the main goroutine, while a gRPC or HTTP
   552  // server runs in a background goroutine.
   553  func WaitForSignal() {
   554  	sigChan := make(chan os.Signal, 1)
   555  	signal.Notify(sigChan, syscall.SIGTERM)
   556  	signal.Notify(sigChan, syscall.SIGINT)
   557  	signal.Notify(sigChan, syscall.SIGHUP)
   558  	<-sigChan
   559  }