github.com/cosmos/cosmos-sdk@v0.50.10/telemetry/metrics.go (about)

     1  package telemetry
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"net/http"
     8  	"time"
     9  
    10  	"github.com/hashicorp/go-metrics"
    11  	"github.com/hashicorp/go-metrics/datadog"
    12  	metricsprom "github.com/hashicorp/go-metrics/prometheus"
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	"github.com/prometheus/common/expfmt"
    15  )
    16  
    17  // globalTelemetryEnabled is a private variable that stores the telemetry enabled state.
    18  // It is set on initialization and does not change for the lifetime of the program.
    19  var globalTelemetryEnabled bool
    20  
    21  // IsTelemetryEnabled provides controlled access to check if telemetry is enabled.
    22  func IsTelemetryEnabled() bool {
    23  	return globalTelemetryEnabled
    24  }
    25  
    26  // globalLabels defines the set of global labels that will be applied to all
    27  // metrics emitted using the telemetry package function wrappers.
    28  var globalLabels = []metrics.Label{}
    29  
    30  // Metrics supported format types.
    31  const (
    32  	FormatDefault    = ""
    33  	FormatPrometheus = "prometheus"
    34  	FormatText       = "text"
    35  	ContentTypeText  = `text/plain; version=` + expfmt.TextVersion + `; charset=utf-8`
    36  
    37  	MetricSinkInMem      = "mem"
    38  	MetricSinkStatsd     = "statsd"
    39  	MetricSinkDogsStatsd = "dogstatsd"
    40  )
    41  
    42  // DisplayableSink is implemented by metric sinks that can return their current metrics for display.
    43  type DisplayableSink interface {
    44  	DisplayMetrics(resp http.ResponseWriter, req *http.Request) (any, error) // gatherGeneric calls this with nil arguments
    45  }
    46  
    47  // Config defines the configuration options for application telemetry.
    48  type Config struct {
    49  	// ServiceName is passed to metrics.DefaultConfig; keys are prefixed with it to separate services.
    50  	ServiceName string `mapstructure:"service-name"`
    51    
    52  	// Enabled enables the application telemetry functionality. When enabled,
    53  	// an in-memory sink is also enabled by default. Operators may also enable
    54  	// other sinks such as Prometheus.
    55  	Enabled bool `mapstructure:"enabled"`
    56    
    57  	// EnableHostname enables prefixing gauge values with the hostname.
    58  	EnableHostname bool `mapstructure:"enable-hostname"`
    59    
    60  	// EnableHostnameLabel enables adding the hostname to labels.
    61  	EnableHostnameLabel bool `mapstructure:"enable-hostname-label"`
    62    
    63  	// EnableServiceLabel enables adding the service to labels.
    64  	EnableServiceLabel bool `mapstructure:"enable-service-label"`
    65    
    66  	// PrometheusRetentionTime, when positive, enables a Prometheus metrics sink.
    67  	// It defines the retention duration in seconds.
    68  	PrometheusRetentionTime int64 `mapstructure:"prometheus-retention-time"`
    69    
    70  	// GlobalLabels defines a global set of [name, value] label tuples applied to all
    71  	// metrics emitted using the wrapper functions defined in the telemetry package.
    72  	//
    73  	// Example:
    74  	// [["chain_id", "cosmoshub-1"]]
    75  	GlobalLabels [][]string `mapstructure:"global-labels"`
    76    
    77  	// MetricsSink selects the metrics backend: "statsd", "dogstatsd", or in-memory for any other value.
    78  	MetricsSink string `mapstructure:"metrics-sink" default:"mem"`
    79    
    80  	// StatsdAddr defines the address of a statsd server to send metrics to.
    81  	// Only utilized if MetricsSink is set to "statsd" or "dogstatsd".
    82  	StatsdAddr string `mapstructure:"statsd-addr"`
    83    
    84  	// DatadogHostname defines the hostname to use when emitting metrics to
    85  	// Datadog. Only utilized if MetricsSink is set to "dogstatsd".
    86  	DatadogHostname string `mapstructure:"datadog-hostname"`
    87  }
    88  
    89  // Metrics defines a wrapper around application telemetry functionality. It allows
    90  // metrics to be gathered at any point in time. When creating a Metrics object,
    91  // internally, a global metrics instance is registered with a set of sinks as configured
    92  // by the operator. With the in-memory sink, New also installs metrics.DefaultInmemSignal,
    93  // which dumps formatted recent metrics to STDERR when the process gets a SIGUSR1.
    94  type Metrics struct {
    95  	sink              metrics.MetricSink // primary sink built by New; queried by gatherGeneric
    96  	prometheusEnabled bool               // set by New when Config.PrometheusRetentionTime > 0
    97  }
    98  
    99  // GatherResponse is the result of Metrics.Gather: the encoded metrics and their content type.
   100  type GatherResponse struct {
   101  	Metrics     []byte // encoded metrics payload
   102  	ContentType string // content type describing how Metrics is encoded
   103  }
   104  
   105  // New creates a new instance of Metrics
   106  func New(cfg Config) (_ *Metrics, rerr error) {
   107  	globalTelemetryEnabled = cfg.Enabled
   108  	if !cfg.Enabled {
   109  		return nil, nil
   110  	}
   111  
   112  	if numGlobalLabels := len(cfg.GlobalLabels); numGlobalLabels > 0 {
   113  		parsedGlobalLabels := make([]metrics.Label, numGlobalLabels)
   114  		for i, gl := range cfg.GlobalLabels {
   115  			parsedGlobalLabels[i] = NewLabel(gl[0], gl[1])
   116  		}
   117  		globalLabels = parsedGlobalLabels
   118  	}
   119  
   120  	metricsConf := metrics.DefaultConfig(cfg.ServiceName)
   121  	metricsConf.EnableHostname = cfg.EnableHostname
   122  	metricsConf.EnableHostnameLabel = cfg.EnableHostnameLabel
   123  
   124  	var (
   125  		sink metrics.MetricSink
   126  		err  error
   127  	)
   128  	switch cfg.MetricsSink {
   129  	case MetricSinkStatsd:
   130  		sink, err = metrics.NewStatsdSink(cfg.StatsdAddr)
   131  	case MetricSinkDogsStatsd:
   132  		sink, err = datadog.NewDogStatsdSink(cfg.StatsdAddr, cfg.DatadogHostname)
   133  	default:
   134  		memSink := metrics.NewInmemSink(10*time.Second, time.Minute)
   135  		sink = memSink
   136  		inMemSig := metrics.DefaultInmemSignal(memSink)
   137  		defer func() {
   138  			if rerr != nil {
   139  				inMemSig.Stop()
   140  			}
   141  		}()
   142  	}
   143  
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  
   148  	m := &Metrics{sink: sink}
   149  	fanout := metrics.FanoutSink{sink}
   150  
   151  	if cfg.PrometheusRetentionTime > 0 {
   152  		m.prometheusEnabled = true
   153  		prometheusOpts := metricsprom.PrometheusOpts{
   154  			Expiration: time.Duration(cfg.PrometheusRetentionTime) * time.Second,
   155  		}
   156  
   157  		promSink, err := metricsprom.NewPrometheusSinkFrom(prometheusOpts)
   158  		if err != nil {
   159  			return nil, err
   160  		}
   161  
   162  		fanout = append(fanout, promSink)
   163  	}
   164  
   165  	if _, err := metrics.NewGlobal(metricsConf, fanout); err != nil {
   166  		return nil, err
   167  	}
   168  
   169  	return m, nil
   170  }
   171  
   172  // Gather collects all registered metrics and returns a GatherResponse where the
   173  // metrics are encoded depending on the type. Metrics are either encoded via
   174  // Prometheus or JSON if in-memory.
   175  func (m *Metrics) Gather(format string) (GatherResponse, error) {
   176  	switch format {
   177  	case FormatPrometheus:
   178  		return m.gatherPrometheus()
   179  
   180  	case FormatText:
   181  		return m.gatherGeneric()
   182  
   183  	case FormatDefault:
   184  		return m.gatherGeneric()
   185  
   186  	default:
   187  		return GatherResponse{}, fmt.Errorf("unsupported metrics format: %s", format)
   188  	}
   189  }
   190  
   191  // gatherPrometheus collects Prometheus metrics and returns a GatherResponse.
   192  // If Prometheus metrics are not enabled, it returns an error.
   193  func (m *Metrics) gatherPrometheus() (GatherResponse, error) {
   194  	if !m.prometheusEnabled {
   195  		return GatherResponse{}, fmt.Errorf("prometheus metrics are not enabled")
   196  	}
   197  
   198  	metricsFamilies, err := prometheus.DefaultGatherer.Gather()
   199  	if err != nil {
   200  		return GatherResponse{}, fmt.Errorf("failed to gather prometheus metrics: %w", err)
   201  	}
   202  
   203  	buf := &bytes.Buffer{}
   204  	defer buf.Reset()
   205  
   206  	e := expfmt.NewEncoder(buf, expfmt.NewFormat(expfmt.TypeTextPlain))
   207  
   208  	for _, mf := range metricsFamilies {
   209  		if err := e.Encode(mf); err != nil {
   210  			return GatherResponse{}, fmt.Errorf("failed to encode prometheus metrics: %w", err)
   211  		}
   212  	}
   213  
   214  	return GatherResponse{ContentType: ContentTypeText, Metrics: buf.Bytes()}, nil
   215  }
   216  
   217  // gatherGeneric collects generic metrics and returns a GatherResponse.
   218  func (m *Metrics) gatherGeneric() (GatherResponse, error) {
   219  	gm, ok := m.sink.(DisplayableSink)
   220  	if !ok {
   221  		return GatherResponse{}, fmt.Errorf("non in-memory metrics sink does not support generic format")
   222  	}
   223  
   224  	summary, err := gm.DisplayMetrics(nil, nil)
   225  	if err != nil {
   226  		return GatherResponse{}, fmt.Errorf("failed to gather in-memory metrics: %w", err)
   227  	}
   228  
   229  	content, err := json.Marshal(summary)
   230  	if err != nil {
   231  		return GatherResponse{}, fmt.Errorf("failed to encode in-memory metrics: %w", err)
   232  	}
   233  
   234  	return GatherResponse{ContentType: "application/json", Metrics: content}, nil
   235  }