github.com/cosmos/cosmos-sdk@v0.50.10/telemetry/metrics.go (about) 1 package telemetry 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "net/http" 8 "time" 9 10 "github.com/hashicorp/go-metrics" 11 "github.com/hashicorp/go-metrics/datadog" 12 metricsprom "github.com/hashicorp/go-metrics/prometheus" 13 "github.com/prometheus/client_golang/prometheus" 14 "github.com/prometheus/common/expfmt" 15 ) 16 17 // globalTelemetryEnabled is a private variable that stores the telemetry enabled state. 18 // It is set on initialization and does not change for the lifetime of the program. 19 var globalTelemetryEnabled bool 20 21 // IsTelemetryEnabled provides controlled access to check if telemetry is enabled. 22 func IsTelemetryEnabled() bool { 23 return globalTelemetryEnabled 24 } 25 26 // globalLabels defines the set of global labels that will be applied to all 27 // metrics emitted using the telemetry package function wrappers. 28 var globalLabels = []metrics.Label{} 29 30 // Metrics supported format types. 31 const ( 32 FormatDefault = "" 33 FormatPrometheus = "prometheus" 34 FormatText = "text" 35 ContentTypeText = `text/plain; version=` + expfmt.TextVersion + `; charset=utf-8` 36 37 MetricSinkInMem = "mem" 38 MetricSinkStatsd = "statsd" 39 MetricSinkDogsStatsd = "dogstatsd" 40 ) 41 42 // DisplayableSink is an interface that defines a method for displaying metrics. 43 type DisplayableSink interface { 44 DisplayMetrics(resp http.ResponseWriter, req *http.Request) (any, error) 45 } 46 47 // Config defines the configuration options for application telemetry. 48 type Config struct { 49 // Prefixed with keys to separate services 50 ServiceName string `mapstructure:"service-name"` 51 52 // Enabled enables the application telemetry functionality. When enabled, 53 // an in-memory sink is also enabled by default. Operators may also enabled 54 // other sinks such as Prometheus. 55 Enabled bool `mapstructure:"enabled"` 56 57 // Enable prefixing gauge values with hostname 58 EnableHostname bool `mapstructure:"enable-hostname"` 59 60 // Enable adding hostname to labels 61 EnableHostnameLabel bool `mapstructure:"enable-hostname-label"` 62 63 // Enable adding service to labels 64 EnableServiceLabel bool `mapstructure:"enable-service-label"` 65 66 // PrometheusRetentionTime, when positive, enables a Prometheus metrics sink. 67 // It defines the retention duration in seconds. 68 PrometheusRetentionTime int64 `mapstructure:"prometheus-retention-time"` 69 70 // GlobalLabels defines a global set of name/value label tuples applied to all 71 // metrics emitted using the wrapper functions defined in telemetry package. 72 // 73 // Example: 74 // [["chain_id", "cosmoshub-1"]] 75 GlobalLabels [][]string `mapstructure:"global-labels"` 76 77 // MetricsSink defines the type of metrics backend to use. 78 MetricsSink string `mapstructure:"metrics-sink" default:"mem"` 79 80 // StatsdAddr defines the address of a statsd server to send metrics to. 81 // Only utilized if MetricsSink is set to "statsd" or "dogstatsd". 82 StatsdAddr string `mapstructure:"statsd-addr"` 83 84 // DatadogHostname defines the hostname to use when emitting metrics to 85 // Datadog. Only utilized if MetricsSink is set to "dogstatsd". 86 DatadogHostname string `mapstructure:"datadog-hostname"` 87 } 88 89 // Metrics defines a wrapper around application telemetry functionality. It allows 90 // metrics to be gathered at any point in time. When creating a Metrics object, 91 // internally, a global metrics is registered with a set of sinks as configured 92 // by the operator. In addition to the sinks, when a process gets a SIGUSR1, a 93 // dump of formatted recent metrics will be sent to STDERR. 94 type Metrics struct { 95 sink metrics.MetricSink 96 prometheusEnabled bool 97 } 98 99 // GatherResponse is the response type of registered metrics 100 type GatherResponse struct { 101 Metrics []byte 102 ContentType string 103 } 104 105 // New creates a new instance of Metrics 106 func New(cfg Config) (_ *Metrics, rerr error) { 107 globalTelemetryEnabled = cfg.Enabled 108 if !cfg.Enabled { 109 return nil, nil 110 } 111 112 if numGlobalLabels := len(cfg.GlobalLabels); numGlobalLabels > 0 { 113 parsedGlobalLabels := make([]metrics.Label, numGlobalLabels) 114 for i, gl := range cfg.GlobalLabels { 115 parsedGlobalLabels[i] = NewLabel(gl[0], gl[1]) 116 } 117 globalLabels = parsedGlobalLabels 118 } 119 120 metricsConf := metrics.DefaultConfig(cfg.ServiceName) 121 metricsConf.EnableHostname = cfg.EnableHostname 122 metricsConf.EnableHostnameLabel = cfg.EnableHostnameLabel 123 124 var ( 125 sink metrics.MetricSink 126 err error 127 ) 128 switch cfg.MetricsSink { 129 case MetricSinkStatsd: 130 sink, err = metrics.NewStatsdSink(cfg.StatsdAddr) 131 case MetricSinkDogsStatsd: 132 sink, err = datadog.NewDogStatsdSink(cfg.StatsdAddr, cfg.DatadogHostname) 133 default: 134 memSink := metrics.NewInmemSink(10*time.Second, time.Minute) 135 sink = memSink 136 inMemSig := metrics.DefaultInmemSignal(memSink) 137 defer func() { 138 if rerr != nil { 139 inMemSig.Stop() 140 } 141 }() 142 } 143 144 if err != nil { 145 return nil, err 146 } 147 148 m := &Metrics{sink: sink} 149 fanout := metrics.FanoutSink{sink} 150 151 if cfg.PrometheusRetentionTime > 0 { 152 m.prometheusEnabled = true 153 prometheusOpts := metricsprom.PrometheusOpts{ 154 Expiration: time.Duration(cfg.PrometheusRetentionTime) * time.Second, 155 } 156 157 promSink, err := metricsprom.NewPrometheusSinkFrom(prometheusOpts) 158 if err != nil { 159 return nil, err 160 } 161 162 fanout = append(fanout, promSink) 163 } 164 165 if _, err := metrics.NewGlobal(metricsConf, fanout); err != nil { 166 return nil, err 167 } 168 169 return m, nil 170 } 171 172 // Gather collects all registered metrics and returns a GatherResponse where the 173 // metrics are encoded depending on the type. Metrics are either encoded via 174 // Prometheus or JSON if in-memory. 175 func (m *Metrics) Gather(format string) (GatherResponse, error) { 176 switch format { 177 case FormatPrometheus: 178 return m.gatherPrometheus() 179 180 case FormatText: 181 return m.gatherGeneric() 182 183 case FormatDefault: 184 return m.gatherGeneric() 185 186 default: 187 return GatherResponse{}, fmt.Errorf("unsupported metrics format: %s", format) 188 } 189 } 190 191 // gatherPrometheus collects Prometheus metrics and returns a GatherResponse. 192 // If Prometheus metrics are not enabled, it returns an error. 193 func (m *Metrics) gatherPrometheus() (GatherResponse, error) { 194 if !m.prometheusEnabled { 195 return GatherResponse{}, fmt.Errorf("prometheus metrics are not enabled") 196 } 197 198 metricsFamilies, err := prometheus.DefaultGatherer.Gather() 199 if err != nil { 200 return GatherResponse{}, fmt.Errorf("failed to gather prometheus metrics: %w", err) 201 } 202 203 buf := &bytes.Buffer{} 204 defer buf.Reset() 205 206 e := expfmt.NewEncoder(buf, expfmt.NewFormat(expfmt.TypeTextPlain)) 207 208 for _, mf := range metricsFamilies { 209 if err := e.Encode(mf); err != nil { 210 return GatherResponse{}, fmt.Errorf("failed to encode prometheus metrics: %w", err) 211 } 212 } 213 214 return GatherResponse{ContentType: ContentTypeText, Metrics: buf.Bytes()}, nil 215 } 216 217 // gatherGeneric collects generic metrics and returns a GatherResponse. 218 func (m *Metrics) gatherGeneric() (GatherResponse, error) { 219 gm, ok := m.sink.(DisplayableSink) 220 if !ok { 221 return GatherResponse{}, fmt.Errorf("non in-memory metrics sink does not support generic format") 222 } 223 224 summary, err := gm.DisplayMetrics(nil, nil) 225 if err != nil { 226 return GatherResponse{}, fmt.Errorf("failed to gather in-memory metrics: %w", err) 227 } 228 229 content, err := json.Marshal(summary) 230 if err != nil { 231 return GatherResponse{}, fmt.Errorf("failed to encode in-memory metrics: %w", err) 232 } 233 234 return GatherResponse{ContentType: "application/json", Metrics: content}, nil 235 }