// github.com/hashicorp/go-metrics@v0.5.3/prometheus/prometheus.go

//go:build go1.9
// +build go1.9

package prometheus

import (
	"fmt"
	"log"
	"strings"
	"sync"
	"time"

	"github.com/hashicorp/go-metrics"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/push"
)

var (
	// DefaultPrometheusOpts is the default set of options used when creating a
	// PrometheusSink.
	DefaultPrometheusOpts = PrometheusOpts{
		Expiration: 60 * time.Second,
		Name:       "default_prometheus_sink",
	}
)

// PrometheusOpts is used to configure the Prometheus Sink.
type PrometheusOpts struct {
	// Expiration is the duration a metric is valid for, after which it will be
	// untracked. If the value is zero, a metric is never expired.
	Expiration time.Duration
	// Registerer is the registry the sink registers itself with. When nil,
	// NewPrometheusSinkFrom falls back to prometheus.DefaultRegisterer.
	Registerer prometheus.Registerer

	// GaugeDefinitions, SummaryDefinitions, and CounterDefinitions allow us to
	// pre-declare metrics by giving their Name, Help, and ConstLabels to the
	// PrometheusSink when it is created. Metrics declared in this way will be
	// initialized at zero and will not be deleted or altered when their expiry
	// is reached.
	//
	// Ex: PrometheusOpts{
	//     Expiration: 10 * time.Second,
	//     GaugeDefinitions: []GaugeDefinition{
	//         {
	//           Name: []string{ "application", "component", "measurement"},
	//           Help: "application_component_measurement provides an example of how to declare static metrics",
	//           ConstLabels: []metrics.Label{ { Name: "my_label", Value: "does_not_change" }, },
	//         },
	//     },
	// }
	GaugeDefinitions   []GaugeDefinition
	SummaryDefinitions []SummaryDefinition
	CounterDefinitions []CounterDefinition
	// Name is used as both the name and the help text of the placeholder
	// gauge that Describe reports for the sink. When empty,
	// NewPrometheusSinkFrom substitutes "default_prometheus_sink".
	Name string
}

// PrometheusSink is a metrics sink that keeps gauges, summaries and counters
// in sync.Maps keyed by the flattened metric name plus labels, and exposes
// them to Prometheus through its Describe and Collect methods.
type PrometheusSink struct {
	// If these will ever be copied, they should be converted to *sync.Map values and initialized appropriately
	gauges    sync.Map
	summaries sync.Map
	counters  sync.Map
	// expiration is the idle time after which runtime-created metrics are
	// dropped during collection; zero disables expiry.
	expiration time.Duration
	// help maps "<type>.<flattened name>" (e.g. "gauge.my_metric") to the help
	// text supplied by a pre-declared definition.
	help map[string]string
	// name is the name of the placeholder gauge reported by Describe.
	name string
}

// GaugeDefinition can be provided to PrometheusOpts to declare a constant gauge that is not deleted on expiry.
type GaugeDefinition struct {
	Name        []string
	ConstLabels []metrics.Label
	Help        string
}

// gauge pairs a Prometheus gauge with the bookkeeping needed for expiry.
type gauge struct {
	prometheus.Gauge
	// updatedAt is the time the value was last set through the sink.
	updatedAt time.Time
	// canDelete is set if the metric is created during runtime so we know it's ephemeral and can delete it on expiry.
	canDelete bool
}

// SummaryDefinition can be provided to PrometheusOpts to declare a constant summary that is not deleted on expiry.
type SummaryDefinition struct {
	Name        []string
	ConstLabels []metrics.Label
	Help        string
}

// summary pairs a Prometheus summary with the bookkeeping needed for expiry.
type summary struct {
	prometheus.Summary
	// updatedAt is the time a sample was last observed through the sink.
	updatedAt time.Time
	// canDelete is set for summaries created at runtime; only those are
	// removed on expiry.
	canDelete bool
}

// CounterDefinition can be provided to PrometheusOpts to declare a constant counter that is not deleted on expiry.
type CounterDefinition struct {
	Name        []string
	ConstLabels []metrics.Label
	Help        string
}

// counter pairs a Prometheus counter with the bookkeeping needed for expiry.
type counter struct {
	prometheus.Counter
	// updatedAt is the time the counter was last incremented through the sink.
	updatedAt time.Time
	// canDelete is set for counters created at runtime; only those are
	// removed on expiry.
	canDelete bool
}

// NewPrometheusSink creates a new PrometheusSink using the default options.
106 func NewPrometheusSink() (*PrometheusSink, error) { 107 return NewPrometheusSinkFrom(DefaultPrometheusOpts) 108 } 109 110 // NewPrometheusSinkFrom creates a new PrometheusSink using the passed options. 111 func NewPrometheusSinkFrom(opts PrometheusOpts) (*PrometheusSink, error) { 112 name := opts.Name 113 if name == "" { 114 name = "default_prometheus_sink" 115 } 116 sink := &PrometheusSink{ 117 gauges: sync.Map{}, 118 summaries: sync.Map{}, 119 counters: sync.Map{}, 120 expiration: opts.Expiration, 121 help: make(map[string]string), 122 name: name, 123 } 124 125 initGauges(&sink.gauges, opts.GaugeDefinitions, sink.help) 126 initSummaries(&sink.summaries, opts.SummaryDefinitions, sink.help) 127 initCounters(&sink.counters, opts.CounterDefinitions, sink.help) 128 129 reg := opts.Registerer 130 if reg == nil { 131 reg = prometheus.DefaultRegisterer 132 } 133 134 return sink, reg.Register(sink) 135 } 136 137 // Describe sends a Collector.Describe value from the descriptor created around PrometheusSink.Name 138 // Note that we cannot describe all the metrics (gauges, counters, summaries) in the sink as 139 // metrics can be added at any point during the lifecycle of the sink, which does not respect 140 // the idempotency aspect of the Collector.Describe() interface 141 func (p *PrometheusSink) Describe(c chan<- *prometheus.Desc) { 142 // dummy value to be able to register and unregister "empty" sinks 143 // Note this is not actually retained in the PrometheusSink so this has no side effects 144 // on the caller's sink. So it shouldn't show up to any of its consumers. 145 prometheus.NewGauge(prometheus.GaugeOpts{Name: p.name, Help: p.name}).Describe(c) 146 } 147 148 // Collect meets the collection interface and allows us to enforce our expiration 149 // logic to clean up ephemeral metrics if their value haven't been set for a 150 // duration exceeding our allowed expiration time. 
151 func (p *PrometheusSink) Collect(c chan<- prometheus.Metric) { 152 p.collectAtTime(c, time.Now()) 153 } 154 155 // collectAtTime allows internal testing of the expiry based logic here without 156 // mocking clocks or making tests timing sensitive. 157 func (p *PrometheusSink) collectAtTime(c chan<- prometheus.Metric, t time.Time) { 158 expire := p.expiration != 0 159 p.gauges.Range(func(k, v interface{}) bool { 160 if v == nil { 161 return true 162 } 163 g := v.(*gauge) 164 lastUpdate := g.updatedAt 165 if expire && lastUpdate.Add(p.expiration).Before(t) { 166 if g.canDelete { 167 p.gauges.Delete(k) 168 return true 169 } 170 } 171 g.Collect(c) 172 return true 173 }) 174 p.summaries.Range(func(k, v interface{}) bool { 175 if v == nil { 176 return true 177 } 178 s := v.(*summary) 179 lastUpdate := s.updatedAt 180 if expire && lastUpdate.Add(p.expiration).Before(t) { 181 if s.canDelete { 182 p.summaries.Delete(k) 183 return true 184 } 185 } 186 s.Collect(c) 187 return true 188 }) 189 p.counters.Range(func(k, v interface{}) bool { 190 if v == nil { 191 return true 192 } 193 count := v.(*counter) 194 lastUpdate := count.updatedAt 195 if expire && lastUpdate.Add(p.expiration).Before(t) { 196 if count.canDelete { 197 p.counters.Delete(k) 198 return true 199 } 200 } 201 count.Collect(c) 202 return true 203 }) 204 } 205 206 func initGauges(m *sync.Map, gauges []GaugeDefinition, help map[string]string) { 207 for _, g := range gauges { 208 key, hash := flattenKey(g.Name, g.ConstLabels) 209 help[fmt.Sprintf("gauge.%s", key)] = g.Help 210 pG := prometheus.NewGauge(prometheus.GaugeOpts{ 211 Name: key, 212 Help: g.Help, 213 ConstLabels: prometheusLabels(g.ConstLabels), 214 }) 215 m.Store(hash, &gauge{Gauge: pG}) 216 } 217 return 218 } 219 220 func initSummaries(m *sync.Map, summaries []SummaryDefinition, help map[string]string) { 221 for _, s := range summaries { 222 key, hash := flattenKey(s.Name, s.ConstLabels) 223 help[fmt.Sprintf("summary.%s", key)] = s.Help 224 pS := 
prometheus.NewSummary(prometheus.SummaryOpts{ 225 Name: key, 226 Help: s.Help, 227 MaxAge: 10 * time.Second, 228 ConstLabels: prometheusLabels(s.ConstLabels), 229 Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, 230 }) 231 m.Store(hash, &summary{Summary: pS}) 232 } 233 return 234 } 235 236 func initCounters(m *sync.Map, counters []CounterDefinition, help map[string]string) { 237 for _, c := range counters { 238 key, hash := flattenKey(c.Name, c.ConstLabels) 239 help[fmt.Sprintf("counter.%s", key)] = c.Help 240 pC := prometheus.NewCounter(prometheus.CounterOpts{ 241 Name: key, 242 Help: c.Help, 243 ConstLabels: prometheusLabels(c.ConstLabels), 244 }) 245 m.Store(hash, &counter{Counter: pC}) 246 } 247 return 248 } 249 250 var forbiddenCharsReplacer = strings.NewReplacer(" ", "_", ".", "_", "=", "_", "-", "_", "/", "_") 251 252 func flattenKey(parts []string, labels []metrics.Label) (string, string) { 253 key := strings.Join(parts, "_") 254 key = forbiddenCharsReplacer.Replace(key) 255 256 hash := key 257 for _, label := range labels { 258 hash += ";" + label.Name + "=" + label.Value 259 } 260 261 return key, hash 262 } 263 264 func prometheusLabels(labels []metrics.Label) prometheus.Labels { 265 l := make(prometheus.Labels) 266 for _, label := range labels { 267 l[label.Name] = label.Value 268 } 269 return l 270 } 271 272 func (p *PrometheusSink) SetGauge(parts []string, val float32) { 273 p.SetPrecisionGauge(parts, float64(val)) 274 } 275 276 func (p *PrometheusSink) SetGaugeWithLabels(parts []string, val float32, labels []metrics.Label) { 277 p.SetPrecisionGaugeWithLabels(parts, float64(val), labels) 278 } 279 280 func (p *PrometheusSink) SetPrecisionGauge(parts []string, val float64) { 281 p.SetPrecisionGaugeWithLabels(parts, val, nil) 282 } 283 284 func (p *PrometheusSink) SetPrecisionGaugeWithLabels(parts []string, val float64, labels []metrics.Label) { 285 key, hash := flattenKey(parts, labels) 286 pg, ok := p.gauges.Load(hash) 287 288 // The 
sync.Map underlying gauges stores pointers to our structs. If we need to make updates, 289 // rather than modifying the underlying value directly, which would be racy, we make a local 290 // copy by dereferencing the pointer we get back, making the appropriate changes, and then 291 // storing a pointer to our local copy. The underlying Prometheus types are threadsafe, 292 // so there's no issues there. It's possible for racy updates to occur to the updatedAt 293 // value, but since we're always setting it to time.Now(), it doesn't really matter. 294 if ok { 295 localGauge := *pg.(*gauge) 296 localGauge.Set(val) 297 localGauge.updatedAt = time.Now() 298 p.gauges.Store(hash, &localGauge) 299 300 // The gauge does not exist, create the gauge and allow it to be deleted 301 } else { 302 help := key 303 existingHelp, ok := p.help[fmt.Sprintf("gauge.%s", key)] 304 if ok { 305 help = existingHelp 306 } 307 g := prometheus.NewGauge(prometheus.GaugeOpts{ 308 Name: key, 309 Help: help, 310 ConstLabels: prometheusLabels(labels), 311 }) 312 g.Set(val) 313 pg = &gauge{ 314 Gauge: g, 315 updatedAt: time.Now(), 316 canDelete: true, 317 } 318 p.gauges.Store(hash, pg) 319 } 320 } 321 322 func (p *PrometheusSink) AddSample(parts []string, val float32) { 323 p.AddSampleWithLabels(parts, val, nil) 324 } 325 326 func (p *PrometheusSink) AddSampleWithLabels(parts []string, val float32, labels []metrics.Label) { 327 key, hash := flattenKey(parts, labels) 328 ps, ok := p.summaries.Load(hash) 329 330 // Does the summary already exist for this sample type? 
331 if ok { 332 localSummary := *ps.(*summary) 333 localSummary.Observe(float64(val)) 334 localSummary.updatedAt = time.Now() 335 p.summaries.Store(hash, &localSummary) 336 337 // The summary does not exist, create the Summary and allow it to be deleted 338 } else { 339 help := key 340 existingHelp, ok := p.help[fmt.Sprintf("summary.%s", key)] 341 if ok { 342 help = existingHelp 343 } 344 s := prometheus.NewSummary(prometheus.SummaryOpts{ 345 Name: key, 346 Help: help, 347 MaxAge: 10 * time.Second, 348 ConstLabels: prometheusLabels(labels), 349 Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, 350 }) 351 s.Observe(float64(val)) 352 ps = &summary{ 353 Summary: s, 354 updatedAt: time.Now(), 355 canDelete: true, 356 } 357 p.summaries.Store(hash, ps) 358 } 359 } 360 361 // EmitKey is not implemented. Prometheus doesn’t offer a type for which an 362 // arbitrary number of values is retained, as Prometheus works with a pull 363 // model, rather than a push model. 364 func (p *PrometheusSink) EmitKey(key []string, val float32) { 365 } 366 367 func (p *PrometheusSink) IncrCounter(parts []string, val float32) { 368 p.IncrCounterWithLabels(parts, val, nil) 369 } 370 371 func (p *PrometheusSink) IncrCounterWithLabels(parts []string, val float32, labels []metrics.Label) { 372 key, hash := flattenKey(parts, labels) 373 pc, ok := p.counters.Load(hash) 374 375 // Prometheus Counter.Add() panics if val < 0. We don't want this to 376 // cause applications to crash, so log an error instead. 377 if val < 0 { 378 log.Printf("[ERR] Attempting to increment Prometheus counter %v with value negative value %v", key, val) 379 return 380 } 381 382 // Does the counter exist? 
383 if ok { 384 localCounter := *pc.(*counter) 385 localCounter.Add(float64(val)) 386 localCounter.updatedAt = time.Now() 387 p.counters.Store(hash, &localCounter) 388 389 // The counter does not exist yet, create it and allow it to be deleted 390 } else { 391 help := key 392 existingHelp, ok := p.help[fmt.Sprintf("counter.%s", key)] 393 if ok { 394 help = existingHelp 395 } 396 c := prometheus.NewCounter(prometheus.CounterOpts{ 397 Name: key, 398 Help: help, 399 ConstLabels: prometheusLabels(labels), 400 }) 401 c.Add(float64(val)) 402 pc = &counter{ 403 Counter: c, 404 updatedAt: time.Now(), 405 canDelete: true, 406 } 407 p.counters.Store(hash, pc) 408 } 409 } 410 411 // PrometheusPushSink wraps a normal prometheus sink and provides an address and facilities to export it to an address 412 // on an interval. 413 type PrometheusPushSink struct { 414 *PrometheusSink 415 pusher *push.Pusher 416 address string 417 pushInterval time.Duration 418 stopChan chan struct{} 419 } 420 421 // NewPrometheusPushSink creates a PrometheusPushSink by taking an address, interval, and destination name. 422 func NewPrometheusPushSink(address string, pushInterval time.Duration, name string) (*PrometheusPushSink, error) { 423 promSink := &PrometheusSink{ 424 gauges: sync.Map{}, 425 summaries: sync.Map{}, 426 counters: sync.Map{}, 427 expiration: 60 * time.Second, 428 name: "default_prometheus_sink", 429 } 430 431 pusher := push.New(address, name).Collector(promSink) 432 433 sink := &PrometheusPushSink{ 434 promSink, 435 pusher, 436 address, 437 pushInterval, 438 make(chan struct{}), 439 } 440 441 sink.flushMetrics() 442 return sink, nil 443 } 444 445 func (s *PrometheusPushSink) flushMetrics() { 446 ticker := time.NewTicker(s.pushInterval) 447 448 go func() { 449 for { 450 select { 451 case <-ticker.C: 452 err := s.pusher.Push() 453 if err != nil { 454 log.Printf("[ERR] Error pushing to Prometheus! 
Err: %s", err) 455 } 456 case <-s.stopChan: 457 ticker.Stop() 458 return 459 } 460 } 461 }() 462 } 463 464 // Shutdown tears down the PrometheusPushSink, and blocks while flushing metrics to the backend. 465 func (s *PrometheusPushSink) Shutdown() { 466 close(s.stopChan) 467 // Closing the channel only stops the running goroutine that pushes metrics. 468 // To minimize the chance of data loss pusher.Push is called one last time. 469 s.pusher.Push() 470 }