github.com/netdata/go.d.plugin@v0.58.1/modules/consul/collect_metrics.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package consul 4 5 import ( 6 "fmt" 7 "math" 8 "strconv" 9 "strings" 10 11 "github.com/netdata/go.d.plugin/pkg/prometheus" 12 ) 13 14 func (c *Consul) collectMetricsPrometheus(mx map[string]int64) error { 15 mfs, err := c.prom.Scrape() 16 if err != nil { 17 return err 18 } 19 20 // Key Metrics (https://developer.hashicorp.com/consul/docs/agent/telemetry#key-metrics) 21 22 // prometheus metrics are messy: 23 // - if 'disable_hostname' is false (default): 24 // - consul_autopilot_failure_tolerance => consul_hostname_autopilot_failure_tolerance 25 // - both are exposed 26 // - only the one with the hostname has the correct value 27 // - 1.14.3 (it probably has something to do with cloud management version): 28 // - runtime_sys_bytes => runtime_sys_bytes_sys_bytes; consul_autopilot_healthy => consul_autopilot_healthy_healthy 29 // - both are exposed 30 // - only the one with the double name has the correct value 31 32 if c.isServer() { 33 c.collectSummary(mx, mfs, "raft_thread_main_saturation") 34 c.collectSummary(mx, mfs, "raft_thread_fsm_saturation") 35 c.collectSummary(mx, mfs, "raft_boltdb_logsPerBatch") 36 c.collectSummary(mx, mfs, "kvs_apply") 37 c.collectSummary(mx, mfs, "txn_apply") 38 c.collectSummary(mx, mfs, "raft_boltdb_storeLogs") 39 c.collectSummary(mx, mfs, "raft_rpc_installSnapshot") // make sense for followers only 40 c.collectSummary(mx, mfs, "raft_commitTime") // make sense for leader only 41 c.collectSummary(mx, mfs, "raft_leader_lastContact") // make sense for leader only 42 43 c.collectCounter(mx, mfs, "raft_apply", precision) // make sense for leader only 44 c.collectCounter(mx, mfs, "raft_state_candidate", 1) 45 c.collectCounter(mx, mfs, "raft_state_leader", 1) 46 47 c.collectGaugeBool(mx, mfs, "autopilot_healthy", "autopilot_healthy_healthy") 48 c.collectGaugeBool(mx, mfs, "server_isLeader", "server_isLeader_isLeader") 49 c.collectGauge(mx, mfs, "autopilot_failure_tolerance", 1, "autopilot_failure_tolerance_failure_tolerance") 50 c.collectGauge(mx, mfs, "raft_fsm_lastRestoreDuration", 1) 51 c.collectGauge(mx, mfs, "raft_leader_oldestLogAge", 1, "raft_leader_oldestLogAge_oldestLogAge") 52 c.collectGauge(mx, mfs, "raft_boltdb_freelistBytes", 1, "raft_boltdb_freelistBytes_freelistBytes") 53 54 if isLeader, ok := c.isLeader(mfs); ok { 55 if isLeader && !c.hasLeaderCharts { 56 c.addLeaderCharts() 57 c.hasLeaderCharts = true 58 } 59 if !isLeader && c.hasLeaderCharts { 60 c.removeLeaderCharts() 61 c.hasLeaderCharts = false 62 } 63 if !isLeader && !c.hasFollowerCharts { 64 c.addFollowerCharts() 65 c.hasFollowerCharts = true 66 } 67 if isLeader && c.hasFollowerCharts { 68 c.removeFollowerCharts() 69 c.hasFollowerCharts = false 70 } 71 } 72 } 73 74 c.collectGauge(mx, mfs, "system_licenseExpiration", 3600, "system_licenseExpiration_licenseExpiration") 75 76 c.collectCounter(mx, mfs, "client_rpc", 1) 77 c.collectCounter(mx, mfs, "client_rpc_exceeded", 1) 78 c.collectCounter(mx, mfs, "client_rpc_failed", 1) 79 80 c.collectGauge(mx, mfs, "runtime_alloc_bytes", 1, "runtime_alloc_bytes_alloc_bytes") 81 c.collectGauge(mx, mfs, "runtime_sys_bytes", 1, "runtime_sys_bytes_sys_bytes") 82 c.collectGauge(mx, mfs, "runtime_total_gc_pause_ns", 1, "runtime_total_gc_pause_ns_total_gc_pause_ns") 83 84 return nil 85 } 86 87 func (c *Consul) isLeader(mfs prometheus.MetricFamilies) (bool, bool) { 88 var mf *prometheus.MetricFamily 89 for _, v := range []string{"server_isLeader_isLeader", "server_isLeader"} { 90 if mf = mfs.GetGauge(c.promMetricNameWithHostname(v)); mf != nil { 91 break 92 } 93 if mf = mfs.GetGauge(c.promMetricName(v)); mf != nil { 94 break 95 } 96 } 97 98 if mf == nil { 99 return false, false 100 } 101 102 return mf.Metrics()[0].Gauge().Value() == 1, true 103 } 104 105 func (c *Consul) collectGauge(mx map[string]int64, mfs prometheus.MetricFamilies, name string, mul float64, aliases ...string) { 106 var mf *prometheus.MetricFamily 107 for _, v := range append(aliases, name) { 108 if mf = mfs.GetGauge(c.promMetricNameWithHostname(v)); mf != nil { 109 break 110 } 111 if mf = mfs.GetGauge(c.promMetricName(v)); mf != nil { 112 break 113 } 114 } 115 116 if mf == nil { 117 return 118 } 119 120 v := mf.Metrics()[0].Gauge().Value() 121 122 if !math.IsNaN(v) { 123 mx[name] = int64(v * mul) 124 } 125 } 126 127 func (c *Consul) collectGaugeBool(mx map[string]int64, mfs prometheus.MetricFamilies, name string, aliases ...string) { 128 var mf *prometheus.MetricFamily 129 for _, v := range append(aliases, name) { 130 if mf = mfs.GetGauge(c.promMetricNameWithHostname(v)); mf != nil { 131 break 132 } 133 if mf = mfs.GetGauge(c.promMetricName(v)); mf != nil { 134 break 135 } 136 } 137 138 if mf == nil { 139 return 140 } 141 142 v := mf.Metrics()[0].Gauge().Value() 143 144 if !math.IsNaN(v) { 145 mx[name+"_yes"] = boolToInt(v == 1) 146 mx[name+"_no"] = boolToInt(v == 0) 147 } 148 } 149 150 func (c *Consul) collectCounter(mx map[string]int64, mfs prometheus.MetricFamilies, name string, mul float64) { 151 mf := mfs.GetCounter(c.promMetricName(name)) 152 if mf == nil { 153 return 154 } 155 156 v := mf.Metrics()[0].Counter().Value() 157 158 if !math.IsNaN(v) { 159 mx[name] = int64(v * mul) 160 } 161 } 162 163 func (c *Consul) collectSummary(mx map[string]int64, mfs prometheus.MetricFamilies, name string) { 164 mf := mfs.GetSummary(c.promMetricName(name)) 165 if mf == nil { 166 return 167 } 168 169 m := mf.Metrics()[0] 170 171 for _, q := range m.Summary().Quantiles() { 172 v := q.Value() 173 // MaxAge is 10 seconds (hardcoded) 174 // https://github.com/hashicorp/go-metrics/blob/b6d5c860c07ef6eeec89f4a662c7b452dd4d0c93/prometheus/prometheus.go#L227 175 if math.IsNaN(v) { 176 v = 0 177 } 178 179 id := fmt.Sprintf("%s_quantile=%s", name, formatFloat(q.Quantile())) 180 mx[id] = int64(v * precision * precision) 181 } 182 183 mx[name+"_sum"] = int64(m.Summary().Sum() * precision) 184 mx[name+"_count"] = int64(m.Summary().Count()) 185 } 186 187 func (c *Consul) promMetricName(name string) string { 188 px := c.cfg.DebugConfig.Telemetry.MetricsPrefix 189 return px + "_" + name 190 } 191 192 var forbiddenCharsReplacer = strings.NewReplacer(" ", "_", ".", "_", "=", "_", "-", "_", "/", "_") 193 194 // controlled by 'disable_hostname' 195 // https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-disable_hostname 196 func (c *Consul) promMetricNameWithHostname(name string) string { 197 px := c.cfg.DebugConfig.Telemetry.MetricsPrefix 198 node := forbiddenCharsReplacer.Replace(c.cfg.Config.NodeName) 199 200 return px + "_" + node + "_" + name 201 } 202 203 func formatFloat(v float64) string { 204 return strconv.FormatFloat(v, 'f', -1, 64) 205 }