github.com/netdata/go.d.plugin@v0.58.1/modules/envoy/collect.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package envoy 4 5 import ( 6 "strconv" 7 "strings" 8 9 "github.com/netdata/go.d.plugin/pkg/prometheus" 10 11 "github.com/prometheus/prometheus/model/labels" 12 ) 13 14 // Server stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/statistics# 15 // Server state: https://www.envoyproxy.io/docs/envoy/latest/api-v3/admin/v3/server_info.proto#enum-admin-v3-serverinfo-state 16 // Listener stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/listeners/stats 17 18 func (e *Envoy) collect() (map[string]int64, error) { 19 mfs, err := e.prom.Scrape() 20 if err != nil { 21 return nil, err 22 } 23 24 mx := make(map[string]int64) 25 26 e.collectServerStats(mx, mfs) 27 e.collectClusterManagerStats(mx, mfs) 28 e.collectClusterUpstreamStats(mx, mfs) 29 e.collectListenerManagerStats(mx, mfs) 30 e.collectListenerAdminDownstreamStats(mx, mfs) 31 e.collectListenerDownstreamStats(mx, mfs) 32 33 return mx, nil 34 } 35 36 func (e *Envoy) collectServerStats(mx map[string]int64, mfs prometheus.MetricFamilies) { 37 seen := make(map[string]bool) 38 for _, n := range []string{ 39 "envoy_server_uptime", 40 "envoy_server_memory_allocated", 41 "envoy_server_memory_heap_size", 42 "envoy_server_memory_physical_size", 43 "envoy_server_parent_connections", 44 "envoy_server_total_connections", 45 } { 46 e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { 47 id := e.joinLabels(m.Labels()) 48 seen[id] = true 49 50 if !e.servers[id] { 51 e.servers[id] = true 52 e.addServerCharts(id, m.Labels()) 53 } 54 55 mx[join(name, id)] += int64(m.Gauge().Value()) 56 }) 57 } 58 59 e.collectGauge(mfs, "envoy_server_state", func(name string, m prometheus.Metric) { 60 id := e.joinLabels(m.Labels()) 61 for _, v := range []string{"live", "draining", "pre_initializing", "initializing"} { 62 mx[join(name, v, id)] = 0 63 } 64 65 switch m.Gauge().Value() { 66 case 0: 67 mx[join(name, "live", id)] = 1 68 case 1: 69 mx[join(name, "draining", id)] = 1 70 case 2: 71 mx[join(name, "pre_initializing", id)] = 1 72 case 3: 73 mx[join(name, "initializing", id)] = 1 74 } 75 }) 76 77 for id := range e.servers { 78 if id != "" && !seen[id] { 79 delete(e.servers, id) 80 e.removeCharts(id) 81 } 82 } 83 } 84 85 func (e *Envoy) collectClusterManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) { 86 seen := make(map[string]bool) 87 for _, n := range []string{ 88 "envoy_cluster_manager_cluster_added", 89 "envoy_cluster_manager_cluster_modified", 90 "envoy_cluster_manager_cluster_removed", 91 "envoy_cluster_manager_cluster_updated", 92 "envoy_cluster_manager_cluster_updated_via_merge", 93 "envoy_cluster_manager_update_merge_cancelled", 94 "envoy_cluster_manager_update_out_of_merge_window", 95 } { 96 e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { 97 id := e.joinLabels(m.Labels()) 98 seen[id] = true 99 100 if !e.clusterMgrs[id] { 101 e.clusterMgrs[id] = true 102 e.addClusterManagerCharts(id, m.Labels()) 103 } 104 105 mx[join(name, id)] += int64(m.Counter().Value()) 106 }) 107 } 108 109 for _, n := range []string{ 110 "envoy_cluster_manager_active_clusters", 111 "envoy_cluster_manager_warming_clusters", 112 } { 113 e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { 114 id := e.joinLabels(m.Labels()) 115 mx[join(name, id)] += int64(m.Gauge().Value()) 116 }) 117 } 118 119 for id := range e.clusterMgrs { 120 if id != "" && !seen[id] { 121 delete(e.clusterMgrs, id) 122 e.removeCharts(id) 123 } 124 } 125 } 126 127 func (e *Envoy) collectListenerAdminDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) { 128 seen := make(map[string]bool) 129 for _, n := range []string{ 130 "envoy_listener_admin_downstream_cx_total", 131 "envoy_listener_admin_downstream_cx_destroy", 132 "envoy_listener_admin_downstream_cx_transport_socket_connect_timeout", 133 "envoy_listener_admin_downstream_cx_overflow", 134 "envoy_listener_admin_downstream_cx_overload_reject", 135 "envoy_listener_admin_downstream_global_cx_overflow", 136 "envoy_listener_admin_downstream_pre_cx_timeout", 137 "envoy_listener_admin_downstream_listener_filter_remote_close", 138 "envoy_listener_admin_downstream_listener_filter_error", 139 } { 140 e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { 141 id := e.joinLabels(m.Labels()) 142 seen[id] = true 143 144 if !e.listenerAdminDownstream[id] { 145 e.listenerAdminDownstream[id] = true 146 e.addListenerAdminDownstreamCharts(id, m.Labels()) 147 } 148 149 mx[join(name, id)] += int64(m.Counter().Value()) 150 }) 151 } 152 for _, n := range []string{ 153 "envoy_listener_admin_downstream_cx_active", 154 "envoy_listener_admin_downstream_pre_cx_active", 155 } { 156 e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { 157 id := e.joinLabels(m.Labels()) 158 seen[id] = true 159 160 if !e.listenerAdminDownstream[id] { 161 e.listenerAdminDownstream[id] = true 162 e.addListenerAdminDownstreamCharts(id, m.Labels()) 163 } 164 165 mx[join(name, id)] += int64(m.Gauge().Value()) 166 }) 167 } 168 169 for id := range e.listenerAdminDownstream { 170 if id != "" && !seen[id] { 171 delete(e.listenerAdminDownstream, id) 172 e.removeCharts(id) 173 } 174 } 175 } 176 177 func (e *Envoy) collectListenerDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) { 178 seen := make(map[string]bool) 179 for _, n := range []string{ 180 "envoy_listener_downstream_cx_total", 181 "envoy_listener_downstream_cx_destroy", 182 "envoy_listener_downstream_cx_transport_socket_connect_timeout", 183 "envoy_listener_downstream_cx_overflow", 184 "envoy_listener_downstream_cx_overload_reject", 185 "envoy_listener_downstream_global_cx_overflow", 186 "envoy_listener_downstream_pre_cx_timeout", 187 "envoy_listener_downstream_listener_filter_remote_close", 188 "envoy_listener_downstream_listener_filter_error", 189 } { 190 e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { 191 id := e.joinLabels(m.Labels()) 192 seen[id] = true 193 194 if !e.listenerDownstream[id] { 195 e.listenerDownstream[id] = true 196 e.addListenerDownstreamCharts(id, m.Labels()) 197 } 198 199 mx[join(name, id)] += int64(m.Counter().Value()) 200 }) 201 } 202 for _, n := range []string{ 203 "envoy_listener_downstream_cx_active", 204 "envoy_listener_downstream_pre_cx_active", 205 } { 206 e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { 207 id := e.joinLabels(m.Labels()) 208 seen[id] = true 209 210 if !e.listenerDownstream[id] { 211 e.listenerDownstream[id] = true 212 e.addListenerDownstreamCharts(id, m.Labels()) 213 } 214 215 mx[join(name, id)] += int64(m.Gauge().Value()) 216 }) 217 } 218 219 for id := range e.listenerDownstream { 220 if id != "" && !seen[id] { 221 delete(e.listenerDownstream, id) 222 e.removeCharts(id) 223 } 224 } 225 } 226 227 func (e *Envoy) collectClusterUpstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) { 228 seen := make(map[string]bool) 229 for _, n := range []string{ 230 "envoy_cluster_upstream_cx_total", 231 "envoy_cluster_upstream_cx_http1_total", 232 "envoy_cluster_upstream_cx_http2_total", 233 "envoy_cluster_upstream_cx_http3_total", 234 "envoy_cluster_upstream_cx_http3_total", 235 "envoy_cluster_upstream_cx_connect_fail", 236 "envoy_cluster_upstream_cx_connect_timeout", 237 "envoy_cluster_upstream_cx_idle_timeout", 238 "envoy_cluster_upstream_cx_max_duration_reached", 239 "envoy_cluster_upstream_cx_connect_attempts_exceeded", 240 "envoy_cluster_upstream_cx_overflow", 241 "envoy_cluster_upstream_cx_destroy", 242 "envoy_cluster_upstream_cx_destroy_local", 243 "envoy_cluster_upstream_cx_destroy_remote", 244 "envoy_cluster_upstream_cx_rx_bytes_total", 245 "envoy_cluster_upstream_cx_tx_bytes_total", 246 "envoy_cluster_upstream_rq_total", 247 "envoy_cluster_upstream_rq_pending_total", 248 "envoy_cluster_upstream_rq_pending_overflow", 249 "envoy_cluster_upstream_rq_pending_failure_eject", 250 "envoy_cluster_upstream_rq_cancelled", 251 "envoy_cluster_upstream_rq_maintenance_mode", 252 "envoy_cluster_upstream_rq_timeout", 253 "envoy_cluster_upstream_rq_max_duration_reached", 254 "envoy_cluster_upstream_rq_per_try_timeout", 255 "envoy_cluster_upstream_rq_rx_reset", 256 "envoy_cluster_upstream_rq_tx_reset", 257 "envoy_cluster_upstream_rq_retry", 258 "envoy_cluster_upstream_rq_retry_backoff_exponential", 259 "envoy_cluster_upstream_rq_retry_backoff_ratelimited", 260 "envoy_cluster_upstream_rq_retry_success", 261 "envoy_cluster_membership_change", 262 "envoy_cluster_update_success", 263 "envoy_cluster_update_failure", 264 "envoy_cluster_update_empty", 265 "envoy_cluster_update_no_rebuild", 266 } { 267 e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { 268 id := e.joinLabels(m.Labels()) 269 seen[id] = true 270 271 if !e.clusterUpstream[id] { 272 e.clusterUpstream[id] = true 273 e.addClusterUpstreamCharts(id, m.Labels()) 274 } 275 276 mx[join(name, id)] += int64(m.Counter().Value()) 277 }) 278 } 279 280 for _, n := range []string{ 281 "envoy_cluster_upstream_cx_active", 282 "envoy_cluster_upstream_cx_rx_bytes_buffered", 283 "envoy_cluster_upstream_cx_tx_bytes_buffered", 284 "envoy_cluster_upstream_rq_active", 285 "envoy_cluster_upstream_rq_pending_active", 286 "envoy_cluster_membership_healthy", 287 "envoy_cluster_membership_degraded", 288 "envoy_cluster_membership_excluded", 289 } { 290 e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { 291 id := e.joinLabels(m.Labels()) 292 seen[id] = true 293 294 if !e.clusterUpstream[id] { 295 e.clusterUpstream[id] = true 296 e.addClusterUpstreamCharts(id, m.Labels()) 297 } 298 299 mx[join(name, id)] += int64(m.Gauge().Value()) 300 }) 301 } 302 303 for id := range e.clusterUpstream { 304 if id != "" && !seen[id] { 305 delete(e.clusterUpstream, id) 306 e.removeCharts(id) 307 } 308 } 309 } 310 311 func (e *Envoy) collectListenerManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) { 312 seen := make(map[string]bool) 313 for _, n := range []string{ 314 "envoy_listener_manager_listener_added", 315 "envoy_listener_manager_listener_modified", 316 "envoy_listener_manager_listener_removed", 317 "envoy_listener_manager_listener_stopped", 318 "envoy_listener_manager_listener_create_success", 319 "envoy_listener_manager_listener_create_failure", 320 "envoy_listener_manager_listener_in_place_updated", 321 } { 322 e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { 323 id := e.joinLabels(m.Labels()) 324 seen[id] = true 325 326 if !e.listenerMgrs[id] { 327 e.listenerMgrs[id] = true 328 e.addListenerManagerCharts(id, m.Labels()) 329 } 330 331 mx[join(name, id)] += int64(m.Counter().Value()) 332 }) 333 } 334 335 for _, n := range []string{ 336 "envoy_listener_manager_total_listeners_warming", 337 "envoy_listener_manager_total_listeners_active", 338 "envoy_listener_manager_total_listeners_draining", 339 } { 340 e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { 341 id := e.joinLabels(m.Labels()) 342 seen[id] = true 343 344 if !e.listenerMgrs[id] { 345 e.listenerMgrs[id] = true 346 e.addListenerManagerCharts(id, m.Labels()) 347 } 348 349 mx[join(name, id)] += int64(m.Gauge().Value()) 350 }) 351 } 352 353 for id := range e.listenerMgrs { 354 if id != "" && !seen[id] { 355 delete(e.listenerMgrs, id) 356 e.removeCharts(id) 357 } 358 } 359 } 360 361 func (e *Envoy) collectGauge(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) { 362 if mf := mfs.GetGauge(metric); mf != nil { 363 for _, m := range mf.Metrics() { 364 process(mf.Name(), m) 365 } 366 } 367 } 368 369 func (e *Envoy) collectCounter(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) { 370 if mf := mfs.GetCounter(metric); mf != nil { 371 for _, m := range mf.Metrics() { 372 process(mf.Name(), m) 373 } 374 } 375 } 376 377 func (e *Envoy) joinLabels(labels labels.Labels) string { 378 var buf strings.Builder 379 first := true 380 for _, lbl := range labels { 381 v := lbl.Value 382 if v == "" { 383 continue 384 } 385 if strings.IndexByte(v, ' ') != -1 { 386 v = spaceReplacer.Replace(v) 387 } 388 if strings.IndexByte(v, '\\') != -1 { 389 if v = decodeLabelValue(v); strings.IndexByte(v, '\\') != -1 { 390 v = backslashReplacer.Replace(v) 391 } 392 } 393 if first { 394 buf.WriteString(v) 395 first = false 396 } else { 397 buf.WriteString("_" + v) 398 } 399 } 400 return buf.String() 401 } 402 403 var ( 404 spaceReplacer = strings.NewReplacer(" ", "_") 405 backslashReplacer = strings.NewReplacer(`\`, "_") 406 ) 407 408 func decodeLabelValue(value string) string { 409 v, err := strconv.Unquote("\"" + value + "\"") 410 if err != nil { 411 return value 412 } 413 return v 414 } 415 416 func join(name string, elems ...string) string { 417 for _, v := range elems { 418 if v != "" { 419 name += "_" + v 420 } 421 } 422 return name 423 }