github.com/netdata/go.d.plugin@v0.58.1/modules/envoy/collect.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package envoy
     4  
     5  import (
     6  	"strconv"
     7  	"strings"
     8  
     9  	"github.com/netdata/go.d.plugin/pkg/prometheus"
    10  
    11  	"github.com/prometheus/prometheus/model/labels"
    12  )
    13  
    14  // Server stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/statistics#
    15  // Server state: https://www.envoyproxy.io/docs/envoy/latest/api-v3/admin/v3/server_info.proto#enum-admin-v3-serverinfo-state
    16  // Listener stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/listeners/stats
    17  
    18  func (e *Envoy) collect() (map[string]int64, error) {
    19  	mfs, err := e.prom.Scrape()
    20  	if err != nil {
    21  		return nil, err
    22  	}
    23  
    24  	mx := make(map[string]int64)
    25  
    26  	e.collectServerStats(mx, mfs)
    27  	e.collectClusterManagerStats(mx, mfs)
    28  	e.collectClusterUpstreamStats(mx, mfs)
    29  	e.collectListenerManagerStats(mx, mfs)
    30  	e.collectListenerAdminDownstreamStats(mx, mfs)
    31  	e.collectListenerDownstreamStats(mx, mfs)
    32  
    33  	return mx, nil
    34  }
    35  
    36  func (e *Envoy) collectServerStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
    37  	seen := make(map[string]bool)
    38  	for _, n := range []string{
    39  		"envoy_server_uptime",
    40  		"envoy_server_memory_allocated",
    41  		"envoy_server_memory_heap_size",
    42  		"envoy_server_memory_physical_size",
    43  		"envoy_server_parent_connections",
    44  		"envoy_server_total_connections",
    45  	} {
    46  		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
    47  			id := e.joinLabels(m.Labels())
    48  			seen[id] = true
    49  
    50  			if !e.servers[id] {
    51  				e.servers[id] = true
    52  				e.addServerCharts(id, m.Labels())
    53  			}
    54  
    55  			mx[join(name, id)] += int64(m.Gauge().Value())
    56  		})
    57  	}
    58  
    59  	e.collectGauge(mfs, "envoy_server_state", func(name string, m prometheus.Metric) {
    60  		id := e.joinLabels(m.Labels())
    61  		for _, v := range []string{"live", "draining", "pre_initializing", "initializing"} {
    62  			mx[join(name, v, id)] = 0
    63  		}
    64  
    65  		switch m.Gauge().Value() {
    66  		case 0:
    67  			mx[join(name, "live", id)] = 1
    68  		case 1:
    69  			mx[join(name, "draining", id)] = 1
    70  		case 2:
    71  			mx[join(name, "pre_initializing", id)] = 1
    72  		case 3:
    73  			mx[join(name, "initializing", id)] = 1
    74  		}
    75  	})
    76  
    77  	for id := range e.servers {
    78  		if id != "" && !seen[id] {
    79  			delete(e.servers, id)
    80  			e.removeCharts(id)
    81  		}
    82  	}
    83  }
    84  
    85  func (e *Envoy) collectClusterManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
    86  	seen := make(map[string]bool)
    87  	for _, n := range []string{
    88  		"envoy_cluster_manager_cluster_added",
    89  		"envoy_cluster_manager_cluster_modified",
    90  		"envoy_cluster_manager_cluster_removed",
    91  		"envoy_cluster_manager_cluster_updated",
    92  		"envoy_cluster_manager_cluster_updated_via_merge",
    93  		"envoy_cluster_manager_update_merge_cancelled",
    94  		"envoy_cluster_manager_update_out_of_merge_window",
    95  	} {
    96  		e.collectCounter(mfs, n, func(name string, m prometheus.Metric) {
    97  			id := e.joinLabels(m.Labels())
    98  			seen[id] = true
    99  
   100  			if !e.clusterMgrs[id] {
   101  				e.clusterMgrs[id] = true
   102  				e.addClusterManagerCharts(id, m.Labels())
   103  			}
   104  
   105  			mx[join(name, id)] += int64(m.Counter().Value())
   106  		})
   107  	}
   108  
   109  	for _, n := range []string{
   110  		"envoy_cluster_manager_active_clusters",
   111  		"envoy_cluster_manager_warming_clusters",
   112  	} {
   113  		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
   114  			id := e.joinLabels(m.Labels())
   115  			mx[join(name, id)] += int64(m.Gauge().Value())
   116  		})
   117  	}
   118  
   119  	for id := range e.clusterMgrs {
   120  		if id != "" && !seen[id] {
   121  			delete(e.clusterMgrs, id)
   122  			e.removeCharts(id)
   123  		}
   124  	}
   125  }
   126  
   127  func (e *Envoy) collectListenerAdminDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
   128  	seen := make(map[string]bool)
   129  	for _, n := range []string{
   130  		"envoy_listener_admin_downstream_cx_total",
   131  		"envoy_listener_admin_downstream_cx_destroy",
   132  		"envoy_listener_admin_downstream_cx_transport_socket_connect_timeout",
   133  		"envoy_listener_admin_downstream_cx_overflow",
   134  		"envoy_listener_admin_downstream_cx_overload_reject",
   135  		"envoy_listener_admin_downstream_global_cx_overflow",
   136  		"envoy_listener_admin_downstream_pre_cx_timeout",
   137  		"envoy_listener_admin_downstream_listener_filter_remote_close",
   138  		"envoy_listener_admin_downstream_listener_filter_error",
   139  	} {
   140  		e.collectCounter(mfs, n, func(name string, m prometheus.Metric) {
   141  			id := e.joinLabels(m.Labels())
   142  			seen[id] = true
   143  
   144  			if !e.listenerAdminDownstream[id] {
   145  				e.listenerAdminDownstream[id] = true
   146  				e.addListenerAdminDownstreamCharts(id, m.Labels())
   147  			}
   148  
   149  			mx[join(name, id)] += int64(m.Counter().Value())
   150  		})
   151  	}
   152  	for _, n := range []string{
   153  		"envoy_listener_admin_downstream_cx_active",
   154  		"envoy_listener_admin_downstream_pre_cx_active",
   155  	} {
   156  		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
   157  			id := e.joinLabels(m.Labels())
   158  			seen[id] = true
   159  
   160  			if !e.listenerAdminDownstream[id] {
   161  				e.listenerAdminDownstream[id] = true
   162  				e.addListenerAdminDownstreamCharts(id, m.Labels())
   163  			}
   164  
   165  			mx[join(name, id)] += int64(m.Gauge().Value())
   166  		})
   167  	}
   168  
   169  	for id := range e.listenerAdminDownstream {
   170  		if id != "" && !seen[id] {
   171  			delete(e.listenerAdminDownstream, id)
   172  			e.removeCharts(id)
   173  		}
   174  	}
   175  }
   176  
   177  func (e *Envoy) collectListenerDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
   178  	seen := make(map[string]bool)
   179  	for _, n := range []string{
   180  		"envoy_listener_downstream_cx_total",
   181  		"envoy_listener_downstream_cx_destroy",
   182  		"envoy_listener_downstream_cx_transport_socket_connect_timeout",
   183  		"envoy_listener_downstream_cx_overflow",
   184  		"envoy_listener_downstream_cx_overload_reject",
   185  		"envoy_listener_downstream_global_cx_overflow",
   186  		"envoy_listener_downstream_pre_cx_timeout",
   187  		"envoy_listener_downstream_listener_filter_remote_close",
   188  		"envoy_listener_downstream_listener_filter_error",
   189  	} {
   190  		e.collectCounter(mfs, n, func(name string, m prometheus.Metric) {
   191  			id := e.joinLabels(m.Labels())
   192  			seen[id] = true
   193  
   194  			if !e.listenerDownstream[id] {
   195  				e.listenerDownstream[id] = true
   196  				e.addListenerDownstreamCharts(id, m.Labels())
   197  			}
   198  
   199  			mx[join(name, id)] += int64(m.Counter().Value())
   200  		})
   201  	}
   202  	for _, n := range []string{
   203  		"envoy_listener_downstream_cx_active",
   204  		"envoy_listener_downstream_pre_cx_active",
   205  	} {
   206  		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
   207  			id := e.joinLabels(m.Labels())
   208  			seen[id] = true
   209  
   210  			if !e.listenerDownstream[id] {
   211  				e.listenerDownstream[id] = true
   212  				e.addListenerDownstreamCharts(id, m.Labels())
   213  			}
   214  
   215  			mx[join(name, id)] += int64(m.Gauge().Value())
   216  		})
   217  	}
   218  
   219  	for id := range e.listenerDownstream {
   220  		if id != "" && !seen[id] {
   221  			delete(e.listenerDownstream, id)
   222  			e.removeCharts(id)
   223  		}
   224  	}
   225  }
   226  
   227  func (e *Envoy) collectClusterUpstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
   228  	seen := make(map[string]bool)
   229  	for _, n := range []string{
   230  		"envoy_cluster_upstream_cx_total",
   231  		"envoy_cluster_upstream_cx_http1_total",
   232  		"envoy_cluster_upstream_cx_http2_total",
   233  		"envoy_cluster_upstream_cx_http3_total",
   234  		"envoy_cluster_upstream_cx_http3_total",
   235  		"envoy_cluster_upstream_cx_connect_fail",
   236  		"envoy_cluster_upstream_cx_connect_timeout",
   237  		"envoy_cluster_upstream_cx_idle_timeout",
   238  		"envoy_cluster_upstream_cx_max_duration_reached",
   239  		"envoy_cluster_upstream_cx_connect_attempts_exceeded",
   240  		"envoy_cluster_upstream_cx_overflow",
   241  		"envoy_cluster_upstream_cx_destroy",
   242  		"envoy_cluster_upstream_cx_destroy_local",
   243  		"envoy_cluster_upstream_cx_destroy_remote",
   244  		"envoy_cluster_upstream_cx_rx_bytes_total",
   245  		"envoy_cluster_upstream_cx_tx_bytes_total",
   246  		"envoy_cluster_upstream_rq_total",
   247  		"envoy_cluster_upstream_rq_pending_total",
   248  		"envoy_cluster_upstream_rq_pending_overflow",
   249  		"envoy_cluster_upstream_rq_pending_failure_eject",
   250  		"envoy_cluster_upstream_rq_cancelled",
   251  		"envoy_cluster_upstream_rq_maintenance_mode",
   252  		"envoy_cluster_upstream_rq_timeout",
   253  		"envoy_cluster_upstream_rq_max_duration_reached",
   254  		"envoy_cluster_upstream_rq_per_try_timeout",
   255  		"envoy_cluster_upstream_rq_rx_reset",
   256  		"envoy_cluster_upstream_rq_tx_reset",
   257  		"envoy_cluster_upstream_rq_retry",
   258  		"envoy_cluster_upstream_rq_retry_backoff_exponential",
   259  		"envoy_cluster_upstream_rq_retry_backoff_ratelimited",
   260  		"envoy_cluster_upstream_rq_retry_success",
   261  		"envoy_cluster_membership_change",
   262  		"envoy_cluster_update_success",
   263  		"envoy_cluster_update_failure",
   264  		"envoy_cluster_update_empty",
   265  		"envoy_cluster_update_no_rebuild",
   266  	} {
   267  		e.collectCounter(mfs, n, func(name string, m prometheus.Metric) {
   268  			id := e.joinLabels(m.Labels())
   269  			seen[id] = true
   270  
   271  			if !e.clusterUpstream[id] {
   272  				e.clusterUpstream[id] = true
   273  				e.addClusterUpstreamCharts(id, m.Labels())
   274  			}
   275  
   276  			mx[join(name, id)] += int64(m.Counter().Value())
   277  		})
   278  	}
   279  
   280  	for _, n := range []string{
   281  		"envoy_cluster_upstream_cx_active",
   282  		"envoy_cluster_upstream_cx_rx_bytes_buffered",
   283  		"envoy_cluster_upstream_cx_tx_bytes_buffered",
   284  		"envoy_cluster_upstream_rq_active",
   285  		"envoy_cluster_upstream_rq_pending_active",
   286  		"envoy_cluster_membership_healthy",
   287  		"envoy_cluster_membership_degraded",
   288  		"envoy_cluster_membership_excluded",
   289  	} {
   290  		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
   291  			id := e.joinLabels(m.Labels())
   292  			seen[id] = true
   293  
   294  			if !e.clusterUpstream[id] {
   295  				e.clusterUpstream[id] = true
   296  				e.addClusterUpstreamCharts(id, m.Labels())
   297  			}
   298  
   299  			mx[join(name, id)] += int64(m.Gauge().Value())
   300  		})
   301  	}
   302  
   303  	for id := range e.clusterUpstream {
   304  		if id != "" && !seen[id] {
   305  			delete(e.clusterUpstream, id)
   306  			e.removeCharts(id)
   307  		}
   308  	}
   309  }
   310  
   311  func (e *Envoy) collectListenerManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
   312  	seen := make(map[string]bool)
   313  	for _, n := range []string{
   314  		"envoy_listener_manager_listener_added",
   315  		"envoy_listener_manager_listener_modified",
   316  		"envoy_listener_manager_listener_removed",
   317  		"envoy_listener_manager_listener_stopped",
   318  		"envoy_listener_manager_listener_create_success",
   319  		"envoy_listener_manager_listener_create_failure",
   320  		"envoy_listener_manager_listener_in_place_updated",
   321  	} {
   322  		e.collectCounter(mfs, n, func(name string, m prometheus.Metric) {
   323  			id := e.joinLabels(m.Labels())
   324  			seen[id] = true
   325  
   326  			if !e.listenerMgrs[id] {
   327  				e.listenerMgrs[id] = true
   328  				e.addListenerManagerCharts(id, m.Labels())
   329  			}
   330  
   331  			mx[join(name, id)] += int64(m.Counter().Value())
   332  		})
   333  	}
   334  
   335  	for _, n := range []string{
   336  		"envoy_listener_manager_total_listeners_warming",
   337  		"envoy_listener_manager_total_listeners_active",
   338  		"envoy_listener_manager_total_listeners_draining",
   339  	} {
   340  		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
   341  			id := e.joinLabels(m.Labels())
   342  			seen[id] = true
   343  
   344  			if !e.listenerMgrs[id] {
   345  				e.listenerMgrs[id] = true
   346  				e.addListenerManagerCharts(id, m.Labels())
   347  			}
   348  
   349  			mx[join(name, id)] += int64(m.Gauge().Value())
   350  		})
   351  	}
   352  
   353  	for id := range e.listenerMgrs {
   354  		if id != "" && !seen[id] {
   355  			delete(e.listenerMgrs, id)
   356  			e.removeCharts(id)
   357  		}
   358  	}
   359  }
   360  
   361  func (e *Envoy) collectGauge(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) {
   362  	if mf := mfs.GetGauge(metric); mf != nil {
   363  		for _, m := range mf.Metrics() {
   364  			process(mf.Name(), m)
   365  		}
   366  	}
   367  }
   368  
   369  func (e *Envoy) collectCounter(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) {
   370  	if mf := mfs.GetCounter(metric); mf != nil {
   371  		for _, m := range mf.Metrics() {
   372  			process(mf.Name(), m)
   373  		}
   374  	}
   375  }
   376  
   377  func (e *Envoy) joinLabels(labels labels.Labels) string {
   378  	var buf strings.Builder
   379  	first := true
   380  	for _, lbl := range labels {
   381  		v := lbl.Value
   382  		if v == "" {
   383  			continue
   384  		}
   385  		if strings.IndexByte(v, ' ') != -1 {
   386  			v = spaceReplacer.Replace(v)
   387  		}
   388  		if strings.IndexByte(v, '\\') != -1 {
   389  			if v = decodeLabelValue(v); strings.IndexByte(v, '\\') != -1 {
   390  				v = backslashReplacer.Replace(v)
   391  			}
   392  		}
   393  		if first {
   394  			buf.WriteString(v)
   395  			first = false
   396  		} else {
   397  			buf.WriteString("_" + v)
   398  		}
   399  	}
   400  	return buf.String()
   401  }
   402  
   403  var (
   404  	spaceReplacer     = strings.NewReplacer(" ", "_")
   405  	backslashReplacer = strings.NewReplacer(`\`, "_")
   406  )
   407  
   408  func decodeLabelValue(value string) string {
   409  	v, err := strconv.Unquote("\"" + value + "\"")
   410  	if err != nil {
   411  		return value
   412  	}
   413  	return v
   414  }
   415  
   416  func join(name string, elems ...string) string {
   417  	for _, v := range elems {
   418  		if v != "" {
   419  			name += "_" + v
   420  		}
   421  	}
   422  	return name
   423  }