github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/base/manager_metrics.go (about)

     1  package base
     2  
     3  import (
     4  	"github.com/prometheus/client_golang/prometheus"
     5  
     6  	"github.com/grafana/loki/pkg/util"
     7  )
     8  
     9  // ManagerMetrics aggregates metrics exported by the Prometheus
    10  // rules package and returns them as Cortex metrics
    11  type ManagerMetrics struct {
    12  	regs                  *util.UserRegistries
    13  	disableRuleGroupLabel bool
    14  
    15  	EvalDuration         *prometheus.Desc
    16  	IterationDuration    *prometheus.Desc
    17  	IterationsMissed     *prometheus.Desc
    18  	IterationsScheduled  *prometheus.Desc
    19  	EvalTotal            *prometheus.Desc
    20  	EvalFailures         *prometheus.Desc
    21  	GroupInterval        *prometheus.Desc
    22  	GroupLastEvalTime    *prometheus.Desc
    23  	GroupLastDuration    *prometheus.Desc
    24  	GroupRules           *prometheus.Desc
    25  	GroupLastEvalSamples *prometheus.Desc
    26  }
    27  
    28  // NewManagerMetrics returns a ManagerMetrics struct
    29  func NewManagerMetrics(disableRuleGroupLabel bool) *ManagerMetrics {
    30  	commonLabels := []string{"user"}
    31  	if !disableRuleGroupLabel {
    32  		commonLabels = append(commonLabels, "rule_group")
    33  	}
    34  	return &ManagerMetrics{
    35  		regs:                  util.NewUserRegistries(),
    36  		disableRuleGroupLabel: disableRuleGroupLabel,
    37  
    38  		EvalDuration: prometheus.NewDesc(
    39  			"cortex_prometheus_rule_evaluation_duration_seconds",
    40  			"The duration for a rule to execute.",
    41  			[]string{"user"},
    42  			nil,
    43  		),
    44  		IterationDuration: prometheus.NewDesc(
    45  			"cortex_prometheus_rule_group_duration_seconds",
    46  			"The duration of rule group evaluations.",
    47  			[]string{"user"},
    48  			nil,
    49  		),
    50  		IterationsMissed: prometheus.NewDesc(
    51  			"cortex_prometheus_rule_group_iterations_missed_total",
    52  			"The total number of rule group evaluations missed due to slow rule group evaluation.",
    53  			commonLabels,
    54  			nil,
    55  		),
    56  		IterationsScheduled: prometheus.NewDesc(
    57  			"cortex_prometheus_rule_group_iterations_total",
    58  			"The total number of scheduled rule group evaluations, whether executed or missed.",
    59  			commonLabels,
    60  			nil,
    61  		),
    62  		EvalTotal: prometheus.NewDesc(
    63  			"cortex_prometheus_rule_evaluations_total",
    64  			"The total number of rule evaluations.",
    65  			commonLabels,
    66  			nil,
    67  		),
    68  		EvalFailures: prometheus.NewDesc(
    69  			"cortex_prometheus_rule_evaluation_failures_total",
    70  			"The total number of rule evaluation failures.",
    71  			commonLabels,
    72  			nil,
    73  		),
    74  		GroupInterval: prometheus.NewDesc(
    75  			"cortex_prometheus_rule_group_interval_seconds",
    76  			"The interval of a rule group.",
    77  			commonLabels,
    78  			nil,
    79  		),
    80  		GroupLastEvalTime: prometheus.NewDesc(
    81  			"cortex_prometheus_rule_group_last_evaluation_timestamp_seconds",
    82  			"The timestamp of the last rule group evaluation in seconds.",
    83  			commonLabels,
    84  			nil,
    85  		),
    86  		GroupLastDuration: prometheus.NewDesc(
    87  			"cortex_prometheus_rule_group_last_duration_seconds",
    88  			"The duration of the last rule group evaluation.",
    89  			commonLabels,
    90  			nil,
    91  		),
    92  		GroupRules: prometheus.NewDesc(
    93  			"cortex_prometheus_rule_group_rules",
    94  			"The number of rules.",
    95  			commonLabels,
    96  			nil,
    97  		),
    98  		GroupLastEvalSamples: prometheus.NewDesc(
    99  			"cortex_prometheus_last_evaluation_samples",
   100  			"The number of samples returned during the last rule group evaluation.",
   101  			commonLabels,
   102  			nil,
   103  		),
   104  	}
   105  }
   106  
   107  // AddUserRegistry adds a user-specific Prometheus registry.
   108  func (m *ManagerMetrics) AddUserRegistry(user string, reg *prometheus.Registry) {
   109  	m.regs.AddUserRegistry(user, reg)
   110  }
   111  
   112  // RemoveUserRegistry removes user-specific Prometheus registry.
   113  func (m *ManagerMetrics) RemoveUserRegistry(user string) {
   114  	m.regs.RemoveUserRegistry(user, true)
   115  }
   116  
   117  // Describe implements the Collector interface
   118  func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) {
   119  	out <- m.EvalDuration
   120  	out <- m.IterationDuration
   121  	out <- m.IterationsMissed
   122  	out <- m.IterationsScheduled
   123  	out <- m.EvalTotal
   124  	out <- m.EvalFailures
   125  	out <- m.GroupInterval
   126  	out <- m.GroupLastEvalTime
   127  	out <- m.GroupLastDuration
   128  	out <- m.GroupRules
   129  	out <- m.GroupLastEvalSamples
   130  }
   131  
   132  // Collect implements the Collector interface
   133  func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) {
   134  	data := m.regs.BuildMetricFamiliesPerUser()
   135  	labels := []string{}
   136  	if !m.disableRuleGroupLabel {
   137  		labels = append(labels, "rule_group")
   138  	}
   139  	// WARNING: It is important that all metrics generated in this method are "Per User".
   140  	// Thanks to that we can actually *remove* metrics for given user (see RemoveUserRegistry).
   141  	// If same user is later re-added, all metrics will start from 0, which is fine.
   142  
   143  	data.SendSumOfSummariesPerUser(out, m.EvalDuration, "prometheus_rule_evaluation_duration_seconds")
   144  	data.SendSumOfSummariesPerUser(out, m.IterationDuration, "prometheus_rule_group_duration_seconds")
   145  
   146  	data.SendSumOfCountersPerUserWithLabels(out, m.IterationsMissed, "prometheus_rule_group_iterations_missed_total", labels...)
   147  	data.SendSumOfCountersPerUserWithLabels(out, m.IterationsScheduled, "prometheus_rule_group_iterations_total", labels...)
   148  	data.SendSumOfCountersPerUserWithLabels(out, m.EvalTotal, "prometheus_rule_evaluations_total", labels...)
   149  	data.SendSumOfCountersPerUserWithLabels(out, m.EvalFailures, "prometheus_rule_evaluation_failures_total", labels...)
   150  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupInterval, "prometheus_rule_group_interval_seconds", labels...)
   151  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds", labels...)
   152  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds", labels...)
   153  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupRules, "prometheus_rule_group_rules", labels...)
   154  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples", labels...)
   155  }