github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/base/manager_metrics.go (about) 1 package base 2 3 import ( 4 "github.com/prometheus/client_golang/prometheus" 5 6 "github.com/grafana/loki/pkg/util" 7 ) 8 9 // ManagerMetrics aggregates metrics exported by the Prometheus 10 // rules package and returns them as Cortex metrics 11 type ManagerMetrics struct { 12 regs *util.UserRegistries 13 disableRuleGroupLabel bool 14 15 EvalDuration *prometheus.Desc 16 IterationDuration *prometheus.Desc 17 IterationsMissed *prometheus.Desc 18 IterationsScheduled *prometheus.Desc 19 EvalTotal *prometheus.Desc 20 EvalFailures *prometheus.Desc 21 GroupInterval *prometheus.Desc 22 GroupLastEvalTime *prometheus.Desc 23 GroupLastDuration *prometheus.Desc 24 GroupRules *prometheus.Desc 25 GroupLastEvalSamples *prometheus.Desc 26 } 27 28 // NewManagerMetrics returns a ManagerMetrics struct 29 func NewManagerMetrics(disableRuleGroupLabel bool) *ManagerMetrics { 30 commonLabels := []string{"user"} 31 if !disableRuleGroupLabel { 32 commonLabels = append(commonLabels, "rule_group") 33 } 34 return &ManagerMetrics{ 35 regs: util.NewUserRegistries(), 36 disableRuleGroupLabel: disableRuleGroupLabel, 37 38 EvalDuration: prometheus.NewDesc( 39 "cortex_prometheus_rule_evaluation_duration_seconds", 40 "The duration for a rule to execute.", 41 []string{"user"}, 42 nil, 43 ), 44 IterationDuration: prometheus.NewDesc( 45 "cortex_prometheus_rule_group_duration_seconds", 46 "The duration of rule group evaluations.", 47 []string{"user"}, 48 nil, 49 ), 50 IterationsMissed: prometheus.NewDesc( 51 "cortex_prometheus_rule_group_iterations_missed_total", 52 "The total number of rule group evaluations missed due to slow rule group evaluation.", 53 commonLabels, 54 nil, 55 ), 56 IterationsScheduled: prometheus.NewDesc( 57 "cortex_prometheus_rule_group_iterations_total", 58 "The total number of scheduled rule group evaluations, whether executed or missed.", 59 commonLabels, 60 nil, 61 ), 62 EvalTotal: prometheus.NewDesc( 63 "cortex_prometheus_rule_evaluations_total", 64 "The total number of rule evaluations.", 65 commonLabels, 66 nil, 67 ), 68 EvalFailures: prometheus.NewDesc( 69 "cortex_prometheus_rule_evaluation_failures_total", 70 "The total number of rule evaluation failures.", 71 commonLabels, 72 nil, 73 ), 74 GroupInterval: prometheus.NewDesc( 75 "cortex_prometheus_rule_group_interval_seconds", 76 "The interval of a rule group.", 77 commonLabels, 78 nil, 79 ), 80 GroupLastEvalTime: prometheus.NewDesc( 81 "cortex_prometheus_rule_group_last_evaluation_timestamp_seconds", 82 "The timestamp of the last rule group evaluation in seconds.", 83 commonLabels, 84 nil, 85 ), 86 GroupLastDuration: prometheus.NewDesc( 87 "cortex_prometheus_rule_group_last_duration_seconds", 88 "The duration of the last rule group evaluation.", 89 commonLabels, 90 nil, 91 ), 92 GroupRules: prometheus.NewDesc( 93 "cortex_prometheus_rule_group_rules", 94 "The number of rules.", 95 commonLabels, 96 nil, 97 ), 98 GroupLastEvalSamples: prometheus.NewDesc( 99 "cortex_prometheus_last_evaluation_samples", 100 "The number of samples returned during the last rule group evaluation.", 101 commonLabels, 102 nil, 103 ), 104 } 105 } 106 107 // AddUserRegistry adds a user-specific Prometheus registry. 108 func (m *ManagerMetrics) AddUserRegistry(user string, reg *prometheus.Registry) { 109 m.regs.AddUserRegistry(user, reg) 110 } 111 112 // RemoveUserRegistry removes user-specific Prometheus registry. 113 func (m *ManagerMetrics) RemoveUserRegistry(user string) { 114 m.regs.RemoveUserRegistry(user, true) 115 } 116 117 // Describe implements the Collector interface 118 func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) { 119 out <- m.EvalDuration 120 out <- m.IterationDuration 121 out <- m.IterationsMissed 122 out <- m.IterationsScheduled 123 out <- m.EvalTotal 124 out <- m.EvalFailures 125 out <- m.GroupInterval 126 out <- m.GroupLastEvalTime 127 out <- m.GroupLastDuration 128 out <- m.GroupRules 129 out <- m.GroupLastEvalSamples 130 } 131 132 // Collect implements the Collector interface 133 func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) { 134 data := m.regs.BuildMetricFamiliesPerUser() 135 labels := []string{} 136 if !m.disableRuleGroupLabel { 137 labels = append(labels, "rule_group") 138 } 139 // WARNING: It is important that all metrics generated in this method are "Per User". 140 // Thanks to that we can actually *remove* metrics for given user (see RemoveUserRegistry). 141 // If same user is later re-added, all metrics will start from 0, which is fine. 142 143 data.SendSumOfSummariesPerUser(out, m.EvalDuration, "prometheus_rule_evaluation_duration_seconds") 144 data.SendSumOfSummariesPerUser(out, m.IterationDuration, "prometheus_rule_group_duration_seconds") 145 146 data.SendSumOfCountersPerUserWithLabels(out, m.IterationsMissed, "prometheus_rule_group_iterations_missed_total", labels...) 147 data.SendSumOfCountersPerUserWithLabels(out, m.IterationsScheduled, "prometheus_rule_group_iterations_total", labels...) 148 data.SendSumOfCountersPerUserWithLabels(out, m.EvalTotal, "prometheus_rule_evaluations_total", labels...) 149 data.SendSumOfCountersPerUserWithLabels(out, m.EvalFailures, "prometheus_rule_evaluation_failures_total", labels...) 150 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupInterval, "prometheus_rule_group_interval_seconds", labels...) 151 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds", labels...) 152 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds", labels...) 153 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupRules, "prometheus_rule_group_rules", labels...) 154 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples", labels...) 155 }