github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ruler/manager_metrics.go (about) 1 package ruler 2 3 import ( 4 "github.com/prometheus/client_golang/prometheus" 5 6 "github.com/cortexproject/cortex/pkg/util" 7 ) 8 9 // ManagerMetrics aggregates metrics exported by the Prometheus 10 // rules package and returns them as Cortex metrics 11 type ManagerMetrics struct { 12 regs *util.UserRegistries 13 14 EvalDuration *prometheus.Desc 15 IterationDuration *prometheus.Desc 16 IterationsMissed *prometheus.Desc 17 IterationsScheduled *prometheus.Desc 18 EvalTotal *prometheus.Desc 19 EvalFailures *prometheus.Desc 20 GroupInterval *prometheus.Desc 21 GroupLastEvalTime *prometheus.Desc 22 GroupLastDuration *prometheus.Desc 23 GroupRules *prometheus.Desc 24 GroupLastEvalSamples *prometheus.Desc 25 } 26 27 // NewManagerMetrics returns a ManagerMetrics struct 28 func NewManagerMetrics() *ManagerMetrics { 29 return &ManagerMetrics{ 30 regs: util.NewUserRegistries(), 31 32 EvalDuration: prometheus.NewDesc( 33 "cortex_prometheus_rule_evaluation_duration_seconds", 34 "The duration for a rule to execute.", 35 []string{"user"}, 36 nil, 37 ), 38 IterationDuration: prometheus.NewDesc( 39 "cortex_prometheus_rule_group_duration_seconds", 40 "The duration of rule group evaluations.", 41 []string{"user"}, 42 nil, 43 ), 44 IterationsMissed: prometheus.NewDesc( 45 "cortex_prometheus_rule_group_iterations_missed_total", 46 "The total number of rule group evaluations missed due to slow rule group evaluation.", 47 []string{"user", "rule_group"}, 48 nil, 49 ), 50 IterationsScheduled: prometheus.NewDesc( 51 "cortex_prometheus_rule_group_iterations_total", 52 "The total number of scheduled rule group evaluations, whether executed or missed.", 53 []string{"user", "rule_group"}, 54 nil, 55 ), 56 EvalTotal: prometheus.NewDesc( 57 "cortex_prometheus_rule_evaluations_total", 58 "The total number of rule evaluations.", 59 []string{"user", "rule_group"}, 60 nil, 61 ), 62 EvalFailures: prometheus.NewDesc( 63 "cortex_prometheus_rule_evaluation_failures_total", 64 "The total number of rule evaluation failures.", 65 []string{"user", "rule_group"}, 66 nil, 67 ), 68 GroupInterval: prometheus.NewDesc( 69 "cortex_prometheus_rule_group_interval_seconds", 70 "The interval of a rule group.", 71 []string{"user", "rule_group"}, 72 nil, 73 ), 74 GroupLastEvalTime: prometheus.NewDesc( 75 "cortex_prometheus_rule_group_last_evaluation_timestamp_seconds", 76 "The timestamp of the last rule group evaluation in seconds.", 77 []string{"user", "rule_group"}, 78 nil, 79 ), 80 GroupLastDuration: prometheus.NewDesc( 81 "cortex_prometheus_rule_group_last_duration_seconds", 82 "The duration of the last rule group evaluation.", 83 []string{"user", "rule_group"}, 84 nil, 85 ), 86 GroupRules: prometheus.NewDesc( 87 "cortex_prometheus_rule_group_rules", 88 "The number of rules.", 89 []string{"user", "rule_group"}, 90 nil, 91 ), 92 GroupLastEvalSamples: prometheus.NewDesc( 93 "cortex_prometheus_last_evaluation_samples", 94 "The number of samples returned during the last rule group evaluation.", 95 []string{"user", "rule_group"}, 96 nil, 97 ), 98 } 99 } 100 101 // AddUserRegistry adds a user-specific Prometheus registry. 102 func (m *ManagerMetrics) AddUserRegistry(user string, reg *prometheus.Registry) { 103 m.regs.AddUserRegistry(user, reg) 104 } 105 106 // RemoveUserRegistry removes user-specific Prometheus registry. 107 func (m *ManagerMetrics) RemoveUserRegistry(user string) { 108 m.regs.RemoveUserRegistry(user, true) 109 } 110 111 // Describe implements the Collector interface 112 func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) { 113 out <- m.EvalDuration 114 out <- m.IterationDuration 115 out <- m.IterationsMissed 116 out <- m.IterationsScheduled 117 out <- m.EvalTotal 118 out <- m.EvalFailures 119 out <- m.GroupInterval 120 out <- m.GroupLastEvalTime 121 out <- m.GroupLastDuration 122 out <- m.GroupRules 123 out <- m.GroupLastEvalSamples 124 } 125 126 // Collect implements the Collector interface 127 func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) { 128 data := m.regs.BuildMetricFamiliesPerUser() 129 130 // WARNING: It is important that all metrics generated in this method are "Per User". 131 // Thanks to that we can actually *remove* metrics for given user (see RemoveUserRegistry). 132 // If same user is later re-added, all metrics will start from 0, which is fine. 133 134 data.SendSumOfSummariesPerUser(out, m.EvalDuration, "prometheus_rule_evaluation_duration_seconds") 135 data.SendSumOfSummariesPerUser(out, m.IterationDuration, "prometheus_rule_group_duration_seconds") 136 137 data.SendSumOfCountersPerUserWithLabels(out, m.IterationsMissed, "prometheus_rule_group_iterations_missed_total", "rule_group") 138 data.SendSumOfCountersPerUserWithLabels(out, m.IterationsScheduled, "prometheus_rule_group_iterations_total", "rule_group") 139 data.SendSumOfCountersPerUserWithLabels(out, m.EvalTotal, "prometheus_rule_evaluations_total", "rule_group") 140 data.SendSumOfCountersPerUserWithLabels(out, m.EvalFailures, "prometheus_rule_evaluation_failures_total", "rule_group") 141 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupInterval, "prometheus_rule_group_interval_seconds", "rule_group") 142 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds", "rule_group") 143 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds", "rule_group") 144 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupRules, "prometheus_rule_group_rules", "rule_group") 145 data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples", "rule_group") 146 }