github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ruler/manager_metrics.go (about)

     1  package ruler
     2  
     3  import (
     4  	"github.com/prometheus/client_golang/prometheus"
     5  
     6  	"github.com/cortexproject/cortex/pkg/util"
     7  )
     8  
     9  // ManagerMetrics aggregates metrics exported by the Prometheus
    10  // rules package and returns them as Cortex metrics
    11  type ManagerMetrics struct {
    12  	regs *util.UserRegistries
    13  
    14  	EvalDuration         *prometheus.Desc
    15  	IterationDuration    *prometheus.Desc
    16  	IterationsMissed     *prometheus.Desc
    17  	IterationsScheduled  *prometheus.Desc
    18  	EvalTotal            *prometheus.Desc
    19  	EvalFailures         *prometheus.Desc
    20  	GroupInterval        *prometheus.Desc
    21  	GroupLastEvalTime    *prometheus.Desc
    22  	GroupLastDuration    *prometheus.Desc
    23  	GroupRules           *prometheus.Desc
    24  	GroupLastEvalSamples *prometheus.Desc
    25  }
    26  
    27  // NewManagerMetrics returns a ManagerMetrics struct
    28  func NewManagerMetrics() *ManagerMetrics {
    29  	return &ManagerMetrics{
    30  		regs: util.NewUserRegistries(),
    31  
    32  		EvalDuration: prometheus.NewDesc(
    33  			"cortex_prometheus_rule_evaluation_duration_seconds",
    34  			"The duration for a rule to execute.",
    35  			[]string{"user"},
    36  			nil,
    37  		),
    38  		IterationDuration: prometheus.NewDesc(
    39  			"cortex_prometheus_rule_group_duration_seconds",
    40  			"The duration of rule group evaluations.",
    41  			[]string{"user"},
    42  			nil,
    43  		),
    44  		IterationsMissed: prometheus.NewDesc(
    45  			"cortex_prometheus_rule_group_iterations_missed_total",
    46  			"The total number of rule group evaluations missed due to slow rule group evaluation.",
    47  			[]string{"user", "rule_group"},
    48  			nil,
    49  		),
    50  		IterationsScheduled: prometheus.NewDesc(
    51  			"cortex_prometheus_rule_group_iterations_total",
    52  			"The total number of scheduled rule group evaluations, whether executed or missed.",
    53  			[]string{"user", "rule_group"},
    54  			nil,
    55  		),
    56  		EvalTotal: prometheus.NewDesc(
    57  			"cortex_prometheus_rule_evaluations_total",
    58  			"The total number of rule evaluations.",
    59  			[]string{"user", "rule_group"},
    60  			nil,
    61  		),
    62  		EvalFailures: prometheus.NewDesc(
    63  			"cortex_prometheus_rule_evaluation_failures_total",
    64  			"The total number of rule evaluation failures.",
    65  			[]string{"user", "rule_group"},
    66  			nil,
    67  		),
    68  		GroupInterval: prometheus.NewDesc(
    69  			"cortex_prometheus_rule_group_interval_seconds",
    70  			"The interval of a rule group.",
    71  			[]string{"user", "rule_group"},
    72  			nil,
    73  		),
    74  		GroupLastEvalTime: prometheus.NewDesc(
    75  			"cortex_prometheus_rule_group_last_evaluation_timestamp_seconds",
    76  			"The timestamp of the last rule group evaluation in seconds.",
    77  			[]string{"user", "rule_group"},
    78  			nil,
    79  		),
    80  		GroupLastDuration: prometheus.NewDesc(
    81  			"cortex_prometheus_rule_group_last_duration_seconds",
    82  			"The duration of the last rule group evaluation.",
    83  			[]string{"user", "rule_group"},
    84  			nil,
    85  		),
    86  		GroupRules: prometheus.NewDesc(
    87  			"cortex_prometheus_rule_group_rules",
    88  			"The number of rules.",
    89  			[]string{"user", "rule_group"},
    90  			nil,
    91  		),
    92  		GroupLastEvalSamples: prometheus.NewDesc(
    93  			"cortex_prometheus_last_evaluation_samples",
    94  			"The number of samples returned during the last rule group evaluation.",
    95  			[]string{"user", "rule_group"},
    96  			nil,
    97  		),
    98  	}
    99  }
   100  
   101  // AddUserRegistry adds a user-specific Prometheus registry.
   102  func (m *ManagerMetrics) AddUserRegistry(user string, reg *prometheus.Registry) {
   103  	m.regs.AddUserRegistry(user, reg)
   104  }
   105  
   106  // RemoveUserRegistry removes user-specific Prometheus registry.
   107  func (m *ManagerMetrics) RemoveUserRegistry(user string) {
   108  	m.regs.RemoveUserRegistry(user, true)
   109  }
   110  
   111  // Describe implements the Collector interface
   112  func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) {
   113  	out <- m.EvalDuration
   114  	out <- m.IterationDuration
   115  	out <- m.IterationsMissed
   116  	out <- m.IterationsScheduled
   117  	out <- m.EvalTotal
   118  	out <- m.EvalFailures
   119  	out <- m.GroupInterval
   120  	out <- m.GroupLastEvalTime
   121  	out <- m.GroupLastDuration
   122  	out <- m.GroupRules
   123  	out <- m.GroupLastEvalSamples
   124  }
   125  
   126  // Collect implements the Collector interface
   127  func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) {
   128  	data := m.regs.BuildMetricFamiliesPerUser()
   129  
   130  	// WARNING: It is important that all metrics generated in this method are "Per User".
   131  	// Thanks to that we can actually *remove* metrics for given user (see RemoveUserRegistry).
   132  	// If same user is later re-added, all metrics will start from 0, which is fine.
   133  
   134  	data.SendSumOfSummariesPerUser(out, m.EvalDuration, "prometheus_rule_evaluation_duration_seconds")
   135  	data.SendSumOfSummariesPerUser(out, m.IterationDuration, "prometheus_rule_group_duration_seconds")
   136  
   137  	data.SendSumOfCountersPerUserWithLabels(out, m.IterationsMissed, "prometheus_rule_group_iterations_missed_total", "rule_group")
   138  	data.SendSumOfCountersPerUserWithLabels(out, m.IterationsScheduled, "prometheus_rule_group_iterations_total", "rule_group")
   139  	data.SendSumOfCountersPerUserWithLabels(out, m.EvalTotal, "prometheus_rule_evaluations_total", "rule_group")
   140  	data.SendSumOfCountersPerUserWithLabels(out, m.EvalFailures, "prometheus_rule_evaluation_failures_total", "rule_group")
   141  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupInterval, "prometheus_rule_group_interval_seconds", "rule_group")
   142  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds", "rule_group")
   143  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds", "rule_group")
   144  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupRules, "prometheus_rule_group_rules", "rule_group")
   145  	data.SendSumOfGaugesPerUserWithLabels(out, m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples", "rule_group")
   146  }