github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/pkg/metrics/workers.go (about)

     1  package metrics
     2  
     3  import (
     4  	"github.com/prometheus/client_golang/prometheus"
     5  )
     6  
     7  const (
     8  	// WorkerExecResultSuccess for success result label
     9  	WorkerExecResultSuccess = "success"
    10  	// WorkerExecResultErrored for errored result label
    11  	WorkerExecResultErrored = "errored"
    12  )
    13  
    14  // WorkerExecDurations is a histogram metric of the execution duration in
    15  // seconds of the workers labelled by worker type and result.
    16  var WorkerExecDurations = prometheus.NewHistogramVec(
    17  	prometheus.HistogramOpts{
    18  		Namespace: "workers",
    19  		Subsystem: "exec",
    20  		Name:      "durations",
    21  
    22  		Help: "Execution duration in seconds of the workers labelled by worker type and result.",
    23  
    24  		// A 30 seconds of granularity should be hopefully be enough. With 10
    25  		// buckets, it gives us a range from 0 to 5 minutes. We may readjust these
    26  		// parameters when we gather more metrics.
    27  		Buckets: prometheus.LinearBuckets(0, 30, 10),
    28  	},
    29  	[]string{"worker_type", "result"},
    30  )
    31  
    32  // WorkerExecCounter is a counter number of total executions, without counting
    33  // retries, of the workers labelled by worker type and result.
    34  var WorkerExecCounter = prometheus.NewCounterVec(
    35  	prometheus.CounterOpts{
    36  		Namespace: "workers",
    37  		Subsystem: "exec",
    38  		Name:      "count",
    39  
    40  		Help: `Number of total executions, without counting retries, of the workers labelled by
    41  worker type and result. This should be equivalent to the number of jobs consumed
    42  from the queue.`,
    43  	},
    44  	[]string{"worker_type", "result"},
    45  )
    46  
    47  // WorkerKonnectorExecDeleteCounter is a counter number of total executions, without counting
    48  // retries, of the konnectors jobs with the "accound_deleted: true" parameter
    49  var WorkerKonnectorExecDeleteCounter = prometheus.NewCounterVec(
    50  	prometheus.CounterOpts{
    51  		Namespace: "workers",
    52  		Subsystem: "konnectors",
    53  		Name:      "delete_count",
    54  
    55  		Help: `Number of konnectors executions, with the "account_deleted: true" parameter`,
    56  	},
    57  	[]string{"worker_type", "result"},
    58  )
    59  
    60  // WorkerExecTimeoutsCounter is a counter number of total timeouts,
    61  // labelled by worker type and slug.
    62  var WorkerExecTimeoutsCounter = prometheus.NewCounterVec(
    63  	prometheus.CounterOpts{
    64  		Namespace: "workers",
    65  		Subsystem: "exec",
    66  		Name:      "timeouts",
    67  
    68  		Help: `Number of total timeouts, of the workers labelled by worker type and slug.`,
    69  	},
    70  	[]string{"worker_type", "slug"},
    71  )
    72  
    73  // WorkerExecRetries is a histogram metric of the number of retries of the
    74  // workers labelled by worker type.
    75  var WorkerExecRetries = prometheus.NewHistogramVec(
    76  	prometheus.HistogramOpts{
    77  		Namespace: "workers",
    78  		Subsystem: "exec",
    79  		Name:      "retries",
    80  
    81  		Help: `Number of retries of the workers labelled by worker type.`,
    82  
    83  		// Execution count should usually not be greater than 5.
    84  		Buckets: prometheus.LinearBuckets(0, 1, 5),
    85  	},
    86  	[]string{"worker_type"},
    87  )
    88  
    89  // WorkersKonnectorsExecDurations is a histogram metric of the number of
    90  // execution durations of the commands executed for konnectors and services,
    91  // labelled by application slug
    92  var WorkersKonnectorsExecDurations = prometheus.NewHistogramVec(
    93  	prometheus.HistogramOpts{
    94  		Namespace: "workers",
    95  		Subsystem: "konnectors",
    96  		Name:      "durations",
    97  
    98  		Help: `Execution durations of the commands executed for konnectors and services,
    99  labelled by application slug. This should be a sub-duration of the
   100  workers_exec_durations for the "konnector" and "service" worker types, but offers
   101  a label by slug.`,
   102  
   103  		// Using the same buckets as WorkerExecDurations
   104  		Buckets: prometheus.LinearBuckets(0, 30, 10),
   105  	},
   106  	[]string{"slug", "result"},
   107  )
   108  
   109  func init() {
   110  	prometheus.MustRegister(
   111  		WorkerExecDurations,
   112  		WorkerExecCounter,
   113  		WorkerExecRetries,
   114  		WorkerExecTimeoutsCounter,
   115  		WorkerKonnectorExecDeleteCounter,
   116  
   117  		WorkersKonnectorsExecDurations,
   118  	)
   119  }