zotregistry.dev/zot@v1.4.4-0.20240314164342-eec277e14d20/pkg/extensions/monitoring/extension.go (about)

     1  //go:build metrics
     2  // +build metrics
     3  
     4  package monitoring
     5  
     6  import (
     7  	"path"
     8  	"time"
     9  
    10  	"github.com/prometheus/client_golang/prometheus"
    11  	"github.com/prometheus/client_golang/prometheus/promauto"
    12  
    13  	"zotregistry.dev/zot/errors"
    14  	"zotregistry.dev/zot/pkg/log"
    15  )
    16  
// metricsNamespace is the Namespace set on every Prometheus collector
// declared in this file.
const metricsNamespace = "zot"
    18  
    19  var (
    20  	httpConnRequests = promauto.NewCounterVec( //nolint: gochecknoglobals
    21  		prometheus.CounterOpts{
    22  			Namespace: metricsNamespace,
    23  			Name:      "http_requests_total",
    24  			Help:      "Total number of http request in zot",
    25  		},
    26  		[]string{"method", "code"},
    27  	)
    28  	httpRepoLatency = promauto.NewSummaryVec( //nolint: gochecknoglobals
    29  		prometheus.SummaryOpts{
    30  			Namespace: metricsNamespace,
    31  			Name:      "http_repo_latency_seconds",
    32  			Help:      "Latency of serving HTTP requests",
    33  		},
    34  		[]string{"repo"},
    35  	)
    36  	httpMethodLatency = promauto.NewHistogramVec( //nolint: gochecknoglobals
    37  		prometheus.HistogramOpts{
    38  			Namespace: metricsNamespace,
    39  			Name:      "http_method_latency_seconds",
    40  			Help:      "Latency of serving HTTP requests",
    41  			Buckets:   GetDefaultBuckets(),
    42  		},
    43  		[]string{"method"},
    44  	)
    45  	repoStorageBytes = promauto.NewGaugeVec( //nolint: gochecknoglobals
    46  		prometheus.GaugeOpts{
    47  			Namespace: metricsNamespace,
    48  			Name:      "repo_storage_bytes",
    49  			Help:      "Storage used per zot repo",
    50  		},
    51  		[]string{"repo"},
    52  	)
    53  	uploadCounter = promauto.NewCounterVec( //nolint: gochecknoglobals
    54  		prometheus.CounterOpts{
    55  			Namespace: metricsNamespace,
    56  			Name:      "repo_uploads_total",
    57  			Help:      "Total number times an image was uploaded",
    58  		},
    59  		[]string{"repo"},
    60  	)
    61  	downloadCounter = promauto.NewCounterVec( //nolint: gochecknoglobals
    62  		prometheus.CounterOpts{
    63  			Namespace: metricsNamespace,
    64  			Name:      "repo_downloads_total",
    65  			Help:      "Total number times an image was downloaded",
    66  		},
    67  		[]string{"repo"},
    68  	)
    69  	serverInfo = promauto.NewGaugeVec( //nolint: gochecknoglobals
    70  		prometheus.GaugeOpts{
    71  			Namespace: metricsNamespace,
    72  			Name:      "info",
    73  			Help:      "Server general information",
    74  		},
    75  		[]string{"commit", "binaryType", "goVersion", "version"},
    76  	)
    77  	storageLockLatency = promauto.NewHistogramVec( //nolint: gochecknoglobals
    78  		prometheus.HistogramOpts{
    79  			Namespace: metricsNamespace,
    80  			Name:      "storage_lock_latency_seconds",
    81  			Help:      "Latency of serving HTTP requests",
    82  			Buckets:   GetStorageLatencyBuckets(),
    83  		},
    84  		[]string{"storageName", "lockType"},
    85  	)
    86  	schedulerGenerators = promauto.NewCounter( //nolint: gochecknoglobals
    87  		prometheus.CounterOpts{
    88  			Namespace: metricsNamespace,
    89  			Name:      "scheduler_generators_total",
    90  			Help:      "Total number of generators registered in scheduler",
    91  		},
    92  	)
    93  	schedulerGeneratorsStatus = promauto.NewGaugeVec( //nolint: gochecknoglobals
    94  		prometheus.GaugeOpts{
    95  			Namespace: metricsNamespace,
    96  			Name:      "scheduler_generators_status",
    97  			Help:      "Scheduler generators by priority & state",
    98  		},
    99  		[]string{"priority", "state"},
   100  	)
   101  	schedulerNumWorkers = promauto.NewGauge( //nolint: gochecknoglobals
   102  		prometheus.GaugeOpts{ //nolint: promlinter
   103  			Namespace: metricsNamespace,
   104  			Name:      "scheduler_workers_total",
   105  			Help:      "Total number of available workers to perform scheduler tasks",
   106  		},
   107  	)
   108  	schedulerWorkers = promauto.NewGaugeVec( //nolint: gochecknoglobals
   109  		prometheus.GaugeOpts{
   110  			Namespace: metricsNamespace,
   111  			Name:      "scheduler_workers",
   112  			Help:      "Scheduler workers state",
   113  		},
   114  		[]string{"state"},
   115  	)
   116  	schedulerTasksQueue = promauto.NewGaugeVec( //nolint: gochecknoglobals
   117  		prometheus.GaugeOpts{
   118  			Namespace: metricsNamespace,
   119  			Name:      "scheduler_tasksqueue_length",
   120  			Help:      "Number of tasks waiting in the queue to pe processed by scheduler workers",
   121  		},
   122  		[]string{"priority"},
   123  	)
   124  	workersTasksDuration = promauto.NewHistogramVec( //nolint: gochecknoglobals
   125  		prometheus.HistogramOpts{
   126  			Namespace: metricsNamespace,
   127  			Name:      "scheduler_workers_tasks_duration_seconds",
   128  			Help:      "How long it takes for a worker to execute a task",
   129  			Buckets:   GetDefaultBuckets(),
   130  		},
   131  		[]string{"name"},
   132  	)
   133  )
   134  
// metricServer is the receiver type for the SendMetric, ForceSendMetric,
// ReceiveMetrics and IsEnabled methods defined in this file.
type metricServer struct {
	// enabled gates SendMetric; ForceSendMetric does not consult it.
	enabled bool
	// log is used to report callbacks that are not of type func().
	log     log.Logger
}
   139  
   140  func GetDefaultBuckets() []float64 {
   141  	return []float64{.05, .5, 1, 5, 30, 60, 600}
   142  }
   143  
   144  func GetStorageLatencyBuckets() []float64 {
   145  	return []float64{.001, .01, 0.1, 1, 5, 10, 15, 30, 60}
   146  }
   147  
   148  func NewMetricsServer(enabled bool, log log.Logger) MetricServer {
   149  	return &metricServer{
   150  		enabled: enabled,
   151  		log:     log,
   152  	}
   153  }
   154  
   155  // implementing the MetricServer interface.
   156  func (ms *metricServer) SendMetric(mfunc interface{}) {
   157  	if ms.enabled {
   158  		mfn, ok := mfunc.(func())
   159  		if !ok {
   160  			ms.log.Error().Err(errors.ErrInvalidMetric).
   161  				Msgf("failed to cast type, expected '%T' but got '%T'", func() {}, mfunc)
   162  
   163  			return
   164  		}
   165  
   166  		mfn()
   167  	}
   168  }
   169  
   170  func (ms *metricServer) ForceSendMetric(mfunc interface{}) {
   171  	mfn, ok := mfunc.(func())
   172  	if !ok {
   173  		ms.log.Error().Err(errors.ErrInvalidMetric).
   174  			Msgf("failed to cast type, expected '%T' but got '%T'", func() {}, mfunc)
   175  
   176  		return
   177  	}
   178  
   179  	mfn()
   180  }
   181  
// ReceiveMetrics always returns nil in this implementation.
func (ms *metricServer) ReceiveMetrics() interface{} {
	return nil
}
   185  
// IsEnabled reports the enabled flag the server was constructed with.
func (ms *metricServer) IsEnabled() bool {
	return ms.enabled
}
   189  
   190  func IncHTTPConnRequests(ms MetricServer, lvalues ...string) {
   191  	ms.SendMetric(func() {
   192  		httpConnRequests.WithLabelValues(lvalues...).Inc()
   193  	})
   194  }
   195  
   196  func ObserveHTTPRepoLatency(ms MetricServer, path string, latency time.Duration) {
   197  	ms.SendMetric(func() {
   198  		match := re.FindStringSubmatch(path)
   199  
   200  		if len(match) > 1 {
   201  			httpRepoLatency.WithLabelValues(match[1]).Observe(latency.Seconds())
   202  		} else {
   203  			httpRepoLatency.WithLabelValues("N/A").Observe(latency.Seconds())
   204  		}
   205  	})
   206  }
   207  
   208  func ObserveHTTPMethodLatency(ms MetricServer, method string, latency time.Duration) {
   209  	ms.SendMetric(func() {
   210  		httpMethodLatency.WithLabelValues(method).Observe(latency.Seconds())
   211  	})
   212  }
   213  
   214  func IncDownloadCounter(ms MetricServer, repo string) {
   215  	ms.SendMetric(func() {
   216  		downloadCounter.WithLabelValues(repo).Inc()
   217  	})
   218  }
   219  
   220  func SetStorageUsage(ms MetricServer, rootDir, repo string) {
   221  	ms.ForceSendMetric(func() {
   222  		dir := path.Join(rootDir, repo)
   223  		repoSize, err := GetDirSize(dir)
   224  
   225  		if err == nil {
   226  			repoStorageBytes.WithLabelValues(repo).Set(float64(repoSize))
   227  		}
   228  	})
   229  }
   230  
   231  func IncUploadCounter(ms MetricServer, repo string) {
   232  	ms.SendMetric(func() {
   233  		uploadCounter.WithLabelValues(repo).Inc()
   234  	})
   235  }
   236  
   237  func SetServerInfo(ms MetricServer, lvalues ...string) {
   238  	ms.ForceSendMetric(func() {
   239  		serverInfo.WithLabelValues(lvalues...).Set(0)
   240  	})
   241  }
   242  
   243  func ObserveStorageLockLatency(ms MetricServer, latency time.Duration, storageName, lockType string) {
   244  	ms.SendMetric(func() {
   245  		storageLockLatency.WithLabelValues(storageName, lockType).Observe(latency.Seconds())
   246  	})
   247  }
   248  
   249  func IncSchedulerGenerators(ms MetricServer) {
   250  	ms.ForceSendMetric(func() {
   251  		schedulerGenerators.Inc()
   252  	})
   253  }
   254  
   255  func SetSchedulerGenerators(ms MetricServer, gen map[string]map[string]uint64) {
   256  	ms.SendMetric(func() {
   257  		for priority, states := range gen {
   258  			for state, value := range states {
   259  				schedulerGeneratorsStatus.WithLabelValues(priority, state).Set(float64(value))
   260  			}
   261  		}
   262  	})
   263  }
   264  
   265  func SetSchedulerNumWorkers(ms MetricServer, total int) {
   266  	ms.SendMetric(func() {
   267  		schedulerNumWorkers.Set(float64(total))
   268  	})
   269  }
   270  
   271  func SetSchedulerWorkers(ms MetricServer, w map[string]int) {
   272  	ms.SendMetric(func() {
   273  		for state, value := range w {
   274  			schedulerWorkers.WithLabelValues(state).Set(float64(value))
   275  		}
   276  	})
   277  }
   278  
   279  func SetSchedulerTasksQueue(ms MetricServer, tq map[string]int) {
   280  	ms.SendMetric(func() {
   281  		for priority, value := range tq {
   282  			schedulerTasksQueue.WithLabelValues(priority).Set(float64(value))
   283  		}
   284  	})
   285  }
   286  
   287  func ObserveWorkersTasksDuration(ms MetricServer, taskName string, duration time.Duration) {
   288  	ms.SendMetric(func() {
   289  		workersTasksDuration.WithLabelValues(taskName).Observe(duration.Seconds())
   290  	})
   291  }