zotregistry.io/zot@v1.4.4-0.20231124084042-02a8ed785457/pkg/extensions/monitoring/minimal.go (about)

     1  //go:build !metrics
     2  // +build !metrics
     3  
     4  //nolint:varnamelen,forcetypeassert
     5  package monitoring
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  	"path"
    11  	"strconv"
    12  	"sync"
    13  	"time"
    14  
    15  	"zotregistry.io/zot/pkg/log"
    16  )
    17  
// Metric names exposed by the minimal metrics server, plus the timing
// parameters that control when metric collection is switched off.
const (
	// metricsNamespace is the common prefix of every metric name below.
	metricsNamespace = "zot"
	// Counters.
	httpConnRequests = metricsNamespace + ".http.requests"
	repoDownloads    = metricsNamespace + ".repo.downloads"
	repoUploads      = metricsNamespace + ".repo.uploads"
	// Gauge.
	repoStorageBytes = metricsNamespace + ".repo.storage.bytes"
	serverInfo       = metricsNamespace + ".info"
	// Summary.
	httpRepoLatencySeconds = metricsNamespace + ".http.repo.latency.seconds"
	// Histogram.
	httpMethodLatencySeconds  = metricsNamespace + ".http.method.latency.seconds"
	storageLockLatencySeconds = metricsNamespace + ".storage.lock.latency.seconds"

	// metricsScrapeTimeout is how long the server tolerates not being scraped
	// before Run disables metric collection.
	metricsScrapeTimeout = 2 * time.Minute
	// metricsScrapeCheckInterval is how often Run checks for that timeout.
	metricsScrapeCheckInterval = 30 * time.Second
)
    36  
// metricServer is the in-process metrics collector used when zot is built
// without the "metrics" build tag. Metric updates arrive on reqChan and are
// folded into cache by the Run loop; scrapes are served through cacheChan.
type metricServer struct {
	enabled    bool               // whether SendMetric forwards updates; guarded by lock
	lastCheck  time.Time          // time of the last scrape handled by Run
	reqChan    chan interface{}   // incoming metric updates (CounterValue, GaugeValue, ...)
	cache      *MetricsInfo       // accumulated metrics, mutated only from the Run loop
	cacheChan  chan *MetricsInfo  // request/response channel used by ReceiveMetrics
	bucketsF2S map[float64]string // float64 to string conversion of buckets label
	log        log.Logger
	lock       *sync.RWMutex // protects enabled
}
    47  
// MetricsInfo is the full set of metrics accumulated by the metric server,
// grouped by metric kind. It is what ReceiveMetrics hands back to a scraper.
type MetricsInfo struct {
	Counters   []*CounterValue
	Gauges     []*GaugeValue
	Summaries  []*SummaryValue
	Histograms []*HistogramValue
}
    54  
// CounterValue stores info about a metric that is incremented over time,
// such as the number of requests to an HTTP endpoint.
type CounterValue struct {
	Name        string   // metric name, one of the keys returned by GetCounters
	Count       int      // number of increments recorded so far
	LabelNames  []string // label names, must match the ones registered for Name
	LabelValues []string // label values, positionally paired with LabelNames
}
    63  
// GaugeValue stores one value that is updated as time goes on, such as
// the amount of memory allocated.
type GaugeValue struct {
	Name        string   // metric name, one of the keys returned by GetGauges
	Value       float64  // most recently set value
	LabelNames  []string // label names, must match the ones registered for Name
	LabelValues []string // label values, positionally paired with LabelNames
}
    72  
// SummaryValue stores the number of observations of a metric and the running
// sum of the observed values, such as the latency of HTTP requests to a repo.
type SummaryValue struct {
	Name        string   // metric name, one of the keys returned by GetSummaries
	Count       int      // number of observations recorded so far
	Sum         float64  // sum of observed values; a single observation when sent as an update
	LabelNames  []string // label names, must match the ones registered for Name
	LabelValues []string // label values, positionally paired with LabelNames
}
    82  
// HistogramValue stores the number of observations of a metric, their running
// sum, and a per-bucket count of the observations that were less than or
// equal to each bucket's upper bound.
type HistogramValue struct {
	Name        string         // metric name, one of the keys returned by GetHistograms
	Count       int            // number of observations recorded so far
	Sum         float64        // sum of observed values; a single observation when sent as an update
	Buckets     map[string]int // bucket upper bound (as a string label) to observation count
	LabelNames  []string       // label names, must match the ones registered for Name
	LabelValues []string       // label values, positionally paired with LabelNames
}
    91  
    92  func GetDefaultBuckets() []float64 {
    93  	return []float64{.05, .5, 1, 5, 30, 60, 600, math.MaxFloat64}
    94  }
    95  
    96  func GetStorageLatencyBuckets() []float64 {
    97  	return []float64{.001, .01, 0.1, 1, 5, 10, 15, 30, 60, math.MaxFloat64}
    98  }
    99  
   100  // implements the MetricServer interface.
   101  func (ms *metricServer) SendMetric(metric interface{}) {
   102  	ms.lock.RLock()
   103  	if ms.enabled {
   104  		ms.lock.RUnlock()
   105  		ms.reqChan <- metric
   106  	} else {
   107  		ms.lock.RUnlock()
   108  	}
   109  }
   110  
// ForceSendMetric forwards metric to the Run loop regardless of whether
// metrics are currently enabled. The send blocks until Run receives it.
func (ms *metricServer) ForceSendMetric(metric interface{}) {
	ms.reqChan <- metric
}
   114  
   115  func (ms *metricServer) ReceiveMetrics() interface{} {
   116  	ms.lock.Lock()
   117  	if !ms.enabled {
   118  		ms.enabled = true
   119  	}
   120  	ms.lock.Unlock()
   121  	ms.cacheChan <- &MetricsInfo{}
   122  
   123  	return <-ms.cacheChan
   124  }
   125  
   126  func (ms *metricServer) IsEnabled() bool {
   127  	ms.lock.RLock()
   128  	defer ms.lock.RUnlock()
   129  
   130  	return ms.enabled
   131  }
   132  
   133  func (ms *metricServer) Run() {
   134  	sendAfter := make(chan time.Duration, 1)
   135  	// periodically send a notification to the metric server to check if we can disable metrics
   136  	go func() {
   137  		for {
   138  			t := metricsScrapeCheckInterval
   139  			time.Sleep(t)
   140  			sendAfter <- t
   141  		}
   142  	}()
   143  
   144  	for {
   145  		select {
   146  		case <-ms.cacheChan:
   147  			ms.lastCheck = time.Now()
   148  			ms.cacheChan <- ms.cache
   149  		case m := <-ms.reqChan:
   150  			switch v := m.(type) {
   151  			case CounterValue:
   152  				cv := m.(CounterValue)
   153  				ms.CounterInc(&cv)
   154  			case GaugeValue:
   155  				gv := m.(GaugeValue)
   156  				ms.GaugeSet(&gv)
   157  			case SummaryValue:
   158  				sv := m.(SummaryValue)
   159  				ms.SummaryObserve(&sv)
   160  			case HistogramValue:
   161  				hv := m.(HistogramValue)
   162  				ms.HistogramObserve(&hv)
   163  			default:
   164  				ms.log.Error().Str("type", fmt.Sprintf("%T", v)).Msg("unexpected type")
   165  			}
   166  		case <-sendAfter:
   167  			// Check if we didn't receive a metrics scrape in a while and if so,
   168  			// disable metrics (possible node exporter down/crashed)
   169  			ms.lock.Lock()
   170  			if ms.enabled {
   171  				lastCheckInterval := time.Since(ms.lastCheck)
   172  				if lastCheckInterval > metricsScrapeTimeout {
   173  					ms.enabled = false
   174  				}
   175  			}
   176  			ms.lock.Unlock()
   177  		}
   178  	}
   179  }
   180  
   181  func NewMetricsServer(enabled bool, log log.Logger) MetricServer {
   182  	mi := &MetricsInfo{
   183  		Counters:   make([]*CounterValue, 0),
   184  		Gauges:     make([]*GaugeValue, 0),
   185  		Summaries:  make([]*SummaryValue, 0),
   186  		Histograms: make([]*HistogramValue, 0),
   187  	}
   188  	// convert to a map for returning easily the string corresponding to a bucket
   189  	bucketsFloat2String := map[float64]string{}
   190  
   191  	for _, fvalue := range append(GetDefaultBuckets(), GetStorageLatencyBuckets()...) {
   192  		if fvalue == math.MaxFloat64 {
   193  			bucketsFloat2String[fvalue] = "+Inf"
   194  		} else {
   195  			s := strconv.FormatFloat(fvalue, 'f', -1, 64)
   196  			bucketsFloat2String[fvalue] = s
   197  		}
   198  	}
   199  
   200  	ms := &metricServer{
   201  		enabled:    enabled,
   202  		reqChan:    make(chan interface{}),
   203  		cacheChan:  make(chan *MetricsInfo),
   204  		cache:      mi,
   205  		bucketsF2S: bucketsFloat2String,
   206  		log:        log,
   207  		lock:       &sync.RWMutex{},
   208  	}
   209  
   210  	go ms.Run()
   211  
   212  	return ms
   213  }
   214  
   215  // contains a map with key=CounterName and value=CounterLabels.
   216  func GetCounters() map[string][]string {
   217  	return map[string][]string{
   218  		httpConnRequests: {"method", "code"},
   219  		repoDownloads:    {"repo"},
   220  		repoUploads:      {"repo"},
   221  	}
   222  }
   223  
   224  func GetGauges() map[string][]string {
   225  	return map[string][]string{
   226  		repoStorageBytes: {"repo"},
   227  		serverInfo:       {"commit", "binaryType", "goVersion", "version"},
   228  	}
   229  }
   230  
   231  func GetSummaries() map[string][]string {
   232  	return map[string][]string{
   233  		httpRepoLatencySeconds: {"repo"},
   234  	}
   235  }
   236  
   237  func GetHistograms() map[string][]string {
   238  	return map[string][]string{
   239  		httpMethodLatencySeconds:  {"method"},
   240  		storageLockLatencySeconds: {"storageName", "lockType"},
   241  	}
   242  }
   243  
   244  // return true if a metric does not have any labels or if the label
   245  // values for searched metric corresponds to the one in the cached slice.
   246  func isMetricMatch(lValues, metricValues []string) bool {
   247  	if len(lValues) == len(metricValues) {
   248  		for i, v := range metricValues {
   249  			if v != lValues[i] {
   250  				return false
   251  			}
   252  		}
   253  	}
   254  
   255  	return true
   256  }
   257  
   258  // returns {-1, false} in case metric was not found in the slice.
   259  func findCounterValueIndex(metricSlice []*CounterValue, name string, labelValues []string) (int, bool) {
   260  	for i, m := range metricSlice {
   261  		if m.Name == name {
   262  			if isMetricMatch(labelValues, m.LabelValues) {
   263  				return i, true
   264  			}
   265  		}
   266  	}
   267  
   268  	return -1, false
   269  }
   270  
   271  // returns {-1, false} in case metric was not found in the slice.
   272  func findGaugeValueIndex(metricSlice []*GaugeValue, name string, labelValues []string) (int, bool) {
   273  	for i, m := range metricSlice {
   274  		if m.Name == name {
   275  			if isMetricMatch(labelValues, m.LabelValues) {
   276  				return i, true
   277  			}
   278  		}
   279  	}
   280  
   281  	return -1, false
   282  }
   283  
   284  // returns {-1, false} in case metric was not found in the slice.
   285  func findSummaryValueIndex(metricSlice []*SummaryValue, name string, labelValues []string) (int, bool) {
   286  	for i, m := range metricSlice {
   287  		if m.Name == name {
   288  			if isMetricMatch(labelValues, m.LabelValues) {
   289  				return i, true
   290  			}
   291  		}
   292  	}
   293  
   294  	return -1, false
   295  }
   296  
   297  // returns {-1, false} in case metric was not found in the slice.
   298  func findHistogramValueIndex(metricSlice []*HistogramValue, name string, labelValues []string) (int, bool) {
   299  	for i, m := range metricSlice {
   300  		if m.Name == name {
   301  			if isMetricMatch(labelValues, m.LabelValues) {
   302  				return i, true
   303  			}
   304  		}
   305  	}
   306  
   307  	return -1, false
   308  }
   309  
   310  func (ms *metricServer) CounterInc(cv *CounterValue) {
   311  	labels, ok := GetCounters()[cv.Name] // known label names for the 'name' counter
   312  
   313  	err := sanityChecks(cv.Name, labels, ok, cv.LabelNames, cv.LabelValues)
   314  	if err != nil {
   315  		// The last thing we want is to panic/stop the server due to instrumentation
   316  		// thus log a message (should be detected during development of new metrics)
   317  		ms.log.Error().Err(err).Msg("Instrumentation error")
   318  
   319  		return
   320  	}
   321  
   322  	index, ok := findCounterValueIndex(ms.cache.Counters, cv.Name, cv.LabelValues)
   323  	if !ok {
   324  		// cv not found in cache: add it
   325  		cv.Count = 1
   326  		ms.cache.Counters = append(ms.cache.Counters, cv)
   327  	} else {
   328  		ms.cache.Counters[index].Count++
   329  	}
   330  }
   331  
   332  func (ms *metricServer) GaugeSet(gv *GaugeValue) {
   333  	labels, ok := GetGauges()[gv.Name] // known label names for the 'name' counter
   334  
   335  	err := sanityChecks(gv.Name, labels, ok, gv.LabelNames, gv.LabelValues)
   336  	if err != nil {
   337  		ms.log.Error().Err(err).Msg("Instrumentation error")
   338  
   339  		return
   340  	}
   341  
   342  	index, ok := findGaugeValueIndex(ms.cache.Gauges, gv.Name, gv.LabelValues)
   343  	if !ok {
   344  		// gv not found in cache: add it
   345  		ms.cache.Gauges = append(ms.cache.Gauges, gv)
   346  	} else {
   347  		ms.cache.Gauges[index].Value = gv.Value
   348  	}
   349  }
   350  
   351  func (ms *metricServer) SummaryObserve(sv *SummaryValue) {
   352  	labels, ok := GetSummaries()[sv.Name] // known label names for the 'name' summary
   353  
   354  	err := sanityChecks(sv.Name, labels, ok, sv.LabelNames, sv.LabelValues)
   355  	if err != nil {
   356  		ms.log.Error().Err(err).Msg("Instrumentation error")
   357  
   358  		return
   359  	}
   360  
   361  	index, ok := findSummaryValueIndex(ms.cache.Summaries, sv.Name, sv.LabelValues)
   362  	if !ok {
   363  		// The SampledValue not found: add it
   364  		sv.Count = 1 // First value, no need to increment
   365  		ms.cache.Summaries = append(ms.cache.Summaries, sv)
   366  	} else {
   367  		ms.cache.Summaries[index].Count++
   368  		ms.cache.Summaries[index].Sum += sv.Sum
   369  	}
   370  }
   371  
   372  func (ms *metricServer) HistogramObserve(hv *HistogramValue) {
   373  	labels, ok := GetHistograms()[hv.Name] // known label names for the 'name' counter
   374  
   375  	err := sanityChecks(hv.Name, labels, ok, hv.LabelNames, hv.LabelValues)
   376  	if err != nil {
   377  		ms.log.Error().Err(err).Msg("Instrumentation error")
   378  
   379  		return
   380  	}
   381  
   382  	index, ok := findHistogramValueIndex(ms.cache.Histograms, hv.Name, hv.LabelValues)
   383  	if !ok {
   384  		// The HistogramValue not found: add it
   385  		buckets := make(map[string]int)
   386  
   387  		for _, fvalue := range GetBuckets(hv.Name) {
   388  			if hv.Sum <= fvalue {
   389  				buckets[ms.bucketsF2S[fvalue]] = 1
   390  			} else {
   391  				buckets[ms.bucketsF2S[fvalue]] = 0
   392  			}
   393  		}
   394  
   395  		hv.Count = 1 // First value, no need to increment
   396  		hv.Buckets = buckets
   397  		ms.cache.Histograms = append(ms.cache.Histograms, hv)
   398  	} else {
   399  		cachedH := ms.cache.Histograms[index]
   400  		cachedH.Count++
   401  		cachedH.Sum += hv.Sum
   402  		for _, fvalue := range GetBuckets(hv.Name) {
   403  			if hv.Sum <= fvalue {
   404  				cachedH.Buckets[ms.bucketsF2S[fvalue]]++
   405  			}
   406  		}
   407  	}
   408  }
   409  
   410  //nolint:goerr113
   411  func sanityChecks(name string, knownLabels []string, found bool, labelNames, labelValues []string) error {
   412  	if !found {
   413  		return fmt.Errorf("metric %s: not found", name)
   414  	}
   415  
   416  	if len(labelNames) != len(labelValues) ||
   417  		len(labelNames) != len(knownLabels) {
   418  		return fmt.Errorf("metric %s: label size mismatch", name)
   419  	}
   420  	// The list of label names defined in init() for the counter must match what was provided in labelNames
   421  	for i, label := range labelNames {
   422  		if label != knownLabels[i] {
   423  			return fmt.Errorf("metric %s: label size mismatch", name)
   424  		}
   425  	}
   426  
   427  	return nil
   428  }
   429  
   430  func IncHTTPConnRequests(ms MetricServer, lvs ...string) {
   431  	req := CounterValue{
   432  		Name:        httpConnRequests,
   433  		LabelNames:  []string{"method", "code"},
   434  		LabelValues: lvs,
   435  	}
   436  	ms.SendMetric(req)
   437  }
   438  
   439  func ObserveHTTPRepoLatency(ms MetricServer, path string, latency time.Duration) {
   440  	var lvs []string
   441  	match := re.FindStringSubmatch(path)
   442  
   443  	if len(match) > 1 {
   444  		lvs = []string{match[1]}
   445  	} else {
   446  		lvs = []string{"N/A"}
   447  	}
   448  
   449  	sv := SummaryValue{
   450  		Name:        httpRepoLatencySeconds,
   451  		Sum:         latency.Seconds(),
   452  		LabelNames:  []string{"repo"},
   453  		LabelValues: lvs,
   454  	}
   455  	ms.SendMetric(sv)
   456  }
   457  
   458  func ObserveHTTPMethodLatency(ms MetricServer, method string, latency time.Duration) {
   459  	h := HistogramValue{
   460  		Name:        httpMethodLatencySeconds,
   461  		Sum:         latency.Seconds(), // convenient temporary store for Histogram latency value
   462  		LabelNames:  []string{"method"},
   463  		LabelValues: []string{method},
   464  	}
   465  	ms.SendMetric(h)
   466  }
   467  
   468  func IncDownloadCounter(ms MetricServer, repo string) {
   469  	dCounter := CounterValue{
   470  		Name:        repoDownloads,
   471  		LabelNames:  []string{"repo"},
   472  		LabelValues: []string{repo},
   473  	}
   474  	ms.SendMetric(dCounter)
   475  }
   476  
   477  func IncUploadCounter(ms MetricServer, repo string) {
   478  	uCounter := CounterValue{
   479  		Name:        repoUploads,
   480  		LabelNames:  []string{"repo"},
   481  		LabelValues: []string{repo},
   482  	}
   483  	ms.SendMetric(uCounter)
   484  }
   485  
   486  func SetStorageUsage(ms MetricServer, rootDir, repo string) {
   487  	dir := path.Join(rootDir, repo)
   488  
   489  	repoSize, err := GetDirSize(dir)
   490  	if err != nil {
   491  		ms.(*metricServer).log.Error().Err(err).Msg("failed to set storage usage")
   492  	}
   493  
   494  	storage := GaugeValue{
   495  		Name:        repoStorageBytes,
   496  		Value:       float64(repoSize),
   497  		LabelNames:  []string{"repo"},
   498  		LabelValues: []string{repo},
   499  	}
   500  	ms.ForceSendMetric(storage)
   501  }
   502  
   503  func SetServerInfo(ms MetricServer, lvs ...string) {
   504  	info := GaugeValue{
   505  		Name:        serverInfo,
   506  		Value:       0,
   507  		LabelNames:  []string{"commit", "binaryType", "goVersion", "version"},
   508  		LabelValues: lvs,
   509  	}
   510  	// This metric is set once at zot startup (set it regardless of metrics enabled)
   511  	ms.ForceSendMetric(info)
   512  }
   513  
   514  func ObserveStorageLockLatency(ms MetricServer, latency time.Duration, storageName, lockType string) {
   515  	h := HistogramValue{
   516  		Name:        storageLockLatencySeconds,
   517  		Sum:         latency.Seconds(), // convenient temporary store for Histogram latency value
   518  		LabelNames:  []string{"storageName", "lockType"},
   519  		LabelValues: []string{storageName, lockType},
   520  	}
   521  	ms.SendMetric(h)
   522  }
   523  
   524  func GetMaxIdleScrapeInterval() time.Duration {
   525  	return metricsScrapeTimeout + metricsScrapeCheckInterval
   526  }
   527  
   528  func GetBuckets(metricName string) []float64 {
   529  	switch metricName {
   530  	case storageLockLatencySeconds:
   531  		return GetStorageLatencyBuckets()
   532  	default:
   533  		return GetDefaultBuckets()
   534  	}
   535  }