github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/metrics.go (about)

     1  // Copyright 2023 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package storer
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"time"
    11  
    12  	m "github.com/ethersphere/bee/v2/pkg/metrics"
    13  	"github.com/ethersphere/bee/v2/pkg/storage"
    14  	"github.com/ethersphere/bee/v2/pkg/swarm"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  )
    17  
// metrics groups storer related prometheus counters.
//
// The struct holds counters, gauges and histograms; all of them are created
// by newMetrics.
type metrics struct {
	MethodCalls             prometheus.CounterVec   // number of method calls, labelled by component, method and status
	MethodCallsDuration     prometheus.HistogramVec // duration of method calls, labelled by component and method
	ReserveSize             prometheus.Gauge        // number of chunks in reserve
	ReserveSizeWithinRadius prometheus.Gauge        // number of chunks in reserve with proximity >= storage radius
	ReserveCleanup          prometheus.Counter      // number of cleaned-up expired chunks
	StorageRadius           prometheus.Gauge        // radius of responsibility of the reserve storage
	CacheSize               prometheus.Gauge        // number of chunks in cache
	EvictedChunkCount       prometheus.Counter      // number of chunks evicted from reserve
	ExpiredChunkCount       prometheus.Counter      // number of chunks expired from reserve due to stamp expirations
	OverCapTriggerCount     prometheus.Counter      // number of times the reserve was over capacity and triggered an eviction
	ExpiredBatchCount       prometheus.Counter      // number of expired batches that were processed
	LevelDBStats            prometheus.HistogramVec // LevelDB statistics, labelled by counter
	ExpiryTriggersCount     prometheus.Counter      // number of batch expiry triggers
	ExpiryRunsCount         prometheus.Counter      // number of times the expiry worker was fired

	ReserveMissingBatch prometheus.Gauge // number of chunks in reserve with missing batches
}
    37  
    38  // newMetrics is a convenient constructor for creating new metrics.
    39  func newMetrics() metrics {
    40  	const subsystem = "localstore"
    41  
    42  	return metrics{
    43  		MethodCalls: *prometheus.NewCounterVec(
    44  			prometheus.CounterOpts{
    45  				Namespace: m.Namespace,
    46  				Subsystem: subsystem,
    47  				Name:      "method_calls",
    48  				Help:      "Number of method calls.",
    49  			},
    50  			[]string{"component", "method", "status"},
    51  		),
    52  		MethodCallsDuration: *prometheus.NewHistogramVec(
    53  			prometheus.HistogramOpts{
    54  				Namespace: m.Namespace,
    55  				Subsystem: subsystem,
    56  				Name:      "method_calls_duration",
    57  				Help:      "Duration of method calls.",
    58  			},
    59  			[]string{"component", "method"},
    60  		),
    61  		ReserveSize: prometheus.NewGauge(
    62  			prometheus.GaugeOpts{
    63  				Namespace: m.Namespace,
    64  				Subsystem: subsystem,
    65  				Name:      "reserve_size",
    66  				Help:      "Number of chunks in reserve.",
    67  			},
    68  		),
    69  		ReserveMissingBatch: prometheus.NewGauge(
    70  			prometheus.GaugeOpts{
    71  				Namespace: m.Namespace,
    72  				Subsystem: subsystem,
    73  				Name:      "reserve_missing_batch",
    74  				Help:      "Number of chunks in reserve with missing batches.",
    75  			},
    76  		),
    77  		ReserveSizeWithinRadius: prometheus.NewGauge(
    78  			prometheus.GaugeOpts{
    79  				Namespace: m.Namespace,
    80  				Subsystem: subsystem,
    81  				Name:      "reserve_size_within_radius",
    82  				Help:      "Number of chunks in reserve with proximity >= storage radius.",
    83  			},
    84  		),
    85  		ReserveCleanup: prometheus.NewCounter(
    86  			prometheus.CounterOpts{
    87  				Namespace: m.Namespace,
    88  				Subsystem: subsystem,
    89  				Name:      "reserve_cleanup",
    90  				Help:      "Number of cleaned-up expired chunks.",
    91  			},
    92  		),
    93  		StorageRadius: prometheus.NewGauge(
    94  			prometheus.GaugeOpts{
    95  				Namespace: m.Namespace,
    96  				Subsystem: subsystem,
    97  				Name:      "storage_radius",
    98  				Help:      "Radius of responsibility reserve storage.",
    99  			},
   100  		),
   101  		CacheSize: prometheus.NewGauge(
   102  			prometheus.GaugeOpts{
   103  				Namespace: m.Namespace,
   104  				Subsystem: subsystem,
   105  				Name:      "cache_size",
   106  				Help:      "Number of chunks in cache.",
   107  			},
   108  		),
   109  		EvictedChunkCount: prometheus.NewCounter(
   110  			prometheus.CounterOpts{
   111  				Namespace: m.Namespace,
   112  				Subsystem: subsystem,
   113  				Name:      "evicted_count",
   114  				Help:      "Number of chunks evicted from reserve.",
   115  			},
   116  		),
   117  		ExpiredChunkCount: prometheus.NewCounter(
   118  			prometheus.CounterOpts{
   119  				Namespace: m.Namespace,
   120  				Subsystem: subsystem,
   121  				Name:      "expired_count",
   122  				Help:      "Number of chunks expired from reserve due to stamp expirations.",
   123  			},
   124  		),
   125  		OverCapTriggerCount: prometheus.NewCounter(
   126  			prometheus.CounterOpts{
   127  				Namespace: m.Namespace,
   128  				Subsystem: subsystem,
   129  				Name:      "over_cap_trigger_count",
   130  				Help:      "Number of times the reserve was over capacity and triggered an eviction.",
   131  			},
   132  		),
   133  		ExpiredBatchCount: prometheus.NewCounter(
   134  			prometheus.CounterOpts{
   135  				Namespace: m.Namespace,
   136  				Subsystem: subsystem,
   137  				Name:      "expired_batch_count",
   138  				Help:      "Number of batches expired, that were processed.",
   139  			},
   140  		),
   141  		LevelDBStats: *prometheus.NewHistogramVec(
   142  			prometheus.HistogramOpts{
   143  				Namespace: m.Namespace,
   144  				Subsystem: subsystem,
   145  				Name:      "leveldb_stats",
   146  				Help:      "LevelDB statistics.",
   147  			},
   148  			[]string{"counter"},
   149  		),
   150  		ExpiryTriggersCount: prometheus.NewCounter(
   151  			prometheus.CounterOpts{
   152  				Namespace: m.Namespace,
   153  				Subsystem: subsystem,
   154  				Name:      "expiry_trigger_count",
   155  				Help:      "Number of batches expiry triggers.",
   156  			},
   157  		),
   158  		ExpiryRunsCount: prometheus.NewCounter(
   159  			prometheus.CounterOpts{
   160  				Namespace: m.Namespace,
   161  				Subsystem: subsystem,
   162  				Name:      "expiry_run_count",
   163  				Help:      "Number of times the expiry worker was fired.",
   164  			},
   165  		),
   166  	}
   167  }
   168  
// Compile-time check that *putterWithMetrics satisfies storage.Putter.
var _ storage.Putter = (*putterWithMetrics)(nil)

// putterWithMetrics wraps storage.Putter and adds metrics.
//
// Its Put method delegates to the embedded Putter and records the call
// duration and a success/failure count, both labelled with component.
type putterWithMetrics struct {
	storage.Putter // wrapped putter that Put delegates to

	metrics   metrics // collectors updated on every Put call
	component string  // value used for the "component" label
}
   178  
   179  func (m putterWithMetrics) Put(ctx context.Context, chunk swarm.Chunk) error {
   180  	dur := captureDuration(time.Now())
   181  	err := m.Putter.Put(ctx, chunk)
   182  	m.metrics.MethodCallsDuration.WithLabelValues(m.component, "Put").Observe(dur())
   183  	if err == nil {
   184  		m.metrics.MethodCalls.WithLabelValues(m.component, "Put", "success").Inc()
   185  	} else {
   186  		m.metrics.MethodCalls.WithLabelValues(m.component, "Put", "failure").Inc()
   187  	}
   188  	return err
   189  }
   190  
// Compile-time check that *getterWithMetrics satisfies storage.Getter.
var _ storage.Getter = (*getterWithMetrics)(nil)

// getterWithMetrics wraps storage.Getter and adds metrics.
//
// Its Get method delegates to the embedded Getter and records the call
// duration and a success/failure count, both labelled with component.
type getterWithMetrics struct {
	storage.Getter // wrapped getter that Get delegates to

	metrics   metrics // collectors updated on every Get call
	component string  // value used for the "component" label
}
   200  
   201  func (m getterWithMetrics) Get(ctx context.Context, address swarm.Address) (swarm.Chunk, error) {
   202  	dur := captureDuration(time.Now())
   203  	chunk, err := m.Getter.Get(ctx, address)
   204  	m.metrics.MethodCallsDuration.WithLabelValues(m.component, "Get").Observe(dur())
   205  	if err == nil || errors.Is(err, storage.ErrNotFound) {
   206  		m.metrics.MethodCalls.WithLabelValues(m.component, "Get", "success").Inc()
   207  	} else {
   208  		m.metrics.MethodCalls.WithLabelValues(m.component, "Get", "failure").Inc()
   209  	}
   210  	return chunk, err
   211  }
   212  
   213  // captureDuration returns a function that returns the duration since the given start.
   214  func captureDuration(start time.Time) func() float64 {
   215  	return func() float64 { return time.Since(start).Seconds() }
   216  }