github.com/koko1123/flow-go-1@v0.29.6/module/metrics/herocache.go (about)

     1  package metrics
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/prometheus/client_golang/prometheus"
     7  
     8  	"github.com/koko1123/flow-go-1/module"
     9  )
    10  
// subsystemHeroCache is the Prometheus subsystem under which every metric created in this file is grouped.
const subsystemHeroCache = "hero_cache"

// Compile-time assertion that *HeroCacheCollector satisfies the module.HeroCacheMetrics interface.
var _ module.HeroCacheMetrics = (*HeroCacheCollector)(nil)
    14  
    15  type HeroCacheCollector struct {
    16  	histogramNormalizedBucketSlotAvailable prometheus.Histogram
    17  
    18  	countKeyGetSuccess prometheus.Counter
    19  	countKeyGetFailure prometheus.Counter
    20  
    21  	countKeyPutSuccess      prometheus.Counter
    22  	countKeyPutDrop         prometheus.Counter
    23  	countKeyPutDeduplicated prometheus.Counter
    24  	countKeyPutAttempt      prometheus.Counter
    25  	countKeyRemoved         prometheus.Counter
    26  
    27  	size prometheus.Gauge
    28  
    29  	countKeyEjectionDueToFullCapacity prometheus.Counter
    30  	countKeyEjectionDueToEmergency    prometheus.Counter
    31  }
    32  
// HeroCacheMetricsRegistrationFunc is a function that produces a module.HeroCacheMetrics from a uint64.
// NOTE(review): the uint64 is presumably an epoch counter (compare CollectionNodeTransactionsCacheMetrics) -- confirm against callers.
type HeroCacheMetricsRegistrationFunc func(uint64) module.HeroCacheMetrics
    34  
    35  func NetworkReceiveCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    36  	return NewHeroCacheCollector(namespaceNetwork, ResourceNetworkingReceiveCache, registrar)
    37  }
    38  
    39  func PublicNetworkReceiveCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    40  	return NewHeroCacheCollector(namespaceNetwork, ResourcePublicNetworkingReceiveCache, registrar)
    41  }
    42  
    43  func NetworkDnsTxtCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    44  	return NewHeroCacheCollector(namespaceNetwork, ResourceNetworkingDnsTxtCache, registrar)
    45  }
    46  
    47  func NetworkDnsIpCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    48  	return NewHeroCacheCollector(namespaceNetwork, ResourceNetworkingDnsIpCache, registrar)
    49  }
    50  
    51  func ChunkDataPackRequestQueueMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    52  	return NewHeroCacheCollector(namespaceExecution, ResourceChunkDataPackRequests, registrar)
    53  }
    54  
    55  func ReceiptRequestsQueueMetricFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    56  	return NewHeroCacheCollector(namespaceExecution, ResourceReceipt, registrar)
    57  }
    58  
    59  func CollectionRequestsQueueMetricFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    60  	return NewHeroCacheCollector(namespaceCollection, ResourceCollection, registrar)
    61  }
    62  
    63  func CollectionNodeTransactionsCacheMetrics(registrar prometheus.Registerer, epoch uint64) *HeroCacheCollector {
    64  	return NewHeroCacheCollector(namespaceCollection, fmt.Sprintf("%s_%d", ResourceTransaction, epoch), registrar)
    65  }
    66  
    67  func NewHeroCacheCollector(nameSpace string, cacheName string, registrar prometheus.Registerer) *HeroCacheCollector {
    68  
    69  	histogramNormalizedBucketSlotAvailable := prometheus.NewHistogram(prometheus.HistogramOpts{
    70  		Namespace: nameSpace,
    71  		Subsystem: subsystemHeroCache,
    72  
    73  		// Note that the notion of "bucket" in HeroCache differs from Prometheus.
    74  		// A HeroCache "bucket" is used to group the keys of the entities.
    75  		// A Prometheus "bucket" is used to group collected data points within a range.
    76  		// This metric represents the histogram of normalized available slots in buckets, where
    77  		// a data point set to 1 represents a bucket with all slots available (i.e., a fully empty bucket),
    78  		// and a data point set to 0 means a bucket with no available slots (i.e., a completely full bucket).
    79  		//
    80  		// We generally set total slots of a bucket in HeroCache to 16. Hence:
    81  		// Prometheus bucket 1 represents total number of HeroCache buckets with at most 16 available slots.
    82  		// Prometheus bucket 0.75 represents total number of HeroCache buckets with at most 12 available slots.
    83  		// Prometheus bucket 0.5 represents total number of HeroCache buckets with at most 8 available slots.
    84  		// Prometheus bucket 0.25 represents total number of HeroCache buckets with at most 4 available slots.
    85  		// Prometheus bucket 0.1 represents total number of HeroCache buckets with at most 1 available slots.
    86  		// Prometheus bucket 0 represents total number of HeroCache buckets with no (i.e., zero) available slots.
    87  		Buckets: []float64{0, 0.1, 0.25, 0.5, 0.75, 1},
    88  		Name:    cacheName + "_" + "normalized_bucket_available_slot_count",
    89  		Help:    "normalized histogram of available slots across all buckets",
    90  	})
    91  
    92  	size := prometheus.NewGauge(prometheus.GaugeOpts{
    93  		Namespace: nameSpace,
    94  		Subsystem: subsystemHeroCache,
    95  		Name:      cacheName + "_" + "items_total",
    96  		Help:      "total number of items in the cache",
    97  	})
    98  
    99  	countKeyGetSuccess := prometheus.NewCounter(prometheus.CounterOpts{
   100  		Namespace: nameSpace,
   101  		Subsystem: subsystemHeroCache,
   102  		Name:      cacheName + "_" + "successful_read_count_total",
   103  		Help:      "total number of successful read queries",
   104  	})
   105  
   106  	countKeyGetFailure := prometheus.NewCounter(prometheus.CounterOpts{
   107  		Namespace: nameSpace,
   108  		Subsystem: subsystemHeroCache,
   109  		Name:      cacheName + "_" + "unsuccessful_read_count_total",
   110  		Help:      "total number of unsuccessful read queries",
   111  	})
   112  
   113  	countKeyPutAttempt := prometheus.NewCounter(prometheus.CounterOpts{
   114  		Namespace: nameSpace,
   115  		Subsystem: subsystemHeroCache,
   116  		Name:      cacheName + "_" + "write_attempt_count_total",
   117  		Help:      "total number of put queries",
   118  	})
   119  
   120  	countKeyPutDrop := prometheus.NewCounter(prometheus.CounterOpts{
   121  		Namespace: nameSpace,
   122  		Subsystem: subsystemHeroCache,
   123  		Name:      cacheName + "_" + "write_drop_count_total",
   124  		Help:      "total number of put queries dropped due to full capacity",
   125  	})
   126  
   127  	countKeyPutSuccess := prometheus.NewCounter(prometheus.CounterOpts{
   128  		Namespace: nameSpace,
   129  		Subsystem: subsystemHeroCache,
   130  		Name:      cacheName + "_" + "successful_write_count_total",
   131  		Help:      "total number successful write queries",
   132  	})
   133  
   134  	countKeyPutDeduplicated := prometheus.NewCounter(prometheus.CounterOpts{
   135  		Namespace: nameSpace,
   136  		Subsystem: subsystemHeroCache,
   137  		Name:      cacheName + "_" + "unsuccessful_write_count_total",
   138  		Help:      "total number of queries writing an already existing (duplicate) entity to the cache",
   139  	})
   140  
   141  	countKeyRemoved := prometheus.NewCounter(prometheus.CounterOpts{
   142  		Namespace: nameSpace,
   143  		Subsystem: subsystemHeroCache,
   144  		Name:      cacheName + "_" + "removed_count_total",
   145  		Help:      "total number of entities removed from the cache",
   146  	})
   147  
   148  	countKeyEjectionDueToFullCapacity := prometheus.NewCounter(prometheus.CounterOpts{
   149  		Namespace: nameSpace,
   150  		Subsystem: subsystemHeroCache,
   151  		Name:      cacheName + "_" + "full_capacity_entity_ejection_total",
   152  		Help:      "total number of entities ejected when writing new entities at full capacity",
   153  	})
   154  
   155  	countKeyEjectionDueToEmergency := prometheus.NewCounter(prometheus.CounterOpts{
   156  		Namespace: nameSpace,
   157  		Subsystem: subsystemHeroCache,
   158  		Name:      cacheName + "_" + "emergency_key_ejection_total",
   159  		Help:      "total number of emergency key ejections at bucket level",
   160  	})
   161  
   162  	registrar.MustRegister(
   163  		// available slot distribution
   164  		histogramNormalizedBucketSlotAvailable,
   165  
   166  		// size
   167  		size,
   168  
   169  		// read
   170  		countKeyGetSuccess,
   171  		countKeyGetFailure,
   172  
   173  		// write
   174  		countKeyPutSuccess,
   175  		countKeyPutDeduplicated,
   176  		countKeyPutDrop,
   177  		countKeyPutAttempt,
   178  
   179  		// remove
   180  		countKeyRemoved,
   181  
   182  		// ejection
   183  		countKeyEjectionDueToFullCapacity,
   184  		countKeyEjectionDueToEmergency)
   185  
   186  	return &HeroCacheCollector{
   187  		histogramNormalizedBucketSlotAvailable: histogramNormalizedBucketSlotAvailable,
   188  		size:                                   size,
   189  		countKeyGetSuccess:                     countKeyGetSuccess,
   190  		countKeyGetFailure:                     countKeyGetFailure,
   191  
   192  		countKeyPutAttempt:      countKeyPutAttempt,
   193  		countKeyPutSuccess:      countKeyPutSuccess,
   194  		countKeyPutDeduplicated: countKeyPutDeduplicated,
   195  		countKeyPutDrop:         countKeyPutDrop,
   196  
   197  		countKeyRemoved: countKeyRemoved,
   198  
   199  		countKeyEjectionDueToFullCapacity: countKeyEjectionDueToFullCapacity,
   200  		countKeyEjectionDueToEmergency:    countKeyEjectionDueToEmergency,
   201  	}
   202  }
   203  
   204  // BucketAvailableSlots keeps track of number of available slots in buckets of cache.
   205  func (h *HeroCacheCollector) BucketAvailableSlots(availableSlots uint64, totalSlots uint64) {
   206  	normalizedAvailableSlots := float64(availableSlots) / float64(totalSlots)
   207  	h.histogramNormalizedBucketSlotAvailable.Observe(normalizedAvailableSlots)
   208  }
   209  
   210  // OnKeyPutSuccess is called whenever a new (key, entity) pair is successfully added to the cache.
   211  // size parameter is the current size of the cache post insertion.
   212  func (h *HeroCacheCollector) OnKeyPutSuccess(size uint32) {
   213  	h.countKeyPutSuccess.Inc()
   214  	h.size.Set(float64(size))
   215  }
   216  
   217  // OnKeyPutDeduplicated is tracking the total number of unsuccessful writes caused by adding a duplicate key to the cache.
   218  // A duplicate key is dropped by the cache when it is written to the cache.
   219  // Note: in context of HeroCache, the key corresponds to the identifier of its entity. Hence, a duplicate key corresponds to
   220  // a duplicate entity.
   221  func (h *HeroCacheCollector) OnKeyPutDeduplicated() {
   222  	h.countKeyPutDeduplicated.Inc()
   223  }
   224  
   225  // OnKeyGetSuccess tracks total number of successful read queries.
   226  // A read query is successful if the entity corresponding to its key is available in the cache.
   227  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   228  func (h *HeroCacheCollector) OnKeyGetSuccess() {
   229  	h.countKeyGetSuccess.Inc()
   230  }
   231  
   232  // OnKeyGetFailure tracks total number of unsuccessful read queries.
   233  // A read query is unsuccessful if the entity corresponding to its key is not available in the cache.
   234  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   235  func (h *HeroCacheCollector) OnKeyGetFailure() {
   236  	h.countKeyGetFailure.Inc()
   237  }
   238  
   239  // OnKeyPutAttempt is called whenever a new (key, value) pair is attempted to be put in cache.
   240  // It does not reflect whether the put was successful or not.
   241  // A (key, value) pair put attempt may fail if the cache is full, or the key already exists.
   242  // size parameter is the current size of the cache prior to the put attempt.
   243  func (h *HeroCacheCollector) OnKeyPutAttempt(size uint32) {
   244  	h.countKeyPutAttempt.Inc()
   245  	h.size.Set(float64(size))
   246  }
   247  
   248  // OnKeyPutDrop is called whenever a new (key, entity) pair is dropped from the cache due to full cache.
   249  func (h *HeroCacheCollector) OnKeyPutDrop() {
   250  	h.countKeyPutDrop.Inc()
   251  }
   252  
   253  // OnKeyRemoved is called whenever a (key, entity) pair is removed from the cache.
   254  // size parameter is the current size of the cache.
   255  func (h *HeroCacheCollector) OnKeyRemoved(size uint32) {
   256  	h.countKeyRemoved.Inc()
   257  	h.size.Set(float64(size))
   258  }
   259  
   260  // OnEntityEjectionDueToFullCapacity is called whenever adding a new (key, entity) to the cache results in ejection of another (key', entity') pair.
   261  // This normally happens -- and is expected -- when the cache is full.
   262  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   263  func (h *HeroCacheCollector) OnEntityEjectionDueToFullCapacity() {
   264  	h.countKeyEjectionDueToFullCapacity.Inc()
   265  }
   266  
   267  // OnEntityEjectionDueToEmergency is called whenever a bucket is found full and all of its keys are valid, i.e.,
   268  // each key belongs to an existing (key, entity) pair.
   269  // Hence, adding a new key to that bucket will replace the oldest valid key inside that bucket.
   270  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   271  func (h *HeroCacheCollector) OnEntityEjectionDueToEmergency() {
   272  	h.countKeyEjectionDueToEmergency.Inc()
   273  }