
     1  package metrics
     3  import (
     4  	"fmt"
     6  	""
     8  	""
     9  )
// subsystemHeroCache is the Prometheus subsystem label shared by every metric
// that NewHeroCacheCollector creates.
const subsystemHeroCache = "hero_cache"
// Compile-time check that *HeroCacheCollector satisfies module.HeroCacheMetrics.
var _ module.HeroCacheMetrics = (*HeroCacheCollector)(nil)
    15  type HeroCacheCollector struct {
    16  	histogramNormalizedBucketSlotAvailable prometheus.Histogram
    18  	countKeyGetSuccess prometheus.Counter
    19  	countKeyGetFailure prometheus.Counter
    21  	countKeyPutSuccess      prometheus.Counter
    22  	countKeyPutDrop         prometheus.Counter
    23  	countKeyPutDeduplicated prometheus.Counter
    24  	countKeyPutAttempt      prometheus.Counter
    25  	countKeyRemoved         prometheus.Counter
    27  	size prometheus.Gauge
    29  	countKeyEjectionDueToFullCapacity prometheus.Counter
    30  	countKeyEjectionDueToEmergency    prometheus.Counter
    31  }
// HeroCacheMetricsRegistrationFunc is the signature of a function that produces a
// module.HeroCacheMetrics from a single uint64 argument.
// NOTE(review): the uint64 is presumably an epoch counter (compare
// CollectionNodeTransactionsCacheMetrics) -- confirm against callers.
type HeroCacheMetricsRegistrationFunc func(uint64) module.HeroCacheMetrics
    35  func NetworkReceiveCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    36  	return NewHeroCacheCollector(namespaceNetwork, ResourceNetworkingReceiveCache, registrar)
    37  }
    39  func PublicNetworkReceiveCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    40  	return NewHeroCacheCollector(namespaceNetwork, ResourcePublicNetworkingReceiveCache, registrar)
    41  }
    43  func NetworkDnsTxtCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    44  	return NewHeroCacheCollector(namespaceNetwork, ResourceNetworkingDnsTxtCache, registrar)
    45  }
    47  func NetworkDnsIpCacheMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    48  	return NewHeroCacheCollector(namespaceNetwork, ResourceNetworkingDnsIpCache, registrar)
    49  }
    51  func ChunkDataPackRequestQueueMetricsFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    52  	return NewHeroCacheCollector(namespaceExecution, ResourceChunkDataPackRequests, registrar)
    53  }
    55  func ReceiptRequestsQueueMetricFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    56  	return NewHeroCacheCollector(namespaceExecution, ResourceReceipt, registrar)
    57  }
    59  func CollectionRequestsQueueMetricFactory(registrar prometheus.Registerer) *HeroCacheCollector {
    60  	return NewHeroCacheCollector(namespaceCollection, ResourceCollection, registrar)
    61  }
    63  func CollectionNodeTransactionsCacheMetrics(registrar prometheus.Registerer, epoch uint64) *HeroCacheCollector {
    64  	return NewHeroCacheCollector(namespaceCollection, fmt.Sprintf("%s_%d", ResourceTransaction, epoch), registrar)
    65  }
    67  func NewHeroCacheCollector(nameSpace string, cacheName string, registrar prometheus.Registerer) *HeroCacheCollector {
    69  	histogramNormalizedBucketSlotAvailable := prometheus.NewHistogram(prometheus.HistogramOpts{
    70  		Namespace: nameSpace,
    71  		Subsystem: subsystemHeroCache,
    73  		// Note that the notion of "bucket" in HeroCache differs from Prometheus.
    74  		// A HeroCache "bucket" is used to group the keys of the entities.
    75  		// A Prometheus "bucket" is used to group collected data points within a range.
    76  		// This metric represents the histogram of normalized available slots in buckets, where
    77  		// a data point set to 1 represents a bucket with all slots available (i.e., a fully empty bucket),
    78  		// and a data point set to 0 means a bucket with no available slots (i.e., a completely full bucket).
    79  		//
    80  		// We generally set total slots of a bucket in HeroCache to 16. Hence:
    81  		// Prometheus bucket 1 represents total number of HeroCache buckets with at most 16 available slots.
    82  		// Prometheus bucket 0.75 represents total number of HeroCache buckets with at most 12 available slots.
    83  		// Prometheus bucket 0.5 represents total number of HeroCache buckets with at most 8 available slots.
    84  		// Prometheus bucket 0.25 represents total number of HeroCache buckets with at most 4 available slots.
    85  		// Prometheus bucket 0.1 represents total number of HeroCache buckets with at most 1 available slots.
    86  		// Prometheus bucket 0 represents total number of HeroCache buckets with no (i.e., zero) available slots.
    87  		Buckets: []float64{0, 0.1, 0.25, 0.5, 0.75, 1},
    88  		Name:    cacheName + "_" + "normalized_bucket_available_slot_count",
    89  		Help:    "normalized histogram of available slots across all buckets",
    90  	})
    92  	size := prometheus.NewGauge(prometheus.GaugeOpts{
    93  		Namespace: nameSpace,
    94  		Subsystem: subsystemHeroCache,
    95  		Name:      cacheName + "_" + "items_total",
    96  		Help:      "total number of items in the cache",
    97  	})
    99  	countKeyGetSuccess := prometheus.NewCounter(prometheus.CounterOpts{
   100  		Namespace: nameSpace,
   101  		Subsystem: subsystemHeroCache,
   102  		Name:      cacheName + "_" + "successful_read_count_total",
   103  		Help:      "total number of successful read queries",
   104  	})
   106  	countKeyGetFailure := prometheus.NewCounter(prometheus.CounterOpts{
   107  		Namespace: nameSpace,
   108  		Subsystem: subsystemHeroCache,
   109  		Name:      cacheName + "_" + "unsuccessful_read_count_total",
   110  		Help:      "total number of unsuccessful read queries",
   111  	})
   113  	countKeyPutAttempt := prometheus.NewCounter(prometheus.CounterOpts{
   114  		Namespace: nameSpace,
   115  		Subsystem: subsystemHeroCache,
   116  		Name:      cacheName + "_" + "write_attempt_count_total",
   117  		Help:      "total number of put queries",
   118  	})
   120  	countKeyPutDrop := prometheus.NewCounter(prometheus.CounterOpts{
   121  		Namespace: nameSpace,
   122  		Subsystem: subsystemHeroCache,
   123  		Name:      cacheName + "_" + "write_drop_count_total",
   124  		Help:      "total number of put queries dropped due to full capacity",
   125  	})
   127  	countKeyPutSuccess := prometheus.NewCounter(prometheus.CounterOpts{
   128  		Namespace: nameSpace,
   129  		Subsystem: subsystemHeroCache,
   130  		Name:      cacheName + "_" + "successful_write_count_total",
   131  		Help:      "total number successful write queries",
   132  	})
   134  	countKeyPutDeduplicated := prometheus.NewCounter(prometheus.CounterOpts{
   135  		Namespace: nameSpace,
   136  		Subsystem: subsystemHeroCache,
   137  		Name:      cacheName + "_" + "unsuccessful_write_count_total",
   138  		Help:      "total number of queries writing an already existing (duplicate) entity to the cache",
   139  	})
   141  	countKeyRemoved := prometheus.NewCounter(prometheus.CounterOpts{
   142  		Namespace: nameSpace,
   143  		Subsystem: subsystemHeroCache,
   144  		Name:      cacheName + "_" + "removed_count_total",
   145  		Help:      "total number of entities removed from the cache",
   146  	})
   148  	countKeyEjectionDueToFullCapacity := prometheus.NewCounter(prometheus.CounterOpts{
   149  		Namespace: nameSpace,
   150  		Subsystem: subsystemHeroCache,
   151  		Name:      cacheName + "_" + "full_capacity_entity_ejection_total",
   152  		Help:      "total number of entities ejected when writing new entities at full capacity",
   153  	})
   155  	countKeyEjectionDueToEmergency := prometheus.NewCounter(prometheus.CounterOpts{
   156  		Namespace: nameSpace,
   157  		Subsystem: subsystemHeroCache,
   158  		Name:      cacheName + "_" + "emergency_key_ejection_total",
   159  		Help:      "total number of emergency key ejections at bucket level",
   160  	})
   162  	registrar.MustRegister(
   163  		// available slot distribution
   164  		histogramNormalizedBucketSlotAvailable,
   166  		// size
   167  		size,
   169  		// read
   170  		countKeyGetSuccess,
   171  		countKeyGetFailure,
   173  		// write
   174  		countKeyPutSuccess,
   175  		countKeyPutDeduplicated,
   176  		countKeyPutDrop,
   177  		countKeyPutAttempt,
   179  		// remove
   180  		countKeyRemoved,
   182  		// ejection
   183  		countKeyEjectionDueToFullCapacity,
   184  		countKeyEjectionDueToEmergency)
   186  	return &HeroCacheCollector{
   187  		histogramNormalizedBucketSlotAvailable: histogramNormalizedBucketSlotAvailable,
   188  		size:                                   size,
   189  		countKeyGetSuccess:                     countKeyGetSuccess,
   190  		countKeyGetFailure:                     countKeyGetFailure,
   192  		countKeyPutAttempt:      countKeyPutAttempt,
   193  		countKeyPutSuccess:      countKeyPutSuccess,
   194  		countKeyPutDeduplicated: countKeyPutDeduplicated,
   195  		countKeyPutDrop:         countKeyPutDrop,
   197  		countKeyRemoved: countKeyRemoved,
   199  		countKeyEjectionDueToFullCapacity: countKeyEjectionDueToFullCapacity,
   200  		countKeyEjectionDueToEmergency:    countKeyEjectionDueToEmergency,
   201  	}
   202  }
   204  // BucketAvailableSlots keeps track of number of available slots in buckets of cache.
   205  func (h *HeroCacheCollector) BucketAvailableSlots(availableSlots uint64, totalSlots uint64) {
   206  	normalizedAvailableSlots := float64(availableSlots) / float64(totalSlots)
   207  	h.histogramNormalizedBucketSlotAvailable.Observe(normalizedAvailableSlots)
   208  }
   210  // OnKeyPutSuccess is called whenever a new (key, entity) pair is successfully added to the cache.
   211  // size parameter is the current size of the cache post insertion.
   212  func (h *HeroCacheCollector) OnKeyPutSuccess(size uint32) {
   213  	h.countKeyPutSuccess.Inc()
   214  	h.size.Set(float64(size))
   215  }
   217  // OnKeyPutDeduplicated is tracking the total number of unsuccessful writes caused by adding a duplicate key to the cache.
   218  // A duplicate key is dropped by the cache when it is written to the cache.
   219  // Note: in context of HeroCache, the key corresponds to the identifier of its entity. Hence, a duplicate key corresponds to
   220  // a duplicate entity.
   221  func (h *HeroCacheCollector) OnKeyPutDeduplicated() {
   222  	h.countKeyPutDeduplicated.Inc()
   223  }
   225  // OnKeyGetSuccess tracks total number of successful read queries.
   226  // A read query is successful if the entity corresponding to its key is available in the cache.
   227  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   228  func (h *HeroCacheCollector) OnKeyGetSuccess() {
   229  	h.countKeyGetSuccess.Inc()
   230  }
   232  // OnKeyGetFailure tracks total number of unsuccessful read queries.
   233  // A read query is unsuccessful if the entity corresponding to its key is not available in the cache.
   234  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   235  func (h *HeroCacheCollector) OnKeyGetFailure() {
   236  	h.countKeyGetFailure.Inc()
   237  }
   239  // OnKeyPutAttempt is called whenever a new (key, value) pair is attempted to be put in cache.
   240  // It does not reflect whether the put was successful or not.
   241  // A (key, value) pair put attempt may fail if the cache is full, or the key already exists.
   242  // size parameter is the current size of the cache prior to the put attempt.
   243  func (h *HeroCacheCollector) OnKeyPutAttempt(size uint32) {
   244  	h.countKeyPutAttempt.Inc()
   245  	h.size.Set(float64(size))
   246  }
   248  // OnKeyPutDrop is called whenever a new (key, entity) pair is dropped from the cache due to full cache.
   249  func (h *HeroCacheCollector) OnKeyPutDrop() {
   250  	h.countKeyPutDrop.Inc()
   251  }
   253  // OnKeyRemoved is called whenever a (key, entity) pair is removed from the cache.
   254  // size parameter is the current size of the cache.
   255  func (h *HeroCacheCollector) OnKeyRemoved(size uint32) {
   256  	h.countKeyRemoved.Inc()
   257  	h.size.Set(float64(size))
   258  }
   260  // OnEntityEjectionDueToFullCapacity is called whenever adding a new (key, entity) to the cache results in ejection of another (key', entity') pair.
   261  // This normally happens -- and is expected -- when the cache is full.
   262  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   263  func (h *HeroCacheCollector) OnEntityEjectionDueToFullCapacity() {
   264  	h.countKeyEjectionDueToFullCapacity.Inc()
   265  }
   267  // OnEntityEjectionDueToEmergency is called whenever a bucket is found full and all of its keys are valid, i.e.,
   268  // each key belongs to an existing (key, entity) pair.
   269  // Hence, adding a new key to that bucket will replace the oldest valid key inside that bucket.
   270  // Note: in context of HeroCache, the key corresponds to the identifier of its entity.
   271  func (h *HeroCacheCollector) OnEntityEjectionDueToEmergency() {
   272  	h.countKeyEjectionDueToEmergency.Inc()
   273  }