github.com/MetalBlockchain/metalgo@v1.11.9/snow/consensus/snowman/metrics.go (about)

     1  // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package snowman
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/prometheus/client_golang/prometheus"
    10  	"go.uber.org/zap"
    11  
    12  	"github.com/MetalBlockchain/metalgo/ids"
    13  	"github.com/MetalBlockchain/metalgo/snow/choices"
    14  	"github.com/MetalBlockchain/metalgo/utils/linked"
    15  	"github.com/MetalBlockchain/metalgo/utils/logging"
    16  	"github.com/MetalBlockchain/metalgo/utils/metric"
    17  	"github.com/MetalBlockchain/metalgo/utils/wrappers"
    18  )
    19  
// processingStart records when a block entered consensus: the wall-clock time
// captured when the block was issued and the poll number supplied at that
// moment. Both values are later subtracted from the corresponding values at
// acceptance/rejection to report how long the block was processing.
type processingStart struct {
	time       time.Time
	pollNumber uint64
}
    24  
// metrics groups the Prometheus collectors and the bookkeeping state used to
// report on blocks as they are issued, verified, accepted and rejected.
type metrics struct {
	// log is used to report blocks that are accepted or rejected without
	// having a recorded processing start.
	log logging.Logger

	// currentMaxVerifiedHeight caches the largest height reported so far so
	// that maxVerifiedHeight never moves backwards; maxVerifiedHeight is the
	// exported gauge mirroring that value.
	currentMaxVerifiedHeight uint64
	maxVerifiedHeight        prometheus.Gauge

	// lastAcceptedHeight and lastAcceptedTimestamp expose the height and the
	// timestamp (in unix seconds) of the most recently accepted block.
	lastAcceptedHeight    prometheus.Gauge
	lastAcceptedTimestamp prometheus.Gauge

	// processingBlocks keeps track of the [processingStart] that each block was
	// issued into the consensus instance. This is used to calculate the amount
	// of time to accept or reject the block.
	processingBlocks *linked.Hashmap[ids.ID, processingStart]

	// numProcessing keeps track of the number of processing blocks
	numProcessing prometheus.Gauge

	// blockSizeAcceptedSum accumulates the size of every accepted block
	blockSizeAcceptedSum prometheus.Gauge
	// pollsAccepted tracks the number of polls that a block was in processing
	// for before being accepted
	pollsAccepted metric.Averager
	// latAccepted tracks the number of nanoseconds that a block was processing
	// before being accepted.
	// buildLatencyAccepted accumulates the number of nanoseconds between each
	// accepted block's timestamp and the time it was accepted.
	latAccepted          metric.Averager
	buildLatencyAccepted prometheus.Gauge

	// blockSizeRejectedSum accumulates the size of every rejected block
	blockSizeRejectedSum prometheus.Gauge
	// pollsRejected tracks the number of polls that a block was in processing
	// for before being rejected
	pollsRejected metric.Averager
	// latRejected tracks the number of nanoseconds that a block was processing
	// before being rejected
	latRejected metric.Averager

	// numFailedPolls keeps track of the number of polls that failed
	numFailedPolls prometheus.Counter

	// numSuccessfulPolls keeps track of the number of polls that succeeded
	numSuccessfulPolls prometheus.Counter
}
    65  
    66  func newMetrics(
    67  	log logging.Logger,
    68  	reg prometheus.Registerer,
    69  	lastAcceptedHeight uint64,
    70  	lastAcceptedTime time.Time,
    71  ) (*metrics, error) {
    72  	errs := wrappers.Errs{}
    73  	m := &metrics{
    74  		log:                      log,
    75  		currentMaxVerifiedHeight: lastAcceptedHeight,
    76  		maxVerifiedHeight: prometheus.NewGauge(prometheus.GaugeOpts{
    77  			Name: "max_verified_height",
    78  			Help: "highest verified height",
    79  		}),
    80  		lastAcceptedHeight: prometheus.NewGauge(prometheus.GaugeOpts{
    81  			Name: "last_accepted_height",
    82  			Help: "last height accepted",
    83  		}),
    84  		lastAcceptedTimestamp: prometheus.NewGauge(prometheus.GaugeOpts{
    85  			Name: "last_accepted_timestamp",
    86  			Help: "timestamp of the last accepted block in unix seconds",
    87  		}),
    88  
    89  		processingBlocks: linked.NewHashmap[ids.ID, processingStart](),
    90  
    91  		numProcessing: prometheus.NewGauge(prometheus.GaugeOpts{
    92  			Name: "blks_processing",
    93  			Help: "number of currently processing blocks",
    94  		}),
    95  
    96  		blockSizeAcceptedSum: prometheus.NewGauge(prometheus.GaugeOpts{
    97  			Name: "blks_accepted_container_size_sum",
    98  			Help: "cumulative size of all accepted blocks",
    99  		}),
   100  		pollsAccepted: metric.NewAveragerWithErrs(
   101  			"blks_polls_accepted",
   102  			"number of polls from the issuance of a block to its acceptance",
   103  			reg,
   104  			&errs,
   105  		),
   106  		latAccepted: metric.NewAveragerWithErrs(
   107  			"blks_accepted",
   108  			"time (in ns) from the issuance of a block to its acceptance",
   109  			reg,
   110  			&errs,
   111  		),
   112  		buildLatencyAccepted: prometheus.NewGauge(prometheus.GaugeOpts{
   113  			Name: "blks_build_accept_latency",
   114  			Help: "time (in ns) from the timestamp of a block to the time it was accepted",
   115  		}),
   116  
   117  		blockSizeRejectedSum: prometheus.NewGauge(prometheus.GaugeOpts{
   118  			Name: "blks_rejected_container_size_sum",
   119  			Help: "cumulative size of all rejected blocks",
   120  		}),
   121  		pollsRejected: metric.NewAveragerWithErrs(
   122  			"blks_polls_rejected",
   123  			"number of polls from the issuance of a block to its rejection",
   124  			reg,
   125  			&errs,
   126  		),
   127  		latRejected: metric.NewAveragerWithErrs(
   128  			"blks_rejected",
   129  			"time (in ns) from the issuance of a block to its rejection",
   130  			reg,
   131  			&errs,
   132  		),
   133  
   134  		numSuccessfulPolls: prometheus.NewCounter(prometheus.CounterOpts{
   135  			Name: "polls_successful",
   136  			Help: "number of successful polls",
   137  		}),
   138  		numFailedPolls: prometheus.NewCounter(prometheus.CounterOpts{
   139  			Name: "polls_failed",
   140  			Help: "number of failed polls",
   141  		}),
   142  	}
   143  
   144  	// Initially set the metrics for the last accepted block.
   145  	m.maxVerifiedHeight.Set(float64(lastAcceptedHeight))
   146  	m.lastAcceptedHeight.Set(float64(lastAcceptedHeight))
   147  	m.lastAcceptedTimestamp.Set(float64(lastAcceptedTime.Unix()))
   148  
   149  	errs.Add(
   150  		reg.Register(m.maxVerifiedHeight),
   151  		reg.Register(m.lastAcceptedHeight),
   152  		reg.Register(m.lastAcceptedTimestamp),
   153  		reg.Register(m.numProcessing),
   154  		reg.Register(m.blockSizeAcceptedSum),
   155  		reg.Register(m.buildLatencyAccepted),
   156  		reg.Register(m.blockSizeRejectedSum),
   157  		reg.Register(m.numSuccessfulPolls),
   158  		reg.Register(m.numFailedPolls),
   159  	)
   160  	return m, errs.Err
   161  }
   162  
   163  func (m *metrics) Issued(blkID ids.ID, pollNumber uint64) {
   164  	m.processingBlocks.Put(blkID, processingStart{
   165  		time:       time.Now(),
   166  		pollNumber: pollNumber,
   167  	})
   168  	m.numProcessing.Inc()
   169  }
   170  
   171  func (m *metrics) Verified(height uint64) {
   172  	m.currentMaxVerifiedHeight = max(m.currentMaxVerifiedHeight, height)
   173  	m.maxVerifiedHeight.Set(float64(m.currentMaxVerifiedHeight))
   174  }
   175  
   176  func (m *metrics) Accepted(
   177  	blkID ids.ID,
   178  	height uint64,
   179  	timestamp time.Time,
   180  	pollNumber uint64,
   181  	blockSize int,
   182  ) {
   183  	start, ok := m.processingBlocks.Get(blkID)
   184  	if !ok {
   185  		m.log.Error("unable to measure latency",
   186  			zap.Stringer("blkID", blkID),
   187  			zap.Stringer("status", choices.Accepted),
   188  		)
   189  		return
   190  	}
   191  	m.lastAcceptedHeight.Set(float64(height))
   192  	m.lastAcceptedTimestamp.Set(float64(timestamp.Unix()))
   193  	m.processingBlocks.Delete(blkID)
   194  	m.numProcessing.Dec()
   195  
   196  	m.blockSizeAcceptedSum.Add(float64(blockSize))
   197  
   198  	m.pollsAccepted.Observe(float64(pollNumber - start.pollNumber))
   199  
   200  	now := time.Now()
   201  	processingDuration := now.Sub(start.time)
   202  	m.latAccepted.Observe(float64(processingDuration))
   203  
   204  	builtDuration := now.Sub(timestamp)
   205  	m.buildLatencyAccepted.Add(float64(builtDuration))
   206  }
   207  
   208  func (m *metrics) Rejected(blkID ids.ID, pollNumber uint64, blockSize int) {
   209  	start, ok := m.processingBlocks.Get(blkID)
   210  	if !ok {
   211  		m.log.Error("unable to measure latency",
   212  			zap.Stringer("blkID", blkID),
   213  			zap.Stringer("status", choices.Rejected),
   214  		)
   215  		return
   216  	}
   217  	m.processingBlocks.Delete(blkID)
   218  	m.numProcessing.Dec()
   219  
   220  	m.blockSizeRejectedSum.Add(float64(blockSize))
   221  
   222  	m.pollsRejected.Observe(float64(pollNumber - start.pollNumber))
   223  
   224  	duration := time.Since(start.time)
   225  	m.latRejected.Observe(float64(duration))
   226  }
   227  
   228  func (m *metrics) MeasureAndGetOldestDuration() time.Duration {
   229  	_, oldestOp, exists := m.processingBlocks.Oldest()
   230  	if !exists {
   231  		return 0
   232  	}
   233  	return time.Since(oldestOp.time)
   234  }
   235  
   236  func (m *metrics) SuccessfulPoll() {
   237  	m.numSuccessfulPolls.Inc()
   238  }
   239  
   240  func (m *metrics) FailedPoll() {
   241  	m.numFailedPolls.Inc()
   242  }