github.com/celestiaorg/celestia-node@v0.15.0-beta.1/das/metrics.go (about)

     1  package das
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync/atomic"
     7  	"time"
     8  
     9  	"go.opentelemetry.io/otel"
    10  	"go.opentelemetry.io/otel/attribute"
    11  	"go.opentelemetry.io/otel/metric"
    12  
    13  	"github.com/celestiaorg/celestia-node/header"
    14  	"github.com/celestiaorg/celestia-node/libs/utils"
    15  )
    16  
    17  const (
    18  	jobTypeLabel     = "job_type"
    19  	headerWidthLabel = "header_width"
    20  	failedLabel      = "failed"
    21  )
    22  
// meter is the package-level OpenTelemetry meter, named "das", from which
// InitMetrics creates every instrument and registers its callback.
var meter = otel.Meter("das")
    24  
    25  type metrics struct {
    26  	sampled       metric.Int64Counter
    27  	sampleTime    metric.Float64Histogram
    28  	getHeaderTime metric.Float64Histogram
    29  	newHead       metric.Int64Counter
    30  
    31  	lastSampledTS uint64
    32  }
    33  
    34  func (d *DASer) InitMetrics() error {
    35  	sampled, err := meter.Int64Counter("das_sampled_headers_counter",
    36  		metric.WithDescription("sampled headers counter"))
    37  	if err != nil {
    38  		return err
    39  	}
    40  
    41  	sampleTime, err := meter.Float64Histogram("das_sample_time_hist",
    42  		metric.WithDescription("duration of sampling a single header"))
    43  	if err != nil {
    44  		return err
    45  	}
    46  
    47  	getHeaderTime, err := meter.Float64Histogram("das_get_header_time_hist",
    48  		metric.WithDescription("duration of getting header from header store"))
    49  	if err != nil {
    50  		return err
    51  	}
    52  
    53  	newHead, err := meter.Int64Counter("das_head_updated_counter",
    54  		metric.WithDescription("amount of times DAS'er advanced network head"))
    55  	if err != nil {
    56  		return err
    57  	}
    58  
    59  	lastSampledTS, err := meter.Int64ObservableGauge("das_latest_sampled_ts",
    60  		metric.WithDescription("latest sampled timestamp"))
    61  	if err != nil {
    62  		return err
    63  	}
    64  
    65  	busyWorkers, err := meter.Int64ObservableGauge("das_busy_workers_amount",
    66  		metric.WithDescription("number of active parallel workers in DAS'er"))
    67  	if err != nil {
    68  		return err
    69  	}
    70  
    71  	networkHead, err := meter.Int64ObservableGauge("das_network_head",
    72  		metric.WithDescription("most recent network head"))
    73  	if err != nil {
    74  		return err
    75  	}
    76  
    77  	sampledChainHead, err := meter.Int64ObservableGauge("das_sampled_chain_head",
    78  		metric.WithDescription("height of the sampled chain - all previous headers have been successfully sampled"))
    79  	if err != nil {
    80  		return err
    81  	}
    82  
    83  	totalSampled, err := meter.Int64ObservableGauge("das_total_sampled_headers",
    84  		metric.WithDescription("total sampled headers gauge"),
    85  	)
    86  	if err != nil {
    87  		return err
    88  	}
    89  
    90  	d.sampler.metrics = &metrics{
    91  		sampled:       sampled,
    92  		sampleTime:    sampleTime,
    93  		getHeaderTime: getHeaderTime,
    94  		newHead:       newHead,
    95  	}
    96  
    97  	callback := func(ctx context.Context, observer metric.Observer) error {
    98  		stats, err := d.sampler.stats(ctx)
    99  		if err != nil {
   100  			log.Errorf("observing stats: %s", err.Error())
   101  			return err
   102  		}
   103  
   104  		for jobType, amount := range stats.workersByJobType() {
   105  			observer.ObserveInt64(busyWorkers, amount,
   106  				metric.WithAttributes(
   107  					attribute.String(jobTypeLabel, string(jobType)),
   108  				))
   109  		}
   110  
   111  		observer.ObserveInt64(networkHead, int64(stats.NetworkHead))
   112  		observer.ObserveInt64(sampledChainHead, int64(stats.SampledChainHead))
   113  
   114  		if ts := atomic.LoadUint64(&d.sampler.metrics.lastSampledTS); ts != 0 {
   115  			observer.ObserveInt64(lastSampledTS, int64(ts))
   116  		}
   117  
   118  		observer.ObserveInt64(totalSampled, int64(stats.totalSampled()))
   119  		return nil
   120  	}
   121  
   122  	_, err = meter.RegisterCallback(callback,
   123  		lastSampledTS,
   124  		busyWorkers,
   125  		networkHead,
   126  		sampledChainHead,
   127  		totalSampled,
   128  	)
   129  	if err != nil {
   130  		return fmt.Errorf("registering metrics callback: %w", err)
   131  	}
   132  
   133  	return nil
   134  }
   135  
   136  // observeSample records the time it took to sample a header +
   137  // the amount of sampled contiguous headers
   138  func (m *metrics) observeSample(
   139  	ctx context.Context,
   140  	h *header.ExtendedHeader,
   141  	sampleTime time.Duration,
   142  	jobType jobType,
   143  	err error,
   144  ) {
   145  	if m == nil {
   146  		return
   147  	}
   148  
   149  	ctx = utils.ResetContextOnError(ctx)
   150  
   151  	m.sampleTime.Record(ctx, sampleTime.Seconds(),
   152  		metric.WithAttributes(
   153  			attribute.Bool(failedLabel, err != nil),
   154  			attribute.Int(headerWidthLabel, len(h.DAH.RowRoots)),
   155  			attribute.String(jobTypeLabel, string(jobType)),
   156  		))
   157  
   158  	m.sampled.Add(ctx, 1,
   159  		metric.WithAttributes(
   160  			attribute.Bool(failedLabel, err != nil),
   161  			attribute.Int(headerWidthLabel, len(h.DAH.RowRoots)),
   162  			attribute.String(jobTypeLabel, string(jobType)),
   163  		))
   164  
   165  	atomic.StoreUint64(&m.lastSampledTS, uint64(time.Now().UTC().Unix()))
   166  }
   167  
   168  // observeGetHeader records the time it took to get a header from the header store.
   169  func (m *metrics) observeGetHeader(ctx context.Context, d time.Duration) {
   170  	if m == nil {
   171  		return
   172  	}
   173  	ctx = utils.ResetContextOnError(ctx)
   174  	m.getHeaderTime.Record(ctx, d.Seconds())
   175  }
   176  
   177  // observeNewHead records the network head.
   178  func (m *metrics) observeNewHead(ctx context.Context) {
   179  	if m == nil {
   180  		return
   181  	}
   182  	ctx = utils.ResetContextOnError(ctx)
   183  	m.newHead.Add(ctx, 1)
   184  }