github.com/celestiaorg/celestia-node@v0.15.0-beta.1/share/eds/metrics.go (about)

     1  package eds
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"go.opentelemetry.io/otel"
     8  	"go.opentelemetry.io/otel/attribute"
     9  	"go.opentelemetry.io/otel/metric"
    10  
    11  	"github.com/celestiaorg/celestia-node/libs/utils"
    12  )
    13  
    14  const (
    15  	failedKey = "failed"
    16  	sizeKey   = "eds_size"
    17  
    18  	putResultKey           = "result"
    19  	putOK        putResult = "ok"
    20  	putExists    putResult = "exists"
    21  	putFailed    putResult = "failed"
    22  
    23  	opNameKey                     = "op"
    24  	longOpResultKey               = "result"
    25  	longOpUnresolved longOpResult = "unresolved"
    26  	longOpOK         longOpResult = "ok"
    27  	longOpFailed     longOpResult = "failed"
    28  
    29  	dagstoreShardStatusKey = "shard_status"
    30  )
    31  
    32  var meter = otel.Meter("eds_store")
    33  
    34  type putResult string
    35  
    36  type longOpResult string
    37  
    38  type metrics struct {
    39  	putTime              metric.Float64Histogram
    40  	getCARTime           metric.Float64Histogram
    41  	getCARBlockstoreTime metric.Float64Histogram
    42  	getDAHTime           metric.Float64Histogram
    43  	removeTime           metric.Float64Histogram
    44  	getTime              metric.Float64Histogram
    45  	hasTime              metric.Float64Histogram
    46  	listTime             metric.Float64Histogram
    47  
    48  	shardFailureCount metric.Int64Counter
    49  
    50  	longOpTime metric.Float64Histogram
    51  	gcTime     metric.Float64Histogram
    52  }
    53  
    54  func (s *Store) WithMetrics() error {
    55  	putTime, err := meter.Float64Histogram("eds_store_put_time_histogram",
    56  		metric.WithDescription("eds store put time histogram(s)"))
    57  	if err != nil {
    58  		return err
    59  	}
    60  
    61  	getCARTime, err := meter.Float64Histogram("eds_store_get_car_time_histogram",
    62  		metric.WithDescription("eds store get car time histogram(s)"))
    63  	if err != nil {
    64  		return err
    65  	}
    66  
    67  	getCARBlockstoreTime, err := meter.Float64Histogram("eds_store_get_car_blockstore_time_histogram",
    68  		metric.WithDescription("eds store get car blockstore time histogram(s)"))
    69  	if err != nil {
    70  		return err
    71  	}
    72  
    73  	getDAHTime, err := meter.Float64Histogram("eds_store_get_dah_time_histogram",
    74  		metric.WithDescription("eds store get dah time histogram(s)"))
    75  	if err != nil {
    76  		return err
    77  	}
    78  
    79  	removeTime, err := meter.Float64Histogram("eds_store_remove_time_histogram",
    80  		metric.WithDescription("eds store remove time histogram(s)"))
    81  	if err != nil {
    82  		return err
    83  	}
    84  
    85  	getTime, err := meter.Float64Histogram("eds_store_get_time_histogram",
    86  		metric.WithDescription("eds store get time histogram(s)"))
    87  	if err != nil {
    88  		return err
    89  	}
    90  
    91  	hasTime, err := meter.Float64Histogram("eds_store_has_time_histogram",
    92  		metric.WithDescription("eds store has time histogram(s)"))
    93  	if err != nil {
    94  		return err
    95  	}
    96  
    97  	listTime, err := meter.Float64Histogram("eds_store_list_time_histogram",
    98  		metric.WithDescription("eds store list time histogram(s)"))
    99  	if err != nil {
   100  		return err
   101  	}
   102  
   103  	shardFailureCount, err := meter.Int64Counter("eds_store_shard_failure_counter",
   104  		metric.WithDescription("eds store OpShardFail counter"))
   105  	if err != nil {
   106  		return err
   107  	}
   108  
   109  	longOpTime, err := meter.Float64Histogram("eds_store_long_operation_time_histogram",
   110  		metric.WithDescription("eds store long operation time histogram(s)"))
   111  	if err != nil {
   112  		return err
   113  	}
   114  
   115  	gcTime, err := meter.Float64Histogram("eds_store_gc_time",
   116  		metric.WithDescription("dagstore gc time histogram(s)"))
   117  	if err != nil {
   118  		return err
   119  	}
   120  
   121  	dagStoreShards, err := meter.Int64ObservableGauge("eds_store_dagstore_shards",
   122  		metric.WithDescription("dagstore amount of shards by status"))
   123  	if err != nil {
   124  		return err
   125  	}
   126  
   127  	if err = s.cache.Load().EnableMetrics(); err != nil {
   128  		return err
   129  	}
   130  
   131  	callback := func(ctx context.Context, observer metric.Observer) error {
   132  		stats := s.dgstr.Stats()
   133  		for status, amount := range stats {
   134  			observer.ObserveInt64(dagStoreShards, int64(amount),
   135  				metric.WithAttributes(
   136  					attribute.String(dagstoreShardStatusKey, status.String()),
   137  				))
   138  		}
   139  		return nil
   140  	}
   141  
   142  	if _, err := meter.RegisterCallback(callback, dagStoreShards); err != nil {
   143  		return err
   144  	}
   145  
   146  	s.metrics = &metrics{
   147  		putTime:              putTime,
   148  		getCARTime:           getCARTime,
   149  		getCARBlockstoreTime: getCARBlockstoreTime,
   150  		getDAHTime:           getDAHTime,
   151  		removeTime:           removeTime,
   152  		getTime:              getTime,
   153  		hasTime:              hasTime,
   154  		listTime:             listTime,
   155  		shardFailureCount:    shardFailureCount,
   156  		longOpTime:           longOpTime,
   157  		gcTime:               gcTime,
   158  	}
   159  	return nil
   160  }
   161  
   162  func (m *metrics) observeGCtime(ctx context.Context, dur time.Duration, failed bool) {
   163  	if m == nil {
   164  		return
   165  	}
   166  	ctx = utils.ResetContextOnError(ctx)
   167  	m.gcTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   168  		attribute.Bool(failedKey, failed)))
   169  }
   170  
   171  func (m *metrics) observeShardFailure(ctx context.Context, shardKey string) {
   172  	if m == nil {
   173  		return
   174  	}
   175  	ctx = utils.ResetContextOnError(ctx)
   176  
   177  	m.shardFailureCount.Add(ctx, 1, metric.WithAttributes(attribute.String("shard_key", shardKey)))
   178  }
   179  
   180  func (m *metrics) observePut(ctx context.Context, dur time.Duration, result putResult, size uint) {
   181  	if m == nil {
   182  		return
   183  	}
   184  	ctx = utils.ResetContextOnError(ctx)
   185  
   186  	m.putTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   187  		attribute.String(putResultKey, string(result)),
   188  		attribute.Int(sizeKey, int(size))))
   189  }
   190  
   191  func (m *metrics) observeLongOp(ctx context.Context, opName string, dur time.Duration, result longOpResult) {
   192  	if m == nil {
   193  		return
   194  	}
   195  	ctx = utils.ResetContextOnError(ctx)
   196  
   197  	m.longOpTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   198  		attribute.String(opNameKey, opName),
   199  		attribute.String(longOpResultKey, string(result))))
   200  }
   201  
   202  func (m *metrics) observeGetCAR(ctx context.Context, dur time.Duration, failed bool) {
   203  	if m == nil {
   204  		return
   205  	}
   206  	ctx = utils.ResetContextOnError(ctx)
   207  
   208  	m.getCARTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   209  		attribute.Bool(failedKey, failed)))
   210  }
   211  
   212  func (m *metrics) observeCARBlockstore(ctx context.Context, dur time.Duration, failed bool) {
   213  	if m == nil {
   214  		return
   215  	}
   216  	ctx = utils.ResetContextOnError(ctx)
   217  
   218  	m.getCARBlockstoreTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   219  		attribute.Bool(failedKey, failed)))
   220  }
   221  
   222  func (m *metrics) observeGetDAH(ctx context.Context, dur time.Duration, failed bool) {
   223  	if m == nil {
   224  		return
   225  	}
   226  	ctx = utils.ResetContextOnError(ctx)
   227  
   228  	m.getDAHTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   229  		attribute.Bool(failedKey, failed)))
   230  }
   231  
   232  func (m *metrics) observeRemove(ctx context.Context, dur time.Duration, failed bool) {
   233  	if m == nil {
   234  		return
   235  	}
   236  	ctx = utils.ResetContextOnError(ctx)
   237  
   238  	m.removeTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   239  		attribute.Bool(failedKey, failed)))
   240  }
   241  
   242  func (m *metrics) observeGet(ctx context.Context, dur time.Duration, failed bool) {
   243  	if m == nil {
   244  		return
   245  	}
   246  	ctx = utils.ResetContextOnError(ctx)
   247  
   248  	m.getTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   249  		attribute.Bool(failedKey, failed)))
   250  }
   251  
   252  func (m *metrics) observeHas(ctx context.Context, dur time.Duration, failed bool) {
   253  	if m == nil {
   254  		return
   255  	}
   256  	ctx = utils.ResetContextOnError(ctx)
   257  
   258  	m.hasTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   259  		attribute.Bool(failedKey, failed)))
   260  }
   261  
   262  func (m *metrics) observeList(ctx context.Context, dur time.Duration, failed bool) {
   263  	if m == nil {
   264  		return
   265  	}
   266  	ctx = utils.ResetContextOnError(ctx)
   267  
   268  	m.listTime.Record(ctx, dur.Seconds(), metric.WithAttributes(
   269  		attribute.Bool(failedKey, failed)))
   270  }