github.com/celestiaorg/celestia-node@v0.15.0-beta.1/das/metrics.go (about) 1 package das 2 3 import ( 4 "context" 5 "fmt" 6 "sync/atomic" 7 "time" 8 9 "go.opentelemetry.io/otel" 10 "go.opentelemetry.io/otel/attribute" 11 "go.opentelemetry.io/otel/metric" 12 13 "github.com/celestiaorg/celestia-node/header" 14 "github.com/celestiaorg/celestia-node/libs/utils" 15 ) 16 17 const ( 18 jobTypeLabel = "job_type" 19 headerWidthLabel = "header_width" 20 failedLabel = "failed" 21 ) 22 23 var meter = otel.Meter("das") 24 25 type metrics struct { 26 sampled metric.Int64Counter 27 sampleTime metric.Float64Histogram 28 getHeaderTime metric.Float64Histogram 29 newHead metric.Int64Counter 30 31 lastSampledTS uint64 32 } 33 34 func (d *DASer) InitMetrics() error { 35 sampled, err := meter.Int64Counter("das_sampled_headers_counter", 36 metric.WithDescription("sampled headers counter")) 37 if err != nil { 38 return err 39 } 40 41 sampleTime, err := meter.Float64Histogram("das_sample_time_hist", 42 metric.WithDescription("duration of sampling a single header")) 43 if err != nil { 44 return err 45 } 46 47 getHeaderTime, err := meter.Float64Histogram("das_get_header_time_hist", 48 metric.WithDescription("duration of getting header from header store")) 49 if err != nil { 50 return err 51 } 52 53 newHead, err := meter.Int64Counter("das_head_updated_counter", 54 metric.WithDescription("amount of times DAS'er advanced network head")) 55 if err != nil { 56 return err 57 } 58 59 lastSampledTS, err := meter.Int64ObservableGauge("das_latest_sampled_ts", 60 metric.WithDescription("latest sampled timestamp")) 61 if err != nil { 62 return err 63 } 64 65 busyWorkers, err := meter.Int64ObservableGauge("das_busy_workers_amount", 66 metric.WithDescription("number of active parallel workers in DAS'er")) 67 if err != nil { 68 return err 69 } 70 71 networkHead, err := meter.Int64ObservableGauge("das_network_head", 72 metric.WithDescription("most recent network head")) 73 if err != nil { 74 return err 75 } 76 77 sampledChainHead, err := meter.Int64ObservableGauge("das_sampled_chain_head", 78 metric.WithDescription("height of the sampled chain - all previous headers have been successfully sampled")) 79 if err != nil { 80 return err 81 } 82 83 totalSampled, err := meter.Int64ObservableGauge("das_total_sampled_headers", 84 metric.WithDescription("total sampled headers gauge"), 85 ) 86 if err != nil { 87 return err 88 } 89 90 d.sampler.metrics = &metrics{ 91 sampled: sampled, 92 sampleTime: sampleTime, 93 getHeaderTime: getHeaderTime, 94 newHead: newHead, 95 } 96 97 callback := func(ctx context.Context, observer metric.Observer) error { 98 stats, err := d.sampler.stats(ctx) 99 if err != nil { 100 log.Errorf("observing stats: %s", err.Error()) 101 return err 102 } 103 104 for jobType, amount := range stats.workersByJobType() { 105 observer.ObserveInt64(busyWorkers, amount, 106 metric.WithAttributes( 107 attribute.String(jobTypeLabel, string(jobType)), 108 )) 109 } 110 111 observer.ObserveInt64(networkHead, int64(stats.NetworkHead)) 112 observer.ObserveInt64(sampledChainHead, int64(stats.SampledChainHead)) 113 114 if ts := atomic.LoadUint64(&d.sampler.metrics.lastSampledTS); ts != 0 { 115 observer.ObserveInt64(lastSampledTS, int64(ts)) 116 } 117 118 observer.ObserveInt64(totalSampled, int64(stats.totalSampled())) 119 return nil 120 } 121 122 _, err = meter.RegisterCallback(callback, 123 lastSampledTS, 124 busyWorkers, 125 networkHead, 126 sampledChainHead, 127 totalSampled, 128 ) 129 if err != nil { 130 return fmt.Errorf("registering metrics callback: %w", err) 131 } 132 133 return nil 134 } 135 136 // observeSample records the time it took to sample a header + 137 // the amount of sampled contiguous headers 138 func (m *metrics) observeSample( 139 ctx context.Context, 140 h *header.ExtendedHeader, 141 sampleTime time.Duration, 142 jobType jobType, 143 err error, 144 ) { 145 if m == nil { 146 return 147 } 148 149 ctx = utils.ResetContextOnError(ctx) 150 151 m.sampleTime.Record(ctx, sampleTime.Seconds(), 152 metric.WithAttributes( 153 attribute.Bool(failedLabel, err != nil), 154 attribute.Int(headerWidthLabel, len(h.DAH.RowRoots)), 155 attribute.String(jobTypeLabel, string(jobType)), 156 )) 157 158 m.sampled.Add(ctx, 1, 159 metric.WithAttributes( 160 attribute.Bool(failedLabel, err != nil), 161 attribute.Int(headerWidthLabel, len(h.DAH.RowRoots)), 162 attribute.String(jobTypeLabel, string(jobType)), 163 )) 164 165 atomic.StoreUint64(&m.lastSampledTS, uint64(time.Now().UTC().Unix())) 166 } 167 168 // observeGetHeader records the time it took to get a header from the header store. 169 func (m *metrics) observeGetHeader(ctx context.Context, d time.Duration) { 170 if m == nil { 171 return 172 } 173 ctx = utils.ResetContextOnError(ctx) 174 m.getHeaderTime.Record(ctx, d.Seconds()) 175 } 176 177 // observeNewHead records the network head. 178 func (m *metrics) observeNewHead(ctx context.Context) { 179 if m == nil { 180 return 181 } 182 ctx = utils.ResetContextOnError(ctx) 183 m.newHead.Add(ctx, 1) 184 }