github.com/celestiaorg/celestia-node@v0.15.0-beta.1/share/eds/metrics.go (about) 1 package eds 2 3 import ( 4 "context" 5 "time" 6 7 "go.opentelemetry.io/otel" 8 "go.opentelemetry.io/otel/attribute" 9 "go.opentelemetry.io/otel/metric" 10 11 "github.com/celestiaorg/celestia-node/libs/utils" 12 ) 13 14 const ( 15 failedKey = "failed" 16 sizeKey = "eds_size" 17 18 putResultKey = "result" 19 putOK putResult = "ok" 20 putExists putResult = "exists" 21 putFailed putResult = "failed" 22 23 opNameKey = "op" 24 longOpResultKey = "result" 25 longOpUnresolved longOpResult = "unresolved" 26 longOpOK longOpResult = "ok" 27 longOpFailed longOpResult = "failed" 28 29 dagstoreShardStatusKey = "shard_status" 30 ) 31 32 var meter = otel.Meter("eds_store") 33 34 type putResult string 35 36 type longOpResult string 37 38 type metrics struct { 39 putTime metric.Float64Histogram 40 getCARTime metric.Float64Histogram 41 getCARBlockstoreTime metric.Float64Histogram 42 getDAHTime metric.Float64Histogram 43 removeTime metric.Float64Histogram 44 getTime metric.Float64Histogram 45 hasTime metric.Float64Histogram 46 listTime metric.Float64Histogram 47 48 shardFailureCount metric.Int64Counter 49 50 longOpTime metric.Float64Histogram 51 gcTime metric.Float64Histogram 52 } 53 54 func (s *Store) WithMetrics() error { 55 putTime, err := meter.Float64Histogram("eds_store_put_time_histogram", 56 metric.WithDescription("eds store put time histogram(s)")) 57 if err != nil { 58 return err 59 } 60 61 getCARTime, err := meter.Float64Histogram("eds_store_get_car_time_histogram", 62 metric.WithDescription("eds store get car time histogram(s)")) 63 if err != nil { 64 return err 65 } 66 67 getCARBlockstoreTime, err := meter.Float64Histogram("eds_store_get_car_blockstore_time_histogram", 68 metric.WithDescription("eds store get car blockstore time histogram(s)")) 69 if err != nil { 70 return err 71 } 72 73 getDAHTime, err := meter.Float64Histogram("eds_store_get_dah_time_histogram", 74 metric.WithDescription("eds store get dah time histogram(s)")) 75 if err != nil { 76 return err 77 } 78 79 removeTime, err := meter.Float64Histogram("eds_store_remove_time_histogram", 80 metric.WithDescription("eds store remove time histogram(s)")) 81 if err != nil { 82 return err 83 } 84 85 getTime, err := meter.Float64Histogram("eds_store_get_time_histogram", 86 metric.WithDescription("eds store get time histogram(s)")) 87 if err != nil { 88 return err 89 } 90 91 hasTime, err := meter.Float64Histogram("eds_store_has_time_histogram", 92 metric.WithDescription("eds store has time histogram(s)")) 93 if err != nil { 94 return err 95 } 96 97 listTime, err := meter.Float64Histogram("eds_store_list_time_histogram", 98 metric.WithDescription("eds store list time histogram(s)")) 99 if err != nil { 100 return err 101 } 102 103 shardFailureCount, err := meter.Int64Counter("eds_store_shard_failure_counter", 104 metric.WithDescription("eds store OpShardFail counter")) 105 if err != nil { 106 return err 107 } 108 109 longOpTime, err := meter.Float64Histogram("eds_store_long_operation_time_histogram", 110 metric.WithDescription("eds store long operation time histogram(s)")) 111 if err != nil { 112 return err 113 } 114 115 gcTime, err := meter.Float64Histogram("eds_store_gc_time", 116 metric.WithDescription("dagstore gc time histogram(s)")) 117 if err != nil { 118 return err 119 } 120 121 dagStoreShards, err := meter.Int64ObservableGauge("eds_store_dagstore_shards", 122 metric.WithDescription("dagstore amount of shards by status")) 123 if err != nil { 124 return err 125 } 126 127 if err = s.cache.Load().EnableMetrics(); err != nil { 128 return err 129 } 130 131 callback := func(ctx context.Context, observer metric.Observer) error { 132 stats := s.dgstr.Stats() 133 for status, amount := range stats { 134 observer.ObserveInt64(dagStoreShards, int64(amount), 135 metric.WithAttributes( 136 attribute.String(dagstoreShardStatusKey, status.String()), 137 )) 138 } 139 return nil 140 } 141 142 if _, err := meter.RegisterCallback(callback, dagStoreShards); err != nil { 143 return err 144 } 145 146 s.metrics = &metrics{ 147 putTime: putTime, 148 getCARTime: getCARTime, 149 getCARBlockstoreTime: getCARBlockstoreTime, 150 getDAHTime: getDAHTime, 151 removeTime: removeTime, 152 getTime: getTime, 153 hasTime: hasTime, 154 listTime: listTime, 155 shardFailureCount: shardFailureCount, 156 longOpTime: longOpTime, 157 gcTime: gcTime, 158 } 159 return nil 160 } 161 162 func (m *metrics) observeGCtime(ctx context.Context, dur time.Duration, failed bool) { 163 if m == nil { 164 return 165 } 166 ctx = utils.ResetContextOnError(ctx) 167 m.gcTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 168 attribute.Bool(failedKey, failed))) 169 } 170 171 func (m *metrics) observeShardFailure(ctx context.Context, shardKey string) { 172 if m == nil { 173 return 174 } 175 ctx = utils.ResetContextOnError(ctx) 176 177 m.shardFailureCount.Add(ctx, 1, metric.WithAttributes(attribute.String("shard_key", shardKey))) 178 } 179 180 func (m *metrics) observePut(ctx context.Context, dur time.Duration, result putResult, size uint) { 181 if m == nil { 182 return 183 } 184 ctx = utils.ResetContextOnError(ctx) 185 186 m.putTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 187 attribute.String(putResultKey, string(result)), 188 attribute.Int(sizeKey, int(size)))) 189 } 190 191 func (m *metrics) observeLongOp(ctx context.Context, opName string, dur time.Duration, result longOpResult) { 192 if m == nil { 193 return 194 } 195 ctx = utils.ResetContextOnError(ctx) 196 197 m.longOpTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 198 attribute.String(opNameKey, opName), 199 attribute.String(longOpResultKey, string(result)))) 200 } 201 202 func (m *metrics) observeGetCAR(ctx context.Context, dur time.Duration, failed bool) { 203 if m == nil { 204 return 205 } 206 ctx = utils.ResetContextOnError(ctx) 207 208 m.getCARTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 209 attribute.Bool(failedKey, failed))) 210 } 211 212 func (m *metrics) observeCARBlockstore(ctx context.Context, dur time.Duration, failed bool) { 213 if m == nil { 214 return 215 } 216 ctx = utils.ResetContextOnError(ctx) 217 218 m.getCARBlockstoreTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 219 attribute.Bool(failedKey, failed))) 220 } 221 222 func (m *metrics) observeGetDAH(ctx context.Context, dur time.Duration, failed bool) { 223 if m == nil { 224 return 225 } 226 ctx = utils.ResetContextOnError(ctx) 227 228 m.getDAHTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 229 attribute.Bool(failedKey, failed))) 230 } 231 232 func (m *metrics) observeRemove(ctx context.Context, dur time.Duration, failed bool) { 233 if m == nil { 234 return 235 } 236 ctx = utils.ResetContextOnError(ctx) 237 238 m.removeTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 239 attribute.Bool(failedKey, failed))) 240 } 241 242 func (m *metrics) observeGet(ctx context.Context, dur time.Duration, failed bool) { 243 if m == nil { 244 return 245 } 246 ctx = utils.ResetContextOnError(ctx) 247 248 m.getTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 249 attribute.Bool(failedKey, failed))) 250 } 251 252 func (m *metrics) observeHas(ctx context.Context, dur time.Duration, failed bool) { 253 if m == nil { 254 return 255 } 256 ctx = utils.ResetContextOnError(ctx) 257 258 m.hasTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 259 attribute.Bool(failedKey, failed))) 260 } 261 262 func (m *metrics) observeList(ctx context.Context, dur time.Duration, failed bool) { 263 if m == nil { 264 return 265 } 266 ctx = utils.ResetContextOnError(ctx) 267 268 m.listTime.Record(ctx, dur.Seconds(), metric.WithAttributes( 269 attribute.Bool(failedKey, failed))) 270 }