github.com/kubewharf/katalyst-core@v0.5.3/pkg/metrics/otel_prom_metrics.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "context" 21 "fmt" 22 "net/http" 23 "time" 24 25 "go.opentelemetry.io/otel/attribute" 26 "go.opentelemetry.io/otel/exporters/metric/prometheus" 27 "go.opentelemetry.io/otel/metric" 28 "go.opentelemetry.io/otel/metric/number" 29 export "go.opentelemetry.io/otel/sdk/export/metric" 30 "go.opentelemetry.io/otel/sdk/export/metric/aggregation" 31 controller "go.opentelemetry.io/otel/sdk/metric/controller/basic" 32 controllerTime "go.opentelemetry.io/otel/sdk/metric/controller/time" 33 processor "go.opentelemetry.io/otel/sdk/metric/processor/basic" 34 selector "go.opentelemetry.io/otel/sdk/metric/selector/simple" 35 "go.opentelemetry.io/otel/sdk/resource" 36 "k8s.io/apimachinery/pkg/util/wait" 37 "k8s.io/klog/v2" 38 39 "github.com/kubewharf/katalyst-core/pkg/config/generic" 40 ) 41 42 const ( 43 openTelemetryPrometheusCollectPeriod = time.Second * 3 44 ) 45 46 type PrometheusMetricPathName string 47 48 const ( 49 PrometheusMetricPathNameDefault PrometheusMetricPathName = "/metrics" 50 PrometheusMetricPathNameCustomMetric PrometheusMetricPathName = "/custom_metric" 51 ) 52 53 type prometheusClockTicker struct { 54 ticker *time.Ticker 55 } 56 57 func (t *prometheusClockTicker) Stop() { 58 t.ticker.Stop() 59 } 60 61 func (t *prometheusClockTicker) C() <-chan time.Time { 62 return t.ticker.C 63 } 64 65 type prometheusClock struct { 66 last time.Time 67 t *prometheusClockTicker 68 } 69 70 func (c *prometheusClock) Now() time.Time { 71 c.last = time.Now() 72 return c.last 73 } 74 75 func (c *prometheusClock) Ticker(period time.Duration) controllerTime.Ticker { 76 c.t = &prometheusClockTicker{ticker: time.NewTicker(period)} 77 return c.t 78 } 79 80 func (c *prometheusClock) Stop() { 81 c.t.Stop() 82 } 83 84 func (c *prometheusClock) C() <-chan time.Time { 85 return c.t.C() 86 } 87 88 type openTelemetryPrometheusMetricsEmitter struct { 89 c *prometheusClock 90 pathName PrometheusMetricPathName 91 metricsConf *generic.MetricsConfiguration 92 93 exporter *prometheus.Exporter 94 meter metric.Meter 95 } 96 97 var _ MetricEmitter = &openTelemetryPrometheusMetricsEmitter{} 98 99 type customExportKindSelectorWrapper struct { 100 export.ExportKindSelector 101 } 102 103 // ExportKindFor implements ExportKindSelector. 104 // we only use counter and up down counter as CumulativeExportKind to save memory 105 func (c customExportKindSelectorWrapper) ExportKindFor(desc *metric.Descriptor, kind aggregation.Kind) export.ExportKind { 106 switch desc.InstrumentKind() { 107 case metric.CounterInstrumentKind, metric.UpDownCounterInstrumentKind: 108 return export.CumulativeExportKind 109 default: 110 return c.ExportKindSelector.ExportKindFor(desc, kind) 111 } 112 } 113 114 // NewOpenTelemetryPrometheusMetricsEmitter implement a MetricEmitter use open-telemetry sdk. 115 func NewOpenTelemetryPrometheusMetricsEmitter(metricsConf *generic.MetricsConfiguration, pathName PrometheusMetricPathName, 116 mux *http.ServeMux, 117 ) (MetricEmitter, error) { 118 exporter, err := prometheus.NewExporter(prometheus.Config{}, controller.New( 119 processor.New( 120 selector.NewWithInexpensiveDistribution(), 121 customExportKindSelectorWrapper{export.StatelessExportKindSelector()}, 122 processor.WithMemory(false), 123 ), 124 controller.WithCollectPeriod(openTelemetryPrometheusCollectPeriod), 125 controller.WithResource(resource.NewWithAttributes()), 126 )) 127 if err != nil { 128 return nil, fmt.Errorf("failed to initialize prometheus exporter: %w", err) 129 } 130 c := &prometheusClock{last: time.Now()} 131 exporter.Controller().SetClock(c) 132 133 mux.HandleFunc(fmt.Sprintf("%v", pathName), exporter.ServeHTTP) 134 135 meter := exporter.MeterProvider().Meter("") 136 p := &openTelemetryPrometheusMetricsEmitter{ 137 c: c, 138 pathName: pathName, 139 metricsConf: metricsConf, 140 141 exporter: exporter, 142 meter: meter, 143 } 144 145 return p, nil 146 } 147 148 // StoreInt64 store a int64 metrics to prometheus collector. 149 func (p *openTelemetryPrometheusMetricsEmitter) StoreInt64( 150 key string, val int64, emitType MetricTypeName, tags ...MetricTag, 151 ) error { 152 return p.storeInt64(key, val, emitType, p.convertTagsToMap(tags)) 153 } 154 155 // StoreFloat64 store a float64 metrics to prometheus collector. 156 func (p *openTelemetryPrometheusMetricsEmitter) StoreFloat64( 157 key string, val float64, emitType MetricTypeName, tags ...MetricTag, 158 ) error { 159 return p.storeFloat64(key, val, emitType, p.convertTagsToMap(tags)) 160 } 161 162 func (p *openTelemetryPrometheusMetricsEmitter) WithTags( 163 unit string, commonTags ...MetricTag, 164 ) MetricEmitter { 165 newMetricTagWrapper := &MetricTagWrapper{MetricEmitter: p} 166 return newMetricTagWrapper.WithTags(unit, commonTags...) 167 } 168 169 func (p *openTelemetryPrometheusMetricsEmitter) Run(ctx context.Context) { 170 klog.Infof("openTelemetry runs") 171 go wait.Until(p.gc, time.Minute, ctx.Done()) 172 } 173 174 func (p *openTelemetryPrometheusMetricsEmitter) gc() { 175 // usw c.clock.Now() to judge whether we have collected 176 if time.Since(p.c.last) > p.metricsConf.EmitterPrometheusGCTimeout { 177 klog.Infof("trigger manual gc for %v", p.pathName) 178 _ = p.exporter.Controller().Collect(context.Background()) 179 } 180 } 181 182 func (p *openTelemetryPrometheusMetricsEmitter) storeInt64( 183 key string, val int64, emitType MetricTypeName, tags map[string]string, 184 ) error { 185 var err error 186 switch emitType { 187 case MetricTypeNameRaw: 188 err = p.storeRawInt64(key, val, tags) 189 case MetricTypeNameCount: 190 err = p.storeCountInt64(key, val, tags) 191 case MetricTypeNameUpDownCount: 192 err = p.storeUpDownCountInt64(key, val, tags) 193 default: 194 err = fmt.Errorf("metrics type %s is not support", emitType) 195 } 196 197 if err != nil { 198 klog.Errorf("storeInt64 failed emitType: %s, %s", emitType, err) 199 return err 200 } 201 202 return nil 203 } 204 205 func (p *openTelemetryPrometheusMetricsEmitter) storeFloat64(key string, 206 val float64, emitType MetricTypeName, tags map[string]string, 207 ) error { 208 var err error 209 switch emitType { 210 case MetricTypeNameRaw: 211 err = p.storeRawFloat64(key, val, tags) 212 case MetricTypeNameCount: 213 err = p.storeCountFloat64(key, val, tags) 214 case MetricTypeNameUpDownCount: 215 err = p.storeUpDownCountFloat64(key, val, tags) 216 default: 217 err = fmt.Errorf("metrics type %s is not support", emitType) 218 } 219 220 if err != nil { 221 klog.Errorf("storeFloat64 failed with emitType: %s, %s", emitType, err) 222 return err 223 } 224 225 return nil 226 } 227 228 func (p *openTelemetryPrometheusMetricsEmitter) storeRawInt64(key string, val int64, tags map[string]string) error { 229 instrument, err := p.meter.MeterImpl().NewSyncInstrument(metric.NewDescriptor(key, metric.ValueObserverInstrumentKind, number.Int64Kind)) 230 if err != nil { 231 return err 232 } 233 234 instrument.RecordOne(context.TODO(), number.NewInt64Number(val), p.convertMapToKeyValues(tags)) 235 return err 236 } 237 238 func (p *openTelemetryPrometheusMetricsEmitter) storeRawFloat64(key string, val float64, tags map[string]string) error { 239 instrument, err := p.meter.MeterImpl().NewSyncInstrument(metric.NewDescriptor(key, metric.ValueObserverInstrumentKind, number.Float64Kind)) 240 if err != nil { 241 return err 242 } 243 244 instrument.RecordOne(context.TODO(), number.NewFloat64Number(val), p.convertMapToKeyValues(tags)) 245 return err 246 } 247 248 func (p *openTelemetryPrometheusMetricsEmitter) storeCountInt64(key string, val int64, tags map[string]string) error { 249 counter, err := p.meter.NewInt64Counter(key) 250 if err != nil { 251 return err 252 } 253 counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...) 254 return nil 255 } 256 257 func (p *openTelemetryPrometheusMetricsEmitter) storeCountFloat64(key string, val float64, tags map[string]string) error { 258 counter, err := p.meter.NewFloat64Counter(key) 259 if err != nil { 260 return err 261 } 262 counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...) 263 return nil 264 } 265 266 func (p *openTelemetryPrometheusMetricsEmitter) storeUpDownCountInt64(key string, val int64, tags map[string]string) error { 267 counter, err := p.meter.NewInt64UpDownCounter(key) 268 if err != nil { 269 return err 270 } 271 counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...) 272 return nil 273 } 274 275 func (p *openTelemetryPrometheusMetricsEmitter) storeUpDownCountFloat64(key string, val float64, tags map[string]string) error { 276 counter, err := p.meter.NewFloat64UpDownCounter(key) 277 if err != nil { 278 return err 279 } 280 counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...) 281 return nil 282 } 283 284 // for simplify, only pass map to metrics related function 285 func (p *openTelemetryPrometheusMetricsEmitter) convertMapToKeyValues(tags map[string]string) []attribute.KeyValue { 286 res := make([]attribute.KeyValue, 0, len(tags)) 287 for k, v := range tags { 288 res = append(res, attribute.String(k, v)) 289 } 290 return res 291 } 292 293 // to avoid duplicate tags, we will convert tags to map first 294 func (p *openTelemetryPrometheusMetricsEmitter) convertTagsToMap(tags []MetricTag) map[string]string { 295 mTags := make(map[string]string) 296 for _, t := range tags { 297 mTags[t.Key] = t.Val 298 } 299 return mTags 300 }