github.com/kubewharf/katalyst-core@v0.5.3/pkg/metrics/otel_prom_metrics.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/http"
    23  	"time"
    24  
    25  	"go.opentelemetry.io/otel/attribute"
    26  	"go.opentelemetry.io/otel/exporters/metric/prometheus"
    27  	"go.opentelemetry.io/otel/metric"
    28  	"go.opentelemetry.io/otel/metric/number"
    29  	export "go.opentelemetry.io/otel/sdk/export/metric"
    30  	"go.opentelemetry.io/otel/sdk/export/metric/aggregation"
    31  	controller "go.opentelemetry.io/otel/sdk/metric/controller/basic"
    32  	controllerTime "go.opentelemetry.io/otel/sdk/metric/controller/time"
    33  	processor "go.opentelemetry.io/otel/sdk/metric/processor/basic"
    34  	selector "go.opentelemetry.io/otel/sdk/metric/selector/simple"
    35  	"go.opentelemetry.io/otel/sdk/resource"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	"k8s.io/klog/v2"
    38  
    39  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    40  )
    41  
    42  const (
    43  	openTelemetryPrometheusCollectPeriod = time.Second * 3
    44  )
    45  
    46  type PrometheusMetricPathName string
    47  
    48  const (
    49  	PrometheusMetricPathNameDefault      PrometheusMetricPathName = "/metrics"
    50  	PrometheusMetricPathNameCustomMetric PrometheusMetricPathName = "/custom_metric"
    51  )
    52  
    53  type prometheusClockTicker struct {
    54  	ticker *time.Ticker
    55  }
    56  
    57  func (t *prometheusClockTicker) Stop() {
    58  	t.ticker.Stop()
    59  }
    60  
    61  func (t *prometheusClockTicker) C() <-chan time.Time {
    62  	return t.ticker.C
    63  }
    64  
    65  type prometheusClock struct {
    66  	last time.Time
    67  	t    *prometheusClockTicker
    68  }
    69  
    70  func (c *prometheusClock) Now() time.Time {
    71  	c.last = time.Now()
    72  	return c.last
    73  }
    74  
    75  func (c *prometheusClock) Ticker(period time.Duration) controllerTime.Ticker {
    76  	c.t = &prometheusClockTicker{ticker: time.NewTicker(period)}
    77  	return c.t
    78  }
    79  
    80  func (c *prometheusClock) Stop() {
    81  	c.t.Stop()
    82  }
    83  
    84  func (c *prometheusClock) C() <-chan time.Time {
    85  	return c.t.C()
    86  }
    87  
    88  type openTelemetryPrometheusMetricsEmitter struct {
    89  	c           *prometheusClock
    90  	pathName    PrometheusMetricPathName
    91  	metricsConf *generic.MetricsConfiguration
    92  
    93  	exporter *prometheus.Exporter
    94  	meter    metric.Meter
    95  }
    96  
    97  var _ MetricEmitter = &openTelemetryPrometheusMetricsEmitter{}
    98  
    99  type customExportKindSelectorWrapper struct {
   100  	export.ExportKindSelector
   101  }
   102  
   103  // ExportKindFor implements ExportKindSelector.
   104  // we only use counter and up down counter as CumulativeExportKind to save memory
   105  func (c customExportKindSelectorWrapper) ExportKindFor(desc *metric.Descriptor, kind aggregation.Kind) export.ExportKind {
   106  	switch desc.InstrumentKind() {
   107  	case metric.CounterInstrumentKind, metric.UpDownCounterInstrumentKind:
   108  		return export.CumulativeExportKind
   109  	default:
   110  		return c.ExportKindSelector.ExportKindFor(desc, kind)
   111  	}
   112  }
   113  
   114  // NewOpenTelemetryPrometheusMetricsEmitter implement a MetricEmitter use open-telemetry sdk.
   115  func NewOpenTelemetryPrometheusMetricsEmitter(metricsConf *generic.MetricsConfiguration, pathName PrometheusMetricPathName,
   116  	mux *http.ServeMux,
   117  ) (MetricEmitter, error) {
   118  	exporter, err := prometheus.NewExporter(prometheus.Config{}, controller.New(
   119  		processor.New(
   120  			selector.NewWithInexpensiveDistribution(),
   121  			customExportKindSelectorWrapper{export.StatelessExportKindSelector()},
   122  			processor.WithMemory(false),
   123  		),
   124  		controller.WithCollectPeriod(openTelemetryPrometheusCollectPeriod),
   125  		controller.WithResource(resource.NewWithAttributes()),
   126  	))
   127  	if err != nil {
   128  		return nil, fmt.Errorf("failed to initialize prometheus exporter: %w", err)
   129  	}
   130  	c := &prometheusClock{last: time.Now()}
   131  	exporter.Controller().SetClock(c)
   132  
   133  	mux.HandleFunc(fmt.Sprintf("%v", pathName), exporter.ServeHTTP)
   134  
   135  	meter := exporter.MeterProvider().Meter("")
   136  	p := &openTelemetryPrometheusMetricsEmitter{
   137  		c:           c,
   138  		pathName:    pathName,
   139  		metricsConf: metricsConf,
   140  
   141  		exporter: exporter,
   142  		meter:    meter,
   143  	}
   144  
   145  	return p, nil
   146  }
   147  
   148  // StoreInt64 store a int64 metrics to prometheus collector.
   149  func (p *openTelemetryPrometheusMetricsEmitter) StoreInt64(
   150  	key string, val int64, emitType MetricTypeName, tags ...MetricTag,
   151  ) error {
   152  	return p.storeInt64(key, val, emitType, p.convertTagsToMap(tags))
   153  }
   154  
   155  // StoreFloat64 store a float64 metrics to prometheus collector.
   156  func (p *openTelemetryPrometheusMetricsEmitter) StoreFloat64(
   157  	key string, val float64, emitType MetricTypeName, tags ...MetricTag,
   158  ) error {
   159  	return p.storeFloat64(key, val, emitType, p.convertTagsToMap(tags))
   160  }
   161  
   162  func (p *openTelemetryPrometheusMetricsEmitter) WithTags(
   163  	unit string, commonTags ...MetricTag,
   164  ) MetricEmitter {
   165  	newMetricTagWrapper := &MetricTagWrapper{MetricEmitter: p}
   166  	return newMetricTagWrapper.WithTags(unit, commonTags...)
   167  }
   168  
   169  func (p *openTelemetryPrometheusMetricsEmitter) Run(ctx context.Context) {
   170  	klog.Infof("openTelemetry runs")
   171  	go wait.Until(p.gc, time.Minute, ctx.Done())
   172  }
   173  
   174  func (p *openTelemetryPrometheusMetricsEmitter) gc() {
   175  	// usw c.clock.Now() to judge whether we have collected
   176  	if time.Since(p.c.last) > p.metricsConf.EmitterPrometheusGCTimeout {
   177  		klog.Infof("trigger manual gc for %v", p.pathName)
   178  		_ = p.exporter.Controller().Collect(context.Background())
   179  	}
   180  }
   181  
   182  func (p *openTelemetryPrometheusMetricsEmitter) storeInt64(
   183  	key string, val int64, emitType MetricTypeName, tags map[string]string,
   184  ) error {
   185  	var err error
   186  	switch emitType {
   187  	case MetricTypeNameRaw:
   188  		err = p.storeRawInt64(key, val, tags)
   189  	case MetricTypeNameCount:
   190  		err = p.storeCountInt64(key, val, tags)
   191  	case MetricTypeNameUpDownCount:
   192  		err = p.storeUpDownCountInt64(key, val, tags)
   193  	default:
   194  		err = fmt.Errorf("metrics type %s is not support", emitType)
   195  	}
   196  
   197  	if err != nil {
   198  		klog.Errorf("storeInt64 failed emitType: %s, %s", emitType, err)
   199  		return err
   200  	}
   201  
   202  	return nil
   203  }
   204  
   205  func (p *openTelemetryPrometheusMetricsEmitter) storeFloat64(key string,
   206  	val float64, emitType MetricTypeName, tags map[string]string,
   207  ) error {
   208  	var err error
   209  	switch emitType {
   210  	case MetricTypeNameRaw:
   211  		err = p.storeRawFloat64(key, val, tags)
   212  	case MetricTypeNameCount:
   213  		err = p.storeCountFloat64(key, val, tags)
   214  	case MetricTypeNameUpDownCount:
   215  		err = p.storeUpDownCountFloat64(key, val, tags)
   216  	default:
   217  		err = fmt.Errorf("metrics type %s is not support", emitType)
   218  	}
   219  
   220  	if err != nil {
   221  		klog.Errorf("storeFloat64 failed with emitType: %s, %s", emitType, err)
   222  		return err
   223  	}
   224  
   225  	return nil
   226  }
   227  
   228  func (p *openTelemetryPrometheusMetricsEmitter) storeRawInt64(key string, val int64, tags map[string]string) error {
   229  	instrument, err := p.meter.MeterImpl().NewSyncInstrument(metric.NewDescriptor(key, metric.ValueObserverInstrumentKind, number.Int64Kind))
   230  	if err != nil {
   231  		return err
   232  	}
   233  
   234  	instrument.RecordOne(context.TODO(), number.NewInt64Number(val), p.convertMapToKeyValues(tags))
   235  	return err
   236  }
   237  
   238  func (p *openTelemetryPrometheusMetricsEmitter) storeRawFloat64(key string, val float64, tags map[string]string) error {
   239  	instrument, err := p.meter.MeterImpl().NewSyncInstrument(metric.NewDescriptor(key, metric.ValueObserverInstrumentKind, number.Float64Kind))
   240  	if err != nil {
   241  		return err
   242  	}
   243  
   244  	instrument.RecordOne(context.TODO(), number.NewFloat64Number(val), p.convertMapToKeyValues(tags))
   245  	return err
   246  }
   247  
   248  func (p *openTelemetryPrometheusMetricsEmitter) storeCountInt64(key string, val int64, tags map[string]string) error {
   249  	counter, err := p.meter.NewInt64Counter(key)
   250  	if err != nil {
   251  		return err
   252  	}
   253  	counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...)
   254  	return nil
   255  }
   256  
   257  func (p *openTelemetryPrometheusMetricsEmitter) storeCountFloat64(key string, val float64, tags map[string]string) error {
   258  	counter, err := p.meter.NewFloat64Counter(key)
   259  	if err != nil {
   260  		return err
   261  	}
   262  	counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...)
   263  	return nil
   264  }
   265  
   266  func (p *openTelemetryPrometheusMetricsEmitter) storeUpDownCountInt64(key string, val int64, tags map[string]string) error {
   267  	counter, err := p.meter.NewInt64UpDownCounter(key)
   268  	if err != nil {
   269  		return err
   270  	}
   271  	counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...)
   272  	return nil
   273  }
   274  
   275  func (p *openTelemetryPrometheusMetricsEmitter) storeUpDownCountFloat64(key string, val float64, tags map[string]string) error {
   276  	counter, err := p.meter.NewFloat64UpDownCounter(key)
   277  	if err != nil {
   278  		return err
   279  	}
   280  	counter.Add(context.TODO(), val, p.convertMapToKeyValues(tags)...)
   281  	return nil
   282  }
   283  
   284  // for simplify, only pass map to metrics related function
   285  func (p *openTelemetryPrometheusMetricsEmitter) convertMapToKeyValues(tags map[string]string) []attribute.KeyValue {
   286  	res := make([]attribute.KeyValue, 0, len(tags))
   287  	for k, v := range tags {
   288  		res = append(res, attribute.String(k, v))
   289  	}
   290  	return res
   291  }
   292  
   293  // to avoid duplicate tags, we will convert tags to map first
   294  func (p *openTelemetryPrometheusMetricsEmitter) convertTagsToMap(tags []MetricTag) map[string]string {
   295  	mTags := make(map[string]string)
   296  	for _, t := range tags {
   297  		mTags[t.Key] = t.Val
   298  	}
   299  	return mTags
   300  }