github.com/rudderlabs/rudder-go-kit@v0.30.0/stats/internal/otel/prometheus/exporter.go (about)

     1  // Package prometheus is imported from the official OpenTelemetry package:
     2  // https://github.com/open-telemetry/opentelemetry-go/tree/v1.14.0/exporters/prometheus
     3  // The version of the exporter would be v0.37.0 (not v1.14.0, see releases).
     4  //
     5  // Customisations applied:
     6  //
     7  //  1. scope info keys are not "otel_scope_name", "otel_scope_version" but we're now using the semconv ones to be
     8  //     consistent if we switch over to gRPC. we're propagating them via the *resource.Resource (see Collect method)
     9  //     see here: https://github.com/open-telemetry/opentelemetry-go/blob/v1.14.0/exporters/prometheus/exporter.go#L48
    10  //
    11  //  2. prometheus counters MUST have a _total suffix but that breaks our dashboards, so we removed it
    12  //     see here: https://github.com/open-telemetry/opentelemetry-go/blob/v1.14.0/exporters/prometheus/exporter.go#L73
    13  //
    14  //  3. a global logger was used, we made it injectable via options
    15  //     see here: https://github.com/open-telemetry/opentelemetry-go/blob/v1.14.0/exporters/prometheus/exporter.go#L393
    16  //
    17  //  4. removed unnecessary otel_scope_info metric
    18  package prometheus
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  	"unicode"
    28  	"unicode/utf8"
    29  
    30  	"github.com/prometheus/client_golang/prometheus"
    31  	dto "github.com/prometheus/client_model/go"
    32  	semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
    33  	"google.golang.org/protobuf/proto"
    34  
    35  	"go.opentelemetry.io/otel"
    36  	"go.opentelemetry.io/otel/attribute"
    37  	"go.opentelemetry.io/otel/sdk/instrumentation"
    38  	"go.opentelemetry.io/otel/sdk/metric"
    39  	"go.opentelemetry.io/otel/sdk/metric/metricdata"
    40  	"go.opentelemetry.io/otel/sdk/resource"
    41  )
    42  
const (
	// targetInfoMetricName is the name given to the info metric that Collect
	// builds from the resource attributes.
	targetInfoMetricName  = "target_info"
	// targetInfoDescription is the help text attached to that info metric.
	targetInfoDescription = "Target metadata"
)
    47  
// Exporter is a Prometheus Exporter that embeds the OTel metric.Reader
// interface for easy instantiation with a MeterProvider.
//
// The embedded Reader is the manual reader created in New; the Prometheus
// collector registered by New pulls from that same reader on every Collect call.
type Exporter struct {
	metric.Reader
}

// Compile-time assertion that *Exporter satisfies metric.Reader.
var _ metric.Reader = &Exporter{}
    55  
// collector is used to implement prometheus.Collector.
// It pulls metrics from the OTel reader and converts them to Prometheus
// metrics each time Collect is called.
type collector struct {
	// reader is the OTel reader the metrics are collected from.
	reader metric.Reader
	// logger receives instrument type/description conflict reports (see validateMetrics).
	logger logger

	// disableTargetInfo suppresses the target_info metric. It is also set to
	// true by Collect when the target info metric cannot be created.
	disableTargetInfo    bool
	// withoutUnits makes getName skip the unit suffix.
	withoutUnits         bool
	// targetInfo caches the target_info metric, built once by Collect.
	targetInfo           prometheus.Metric
	// createTargetInfoOnce guards the one-time creation of targetInfo.
	createTargetInfoOnce sync.Once
	// scopeInfos is initialised in New but not read anywhere in the visible code.
	// NOTE(review): presumably a leftover from the removed otel_scope_info metric — confirm.
	scopeInfos           map[instrumentation.Scope]prometheus.Metric
	// metricFamilies remembers, per exported metric name, the first type and
	// help text seen, so validateMetrics can detect later conflicts.
	metricFamilies       map[string]*dto.MetricFamily
	// namespace is prepended verbatim to every sanitized metric name (see getName).
	namespace            string
}
    69  
    70  // New returns a Prometheus Exporter.
    71  func New(opts ...Option) (*Exporter, error) {
    72  	cfg := newConfig(opts...)
    73  
    74  	// this assumes that the default temporality selector will always return cumulative.
    75  	// we only support cumulative temporality, so building our own reader enforces this.
    76  	reader := metric.NewManualReader(cfg.manualReaderOptions()...)
    77  
    78  	collector := &collector{
    79  		reader:            reader,
    80  		logger:            cfg.logger,
    81  		disableTargetInfo: cfg.disableTargetInfo,
    82  		withoutUnits:      cfg.withoutUnits,
    83  		scopeInfos:        make(map[instrumentation.Scope]prometheus.Metric),
    84  		metricFamilies:    make(map[string]*dto.MetricFamily),
    85  		namespace:         cfg.namespace,
    86  	}
    87  
    88  	if err := cfg.registerer.Register(collector); err != nil {
    89  		return nil, fmt.Errorf("cannot register the collector: %w", err)
    90  	}
    91  
    92  	e := &Exporter{
    93  		Reader: reader,
    94  	}
    95  
    96  	return e, nil
    97  }
    98  
// Describe implements prometheus.Collector.
// It deliberately sends no descriptors: the set of metrics is only known at
// Collect time, when it is read from the OTel reader.
func (c *collector) Describe(_ chan<- *prometheus.Desc) {}
   101  
   102  // Collect implements prometheus.Collector.
   103  func (c *collector) Collect(ch chan<- prometheus.Metric) {
   104  	metrics := metricdata.ResourceMetrics{}
   105  	err := c.reader.Collect(context.TODO(), &metrics)
   106  	if err != nil {
   107  		otel.Handle(err)
   108  		if errors.Is(err, metric.ErrReaderNotRegistered) {
   109  			return
   110  		}
   111  	}
   112  
   113  	c.createTargetInfoOnce.Do(func() {
   114  		// Resource should be immutable, we don't need to compute again
   115  		targetInfo, err := c.createInfoMetric(targetInfoMetricName, targetInfoDescription, metrics.Resource)
   116  		if err != nil {
   117  			// If the target info metric is invalid, disable sending it.
   118  			otel.Handle(err)
   119  			c.disableTargetInfo = true
   120  		}
   121  		c.targetInfo = targetInfo
   122  	})
   123  	if !c.disableTargetInfo {
   124  		ch <- c.targetInfo
   125  	}
   126  
   127  	var scopeKeys, scopeValues []string
   128  	for _, attr := range metrics.Resource.Attributes() {
   129  		if string(attr.Key) == string(semconv.ServiceNameKey) {
   130  			scopeKeys = append(scopeKeys, "job")
   131  			scopeValues = append(scopeValues, attr.Value.AsString())
   132  		}
   133  		scopeKeys = append(scopeKeys, strings.Map(sanitizeRune, string(attr.Key)))
   134  		scopeValues = append(scopeValues, attr.Value.AsString())
   135  	}
   136  
   137  	for _, scopeMetrics := range metrics.ScopeMetrics {
   138  		for _, m := range scopeMetrics.Metrics {
   139  			switch v := m.Data.(type) {
   140  			case metricdata.Histogram[int64]:
   141  				addHistogramMetric(ch, v, m, scopeKeys, scopeValues, c.getName(m), c.metricFamilies, c.logger)
   142  			case metricdata.Histogram[float64]:
   143  				addHistogramMetric(ch, v, m, scopeKeys, scopeValues, c.getName(m), c.metricFamilies, c.logger)
   144  			case metricdata.Sum[int64]:
   145  				addSumMetric(ch, v, m, scopeKeys, scopeValues, c.getName(m), c.metricFamilies, c.logger)
   146  			case metricdata.Sum[float64]:
   147  				addSumMetric(ch, v, m, scopeKeys, scopeValues, c.getName(m), c.metricFamilies, c.logger)
   148  			case metricdata.Gauge[int64]:
   149  				addGaugeMetric(ch, v, m, scopeKeys, scopeValues, c.getName(m), c.metricFamilies, c.logger)
   150  			case metricdata.Gauge[float64]:
   151  				addGaugeMetric(ch, v, m, scopeKeys, scopeValues, c.getName(m), c.metricFamilies, c.logger)
   152  			}
   153  		}
   154  	}
   155  }
   156  
   157  func addHistogramMetric[N int64 | float64](
   158  	ch chan<- prometheus.Metric, histogram metricdata.Histogram[N], m metricdata.Metrics,
   159  	ks, vs []string, name string, mfs map[string]*dto.MetricFamily, l logger,
   160  ) {
   161  	drop, help := validateMetrics(name, m.Description, dto.MetricType_HISTOGRAM.Enum(), mfs, l)
   162  	if drop {
   163  		return
   164  	}
   165  	if help != "" {
   166  		m.Description = help
   167  	}
   168  
   169  	for _, dp := range histogram.DataPoints {
   170  		keys, values := getAttrs(dp.Attributes, ks, vs)
   171  
   172  		desc := prometheus.NewDesc(name, m.Description, keys, nil)
   173  		buckets := make(map[float64]uint64, len(dp.Bounds))
   174  
   175  		cumulativeCount := uint64(0)
   176  		for i, bound := range dp.Bounds {
   177  			cumulativeCount += dp.BucketCounts[i]
   178  			buckets[bound] = cumulativeCount
   179  		}
   180  		m, err := prometheus.NewConstHistogram(desc, dp.Count, float64(dp.Sum), buckets, values...)
   181  		if err != nil {
   182  			otel.Handle(err)
   183  			continue
   184  		}
   185  		ch <- m
   186  	}
   187  }
   188  
   189  func addSumMetric[N int64 | float64](
   190  	ch chan<- prometheus.Metric, sum metricdata.Sum[N], m metricdata.Metrics,
   191  	ks, vs []string, name string, mfs map[string]*dto.MetricFamily, l logger,
   192  ) {
   193  	valueType := prometheus.CounterValue
   194  	metricType := dto.MetricType_COUNTER
   195  	if !sum.IsMonotonic {
   196  		valueType = prometheus.GaugeValue
   197  		metricType = dto.MetricType_GAUGE
   198  	}
   199  
   200  	drop, help := validateMetrics(name, m.Description, metricType.Enum(), mfs, l)
   201  	if drop {
   202  		return
   203  	}
   204  	if help != "" {
   205  		m.Description = help
   206  	}
   207  
   208  	for _, dp := range sum.DataPoints {
   209  		keys, values := getAttrs(dp.Attributes, ks, vs)
   210  
   211  		desc := prometheus.NewDesc(name, m.Description, keys, nil)
   212  		m, err := prometheus.NewConstMetric(desc, valueType, float64(dp.Value), values...)
   213  		if err != nil {
   214  			otel.Handle(err)
   215  			continue
   216  		}
   217  		ch <- m
   218  	}
   219  }
   220  
   221  func addGaugeMetric[N int64 | float64](
   222  	ch chan<- prometheus.Metric, gauge metricdata.Gauge[N], m metricdata.Metrics,
   223  	ks, vs []string, name string, mfs map[string]*dto.MetricFamily, l logger,
   224  ) {
   225  	drop, help := validateMetrics(name, m.Description, dto.MetricType_GAUGE.Enum(), mfs, l)
   226  	if drop {
   227  		return
   228  	}
   229  	if help != "" {
   230  		m.Description = help
   231  	}
   232  
   233  	for _, dp := range gauge.DataPoints {
   234  		keys, values := getAttrs(dp.Attributes, ks, vs)
   235  
   236  		desc := prometheus.NewDesc(name, m.Description, keys, nil)
   237  		m, err := prometheus.NewConstMetric(desc, prometheus.GaugeValue, float64(dp.Value), values...)
   238  		if err != nil {
   239  			otel.Handle(err)
   240  			continue
   241  		}
   242  		ch <- m
   243  	}
   244  }
   245  
   246  // getAttrs parses the attribute.Set to two lists of matching Prometheus-style
   247  // keys and values. It sanitizes invalid characters and handles duplicate keys
   248  // (due to sanitization) by sorting and concatenating the values following the spec.
   249  func getAttrs(attrs attribute.Set, ks, vs []string) ([]string, []string) {
   250  	keysMap := make(map[string][]string)
   251  	itr := attrs.Iter()
   252  	for itr.Next() {
   253  		kv := itr.Attribute()
   254  		key := strings.Map(sanitizeRune, string(kv.Key))
   255  		if _, ok := keysMap[key]; !ok {
   256  			keysMap[key] = []string{kv.Value.Emit()}
   257  		} else {
   258  			// if the sanitized key is a duplicate, append to the list of keys
   259  			keysMap[key] = append(keysMap[key], kv.Value.Emit())
   260  		}
   261  	}
   262  
   263  	keys := make([]string, 0, attrs.Len())
   264  	values := make([]string, 0, attrs.Len())
   265  	for key, vals := range keysMap {
   266  		keys = append(keys, key)
   267  		sort.Slice(vals, func(i, j int) bool {
   268  			return i < j
   269  		})
   270  		values = append(values, strings.Join(vals, ";"))
   271  	}
   272  
   273  	if len(ks) > 0 {
   274  		keys = append(keys, ks[:]...)
   275  		values = append(values, vs[:]...)
   276  	}
   277  	return keys, values
   278  }
   279  
   280  func (c *collector) createInfoMetric(name, description string, res *resource.Resource) (prometheus.Metric, error) {
   281  	keys, values := getAttrs(*res.Set(), []string{}, []string{})
   282  	desc := prometheus.NewDesc(name, description, keys, nil)
   283  	return prometheus.NewConstMetric(desc, prometheus.GaugeValue, float64(1), values...)
   284  }
   285  
   286  // BEWARE that we are already sanitizing metric names in the OTel adapter, see sanitizeTagKey function,
   287  // but we still need this function to sanitize metrics coming from the internal OpenTelemetry client
   288  func sanitizeRune(r rune) rune {
   289  	if unicode.IsLetter(r) || unicode.IsDigit(r) || r == ':' || r == '_' {
   290  		return r
   291  	}
   292  	return '_'
   293  }
   294  
// unitSuffixes maps a metric unit to the suffix getName appends to the metric
// name. Units that are not listed here get no suffix.
// NOTE(review): the keys look like UCUM unit codes as used by OTel ("1" for a
// dimensionless ratio, "By" for bytes, "ms" for milliseconds) — confirm.
var unitSuffixes = map[string]string{
	"1":  "_ratio",
	"By": "_bytes",
	"ms": "_milliseconds",
}
   300  
   301  // getName returns the sanitized name, prefixed with the namespace and suffixed with unit.
   302  func (c *collector) getName(m metricdata.Metrics) string {
   303  	name := sanitizeName(m.Name)
   304  	if c.namespace != "" {
   305  		name = c.namespace + name
   306  	}
   307  	if c.withoutUnits {
   308  		return name
   309  	}
   310  	if suffix, ok := unitSuffixes[m.Unit]; ok {
   311  		name += suffix
   312  	}
   313  	return name
   314  }
   315  
   316  func sanitizeName(n string) string {
   317  	// This algorithm is based on strings.Map from Go 1.19.
   318  	const replacement = '_'
   319  
   320  	valid := func(i int, r rune) bool {
   321  		// Taken from
   322  		// https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric.go#L92-L102
   323  		if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_' || r == ':' || (r >= '0' && r <= '9' && i > 0) {
   324  			return true
   325  		}
   326  		return false
   327  	}
   328  
   329  	// This output buffer b is initialized on demand, the first time a
   330  	// character needs to be replaced.
   331  	var b strings.Builder
   332  	for i, c := range n {
   333  		if valid(i, c) {
   334  			continue
   335  		}
   336  
   337  		if i == 0 && c >= '0' && c <= '9' {
   338  			// Prefix leading number with replacement character.
   339  			b.Grow(len(n) + 1)
   340  			_ = b.WriteByte(byte(replacement))
   341  			break
   342  		}
   343  		b.Grow(len(n))
   344  		_, _ = b.WriteString(n[:i])
   345  		_ = b.WriteByte(byte(replacement))
   346  		width := utf8.RuneLen(c)
   347  		n = n[i+width:]
   348  		break
   349  	}
   350  
   351  	// Fast path for unchanged input.
   352  	if b.Cap() == 0 { // b.Grow was not called above.
   353  		return n
   354  	}
   355  
   356  	for _, c := range n {
   357  		// Due to inlining, it is more performant to invoke WriteByte rather then
   358  		// WriteRune.
   359  		if valid(1, c) { // We are guaranteed to not be at the start.
   360  			_ = b.WriteByte(byte(c))
   361  		} else {
   362  			_ = b.WriteByte(byte(replacement))
   363  		}
   364  	}
   365  
   366  	return b.String()
   367  }
   368  
   369  func validateMetrics(
   370  	name, description string, metricType *dto.MetricType, mfs map[string]*dto.MetricFamily, l logger,
   371  ) (drop bool, help string) {
   372  	emf, exist := mfs[name]
   373  	if !exist {
   374  		mfs[name] = &dto.MetricFamily{
   375  			Name: proto.String(name),
   376  			Help: proto.String(description),
   377  			Type: metricType,
   378  		}
   379  		return false, ""
   380  	}
   381  	if emf.GetType() != *metricType {
   382  		l.Error(
   383  			errors.New("instrument type conflict"),
   384  			"Using existing type definition.",
   385  			"instrument", name,
   386  			"existing", emf.GetType(),
   387  			"dropped", *metricType,
   388  		)
   389  		return true, ""
   390  	}
   391  	if emf.GetHelp() != description {
   392  		l.Info(
   393  			"Instrument description conflict, using existing",
   394  			"instrument", name,
   395  			"existing", emf.GetHelp(),
   396  			"dropped", description,
   397  		)
   398  		return false, emf.GetHelp()
   399  	}
   400  
   401  	return false, ""
   402  }