google.golang.org/grpc@v1.72.2/stats/opentelemetry/opentelemetry.go (about)

     1  /*
     2   * Copyright 2024 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package opentelemetry implements opentelemetry instrumentation code for
    18  // gRPC-Go clients and servers.
    19  //
    20  // For details on configuring opentelemetry and various instruments that this
    21  // package creates, see
    22  // [gRPC OpenTelemetry Metrics](https://grpc.io/docs/guides/opentelemetry-metrics/).
    23  package opentelemetry
    24  
    25  import (
    26  	"context"
    27  	"strings"
    28  	"time"
    29  
    30  	otelattribute "go.opentelemetry.io/otel/attribute"
    31  	otelmetric "go.opentelemetry.io/otel/metric"
    32  	"go.opentelemetry.io/otel/metric/noop"
    33  	"go.opentelemetry.io/otel/trace"
    34  	"google.golang.org/grpc"
    35  	"google.golang.org/grpc/codes"
    36  	experimental "google.golang.org/grpc/experimental/opentelemetry"
    37  	estats "google.golang.org/grpc/experimental/stats"
    38  	"google.golang.org/grpc/grpclog"
    39  	"google.golang.org/grpc/internal"
    40  	"google.golang.org/grpc/stats"
    41  	otelinternal "google.golang.org/grpc/stats/opentelemetry/internal"
    42  )
    43  
    44  func init() {
    45  	otelinternal.SetPluginOption = func(o *Options, po otelinternal.PluginOption) {
    46  		o.MetricsOptions.pluginOption = po
    47  		// Log an error if one of the options is missing.
    48  		if (o.TraceOptions.TextMapPropagator == nil) != (o.TraceOptions.TracerProvider == nil) {
    49  			logger.Warning("Tracing will not be recorded because traceOptions are not set properly: one of TextMapPropagator or TracerProvider is missing")
    50  		}
    51  	}
    52  }
    53  
var (
	// logger is the grpclog component logger for this package, created under
	// the component name "otel-plugin".
	logger = grpclog.Component("otel-plugin")
	// canonicalString is the value held in internal.CanonicalString, asserted
	// to func(codes.Code) string. The assertion is unchecked, so a mismatched
	// type panics during package initialization.
	canonicalString = internal.CanonicalString.(func(codes.Code) string)
	// joinDialOptions is the value held in internal.JoinDialOptions, asserted
	// to a function that combines several dial options into a single one. The
	// assertion is unchecked, so a mismatched type panics during package
	// initialization.
	joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption)
)
    59  
// Options are the options for OpenTelemetry instrumentation. A value of this
// type is passed to DialOption (client side) or ServerOption (server side).
type Options struct {
	// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
	MetricsOptions MetricsOptions
	// TraceOptions are the tracing options for OpenTelemetry instrumentation.
	TraceOptions experimental.TraceOptions
}
    67  
    68  func (o *Options) isMetricsEnabled() bool {
    69  	return o.MetricsOptions.MeterProvider != nil
    70  }
    71  
    72  func (o *Options) isTracingEnabled() bool {
    73  	return o.TraceOptions.TracerProvider != nil
    74  }
    75  
// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
type MetricsOptions struct {
	// MeterProvider is the MeterProvider instance that will be used to create
	// instruments. To enable metrics collection, set a meter provider. If
	// unset, no metrics will be recorded.
	MeterProvider otelmetric.MeterProvider

	// Metrics are the metrics to instrument. An instrument is created, and
	// telemetry recorded, for each listed metric that the client and server
	// instrumentation components support. If not set, the default metrics
	// will be recorded.
	Metrics *stats.MetricSet

	// MethodAttributeFilter is a function that determines whether to record the
	// method name of RPCs as an attribute, or to bucket into "other". Take care
	// to limit the values allowed, as allowing too many will increase
	// cardinality and could cause severe memory or performance problems.
	//
	// This only applies to server-side metrics. For clients, to record the
	// method name in the attributes, pass grpc.StaticMethodCallOption to Invoke
	// or NewStream. Note that when using protobuf generated clients, this
	// CallOption is included automatically.
	MethodAttributeFilter func(string) bool

	// OptionalLabels specifies a list of optional labels to enable on any
	// metrics that support them.
	OptionalLabels []string

	// pluginOption is used to get labels to attach to certain metrics, if set.
	// It is unexported and is assigned through the otelinternal.SetPluginOption
	// hook installed by this package's init function.
	pluginOption otelinternal.PluginOption
}
   107  
   108  // DialOption returns a dial option which enables OpenTelemetry instrumentation
   109  // code for a grpc.ClientConn.
   110  //
   111  // Client applications interested in instrumenting their grpc.ClientConn should
   112  // pass the dial option returned from this function as a dial option to
   113  // grpc.NewClient().
   114  //
   115  // For the metrics supported by this instrumentation code, specify the client
   116  // metrics to record in metrics options. Also provide an implementation of a
   117  // MeterProvider. If the passed in Meter Provider does not have the view
   118  // configured for an individual metric turned on, the API call in this component
   119  // will create a default view for that metric.
   120  func DialOption(o Options) grpc.DialOption {
   121  	csh := &clientStatsHandler{options: o}
   122  	csh.initializeMetrics()
   123  	return joinDialOptions(grpc.WithChainUnaryInterceptor(csh.unaryInterceptor), grpc.WithChainStreamInterceptor(csh.streamInterceptor), grpc.WithStatsHandler(csh))
   124  }
   125  
// joinServerOptions is the value held in internal.JoinServerOptions, asserted
// to a function that combines several server options into a single one. The
// assertion is unchecked, so a mismatched type panics during package
// initialization.
var joinServerOptions = internal.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption)
   127  
   128  // ServerOption returns a server option which enables OpenTelemetry
   129  // instrumentation code for a grpc.Server.
   130  //
   131  // Server applications interested in instrumenting their grpc.Server should pass
   132  // the server option returned from this function as an argument to
   133  // grpc.NewServer().
   134  //
   135  // For the metrics supported by this instrumentation code, specify the server
   136  // metrics to record in metrics options. Also provide an implementation of a
   137  // MeterProvider. If the passed in Meter Provider does not have the view
   138  // configured for an individual metric turned on, the API call in this component
   139  // will create a default view for that metric.
   140  func ServerOption(o Options) grpc.ServerOption {
   141  	ssh := &serverStatsHandler{options: o}
   142  	ssh.initializeMetrics()
   143  	return joinServerOptions(grpc.ChainUnaryInterceptor(ssh.unaryInterceptor), grpc.ChainStreamInterceptor(ssh.streamInterceptor), grpc.StatsHandler(ssh))
   144  }
   145  
// callInfo is information pertaining to the lifespan of the RPC client side.
// It is carried in a context via setCallInfo and read back with getCallInfo.
type callInfo struct {
	// target is presumably the target string of the channel the call is made
	// on; it is populated outside this section of the file (TODO confirm).
	target string

	// method is presumably the RPC method name recorded for the call; it is
	// populated outside this section of the file (TODO confirm).
	method string
}
   152  
// callInfoKey is the context key type under which setCallInfo stores a
// *callInfo and getCallInfo looks it up.
type callInfoKey struct{}
   154  
   155  func setCallInfo(ctx context.Context, ci *callInfo) context.Context {
   156  	return context.WithValue(ctx, callInfoKey{}, ci)
   157  }
   158  
   159  // getCallInfo returns the callInfo stored in the context, or nil
   160  // if there isn't one.
   161  func getCallInfo(ctx context.Context) *callInfo {
   162  	ci, _ := ctx.Value(callInfoKey{}).(*callInfo)
   163  	return ci
   164  }
   165  
// rpcInfo is RPC information scoped to the RPC attempt life span client side,
// and the RPC life span server side. It is carried in a context via setRPCInfo
// and read back with getRPCInfo.
type rpcInfo struct {
	// ai holds the per-attempt data (byte counts, timing, labels, trace span).
	ai *attemptInfo
}
   171  
// rpcInfoKey is the context key type under which setRPCInfo stores a *rpcInfo
// and getRPCInfo looks it up.
type rpcInfoKey struct{}
   173  
   174  func setRPCInfo(ctx context.Context, ri *rpcInfo) context.Context {
   175  	return context.WithValue(ctx, rpcInfoKey{}, ri)
   176  }
   177  
   178  // getRPCInfo returns the rpcInfo stored in the context, or nil
   179  // if there isn't one.
   180  func getRPCInfo(ctx context.Context) *rpcInfo {
   181  	ri, _ := ctx.Value(rpcInfoKey{}).(*rpcInfo)
   182  	return ri
   183  }
   184  
   185  func removeLeadingSlash(mn string) string {
   186  	return strings.TrimLeft(mn, "/")
   187  }
   188  
// attemptInfo is RPC information scoped to the RPC attempt life span client
// side, and the RPC life span server side.
type attemptInfo struct {
	// access these counts atomically for hedging in the future:
	// number of bytes after compression (within each message) sent from side
	// (client || server).
	sentCompressedBytes int64
	// number of compressed bytes (within each message) received on side
	// (client || server).
	recvCompressedBytes int64

	// startTime and method are populated outside this section of the file;
	// presumably the start time of the attempt/RPC and its method name
	// (TODO confirm).
	startTime time.Time
	method    string

	pluginOptionLabels map[string]string // pluginOptionLabels to attach to metrics emitted
	xdsLabels          map[string]string // presumably labels supplied by xDS; set outside this section (TODO confirm)

	// traceSpan is data used for recording traces.
	traceSpan trace.Span
	// message counters for sent and received messages (used for
	// generating message IDs), and the number of previous RPC attempts for the
	// associated call.
	countSentMsg        uint32
	countRecvMsg        uint32
	previousRPCAttempts uint32
}
   215  
// clientMetrics groups the OpenTelemetry instruments for the per-call client
// metrics. The comment on each field gives the metric name it corresponds to.
type clientMetrics struct {
	// "grpc.client.attempt.started"
	attemptStarted otelmetric.Int64Counter
	// "grpc.client.attempt.duration"
	attemptDuration otelmetric.Float64Histogram
	// "grpc.client.attempt.sent_total_compressed_message_size"
	attemptSentTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.client.attempt.rcvd_total_compressed_message_size"
	attemptRcvdTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.client.call.duration"
	callDuration otelmetric.Float64Histogram
}
   228  
// serverMetrics groups the OpenTelemetry instruments for the per-call server
// metrics. The comment on each field gives the metric name it corresponds to.
type serverMetrics struct {
	// "grpc.server.call.started"
	callStarted otelmetric.Int64Counter
	// "grpc.server.call.sent_total_compressed_message_size"
	callSentTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.server.call.rcvd_total_compressed_message_size"
	callRcvdTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.server.call.duration"
	callDuration otelmetric.Float64Histogram
}
   239  
   240  func createInt64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64CounterOption) otelmetric.Int64Counter {
   241  	if _, ok := setOfMetrics[metricName]; !ok {
   242  		return noop.Int64Counter{}
   243  	}
   244  	ret, err := meter.Int64Counter(string(metricName), options...)
   245  	if err != nil {
   246  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   247  		return noop.Int64Counter{}
   248  	}
   249  	return ret
   250  }
   251  
   252  func createFloat64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64CounterOption) otelmetric.Float64Counter {
   253  	if _, ok := setOfMetrics[metricName]; !ok {
   254  		return noop.Float64Counter{}
   255  	}
   256  	ret, err := meter.Float64Counter(string(metricName), options...)
   257  	if err != nil {
   258  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   259  		return noop.Float64Counter{}
   260  	}
   261  	return ret
   262  }
   263  
   264  func createInt64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64HistogramOption) otelmetric.Int64Histogram {
   265  	if _, ok := setOfMetrics[metricName]; !ok {
   266  		return noop.Int64Histogram{}
   267  	}
   268  	ret, err := meter.Int64Histogram(string(metricName), options...)
   269  	if err != nil {
   270  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   271  		return noop.Int64Histogram{}
   272  	}
   273  	return ret
   274  }
   275  
   276  func createFloat64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64HistogramOption) otelmetric.Float64Histogram {
   277  	if _, ok := setOfMetrics[metricName]; !ok {
   278  		return noop.Float64Histogram{}
   279  	}
   280  	ret, err := meter.Float64Histogram(string(metricName), options...)
   281  	if err != nil {
   282  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   283  		return noop.Float64Histogram{}
   284  	}
   285  	return ret
   286  }
   287  
   288  func createInt64Gauge(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64GaugeOption) otelmetric.Int64Gauge {
   289  	if _, ok := setOfMetrics[metricName]; !ok {
   290  		return noop.Int64Gauge{}
   291  	}
   292  	ret, err := meter.Int64Gauge(string(metricName), options...)
   293  	if err != nil {
   294  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   295  		return noop.Int64Gauge{}
   296  	}
   297  	return ret
   298  }
   299  
   300  func optionFromLabels(labelKeys []string, optionalLabelKeys []string, optionalLabels []string, labelVals ...string) otelmetric.MeasurementOption {
   301  	var attributes []otelattribute.KeyValue
   302  
   303  	// Once it hits here lower level has guaranteed length of labelVals matches
   304  	// labelKeys + optionalLabelKeys.
   305  	for i, label := range labelKeys {
   306  		attributes = append(attributes, otelattribute.String(label, labelVals[i]))
   307  	}
   308  
   309  	for i, label := range optionalLabelKeys {
   310  		for _, optLabel := range optionalLabels { // o(n) could build out a set but n is currently capped at < 5
   311  			if label == optLabel {
   312  				attributes = append(attributes, otelattribute.String(label, labelVals[i+len(labelKeys)]))
   313  			}
   314  		}
   315  	}
   316  	return otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...))
   317  }
   318  
// registryMetrics implements MetricsRecorder for the client and server stats
// handlers.
//
// Each map associates a metric descriptor with the OpenTelemetry instrument of
// the matching kind. The maps are (re)created by registerMetrics and consulted
// by the Record* methods; a descriptor absent from its map makes the
// corresponding record call a no-op.
type registryMetrics struct {
	intCounts   map[*estats.MetricDescriptor]otelmetric.Int64Counter
	floatCounts map[*estats.MetricDescriptor]otelmetric.Float64Counter
	intHistos   map[*estats.MetricDescriptor]otelmetric.Int64Histogram
	floatHistos map[*estats.MetricDescriptor]otelmetric.Float64Histogram
	intGauges   map[*estats.MetricDescriptor]otelmetric.Int64Gauge

	// optionalLabels lists the optional label keys that the Record* methods
	// attach to recordings when a descriptor declares them.
	optionalLabels []string
}
   330  
   331  func (rm *registryMetrics) registerMetrics(metrics *stats.MetricSet, meter otelmetric.Meter) {
   332  	rm.intCounts = make(map[*estats.MetricDescriptor]otelmetric.Int64Counter)
   333  	rm.floatCounts = make(map[*estats.MetricDescriptor]otelmetric.Float64Counter)
   334  	rm.intHistos = make(map[*estats.MetricDescriptor]otelmetric.Int64Histogram)
   335  	rm.floatHistos = make(map[*estats.MetricDescriptor]otelmetric.Float64Histogram)
   336  	rm.intGauges = make(map[*estats.MetricDescriptor]otelmetric.Int64Gauge)
   337  
   338  	for metric := range metrics.Metrics() {
   339  		desc := estats.DescriptorForMetric(metric)
   340  		if desc == nil {
   341  			// Either the metric was per call or the metric is not registered.
   342  			// Thus, if this component ever receives the desc as a handle in
   343  			// record it will be a no-op.
   344  			continue
   345  		}
   346  		switch desc.Type {
   347  		case estats.MetricTypeIntCount:
   348  			rm.intCounts[desc] = createInt64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description))
   349  		case estats.MetricTypeFloatCount:
   350  			rm.floatCounts[desc] = createFloat64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description))
   351  		case estats.MetricTypeIntHisto:
   352  			rm.intHistos[desc] = createInt64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...))
   353  		case estats.MetricTypeFloatHisto:
   354  			rm.floatHistos[desc] = createFloat64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...))
   355  		case estats.MetricTypeIntGauge:
   356  			rm.intGauges[desc] = createInt64Gauge(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description))
   357  		}
   358  	}
   359  }
   360  
   361  func (rm *registryMetrics) RecordInt64Count(handle *estats.Int64CountHandle, incr int64, labels ...string) {
   362  	desc := handle.Descriptor()
   363  	if ic, ok := rm.intCounts[desc]; ok {
   364  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   365  		ic.Add(context.TODO(), incr, ao)
   366  	}
   367  }
   368  
   369  func (rm *registryMetrics) RecordFloat64Count(handle *estats.Float64CountHandle, incr float64, labels ...string) {
   370  	desc := handle.Descriptor()
   371  	if fc, ok := rm.floatCounts[desc]; ok {
   372  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   373  		fc.Add(context.TODO(), incr, ao)
   374  	}
   375  }
   376  
   377  func (rm *registryMetrics) RecordInt64Histo(handle *estats.Int64HistoHandle, incr int64, labels ...string) {
   378  	desc := handle.Descriptor()
   379  	if ih, ok := rm.intHistos[desc]; ok {
   380  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   381  		ih.Record(context.TODO(), incr, ao)
   382  	}
   383  }
   384  
   385  func (rm *registryMetrics) RecordFloat64Histo(handle *estats.Float64HistoHandle, incr float64, labels ...string) {
   386  	desc := handle.Descriptor()
   387  	if fh, ok := rm.floatHistos[desc]; ok {
   388  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   389  		fh.Record(context.TODO(), incr, ao)
   390  	}
   391  }
   392  
   393  func (rm *registryMetrics) RecordInt64Gauge(handle *estats.Int64GaugeHandle, incr int64, labels ...string) {
   394  	desc := handle.Descriptor()
   395  	if ig, ok := rm.intGauges[desc]; ok {
   396  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   397  		ig.Record(context.TODO(), incr, ao)
   398  	}
   399  }
   400  
// Users of this component should configure these bucket boundaries on the SDK
// MeterProvider they pass in. This component also sends them as "advice"
// through the API, which works today, but the stability of that mechanism is
// not guaranteed. For safety, the SDK MeterProvider should set these bounds
// for the corresponding metrics itself.
var (
	// DefaultLatencyBounds are the default bounds for latency metrics.
	DefaultLatencyBounds = []float64{0, 0.00001, 0.00005, 0.0001, 0.0003, 0.0006, 0.0008, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.008, 0.01, 0.013, 0.016, 0.02, 0.025, 0.03, 0.04, 0.05, 0.065, 0.08, 0.1, 0.13, 0.16, 0.2, 0.25, 0.3, 0.4, 0.5, 0.65, 0.8, 1, 2, 5, 10, 20, 50, 100} // provide "advice" through API, SDK should set this too
	// DefaultSizeBounds are the default bounds for metrics which record size.
	DefaultSizeBounds = []float64{0, 1024, 2048, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824, 4294967296}
	// defaultPerCallMetrics are the default metrics provided by this module:
	// the per-call client attempt/call metrics and the server call metrics
	// named by the constants listed below.
	defaultPerCallMetrics = stats.NewMetricSet(ClientAttemptStartedMetricName, ClientAttemptDurationMetricName, ClientAttemptSentCompressedTotalMessageSizeMetricName, ClientAttemptRcvdCompressedTotalMessageSizeMetricName, ClientCallDurationMetricName, ServerCallStartedMetricName, ServerCallSentCompressedTotalMessageSizeMetricName, ServerCallRcvdCompressedTotalMessageSizeMetricName, ServerCallDurationMetricName)
)
   414  
   415  // DefaultMetrics returns a set of default OpenTelemetry metrics.
   416  //
   417  // This should only be invoked after init time.
   418  func DefaultMetrics() *stats.MetricSet {
   419  	return defaultPerCallMetrics.Join(estats.DefaultMetrics)
   420  }