google.golang.org/grpc@v1.74.2/stats/opentelemetry/opentelemetry.go (about)

     1  /*
     2   * Copyright 2024 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package opentelemetry implements opentelemetry instrumentation code for
    18  // gRPC-Go clients and servers.
    19  //
    20  // For details on configuring opentelemetry and various instruments that this
    21  // package creates, see
    22  // [gRPC OpenTelemetry Metrics](https://grpc.io/docs/guides/opentelemetry-metrics/).
    23  package opentelemetry
    24  
    25  import (
    26  	"context"
    27  	"strings"
    28  	"sync/atomic"
    29  	"time"
    30  
    31  	otelattribute "go.opentelemetry.io/otel/attribute"
    32  	otelmetric "go.opentelemetry.io/otel/metric"
    33  	"go.opentelemetry.io/otel/metric/noop"
    34  	"go.opentelemetry.io/otel/trace"
    35  	"google.golang.org/grpc"
    36  	"google.golang.org/grpc/codes"
    37  	experimental "google.golang.org/grpc/experimental/opentelemetry"
    38  	estats "google.golang.org/grpc/experimental/stats"
    39  	"google.golang.org/grpc/grpclog"
    40  	"google.golang.org/grpc/internal"
    41  	"google.golang.org/grpc/stats"
    42  	otelinternal "google.golang.org/grpc/stats/opentelemetry/internal"
    43  )
    44  
    45  func init() {
    46  	otelinternal.SetPluginOption = func(o *Options, po otelinternal.PluginOption) {
    47  		o.MetricsOptions.pluginOption = po
    48  		// Log an error if one of the options is missing.
    49  		if (o.TraceOptions.TextMapPropagator == nil) != (o.TraceOptions.TracerProvider == nil) {
    50  			logger.Warning("Tracing will not be recorded because traceOptions are not set properly: one of TextMapPropagator or TracerProvider is missing")
    51  		}
    52  	}
    53  }
    54  
// Package-level helpers.
//
// logger is the grpclog component logger named "otel-plugin".
//
// canonicalString and joinDialOptions are hooks exposed through the grpc
// internal package. Each is recovered with a single-value type assertion, so
// package initialization panics if the hook does not have the asserted
// function type.
var (
	logger          = grpclog.Component("otel-plugin")
	canonicalString = internal.CanonicalString.(func(codes.Code) string)
	joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption)
)
    60  
// Options are the options for OpenTelemetry instrumentation.
//
// Metrics are considered enabled when MetricsOptions.MeterProvider is non-nil,
// and tracing is considered enabled when TraceOptions.TracerProvider is
// non-nil.
type Options struct {
	// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
	MetricsOptions MetricsOptions
	// TraceOptions are the tracing options for OpenTelemetry instrumentation.
	TraceOptions experimental.TraceOptions
}
    68  
    69  func (o *Options) isMetricsEnabled() bool {
    70  	return o.MetricsOptions.MeterProvider != nil
    71  }
    72  
    73  func (o *Options) isTracingEnabled() bool {
    74  	return o.TraceOptions.TracerProvider != nil
    75  }
    76  
// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
type MetricsOptions struct {
	// MeterProvider is the MeterProvider instance that will be used to create
	// instruments. To enable metrics collection, set a meter provider. If
	// unset, no metrics will be recorded.
	MeterProvider otelmetric.MeterProvider

	// Metrics are the metrics to instrument. An instrument is created, and
	// telemetry recorded, for each listed metric that the client and server
	// instrumentation components support. If not set, the default metrics
	// will be recorded.
	Metrics *stats.MetricSet

	// MethodAttributeFilter is a function that determines whether to record the
	// method name of RPCs as an attribute, or to bucket into "other". Take care
	// to limit the values allowed, as allowing too many will increase
	// cardinality and could cause severe memory or performance problems.
	//
	// This only applies for server-side metrics. For clients, to record the
	// method name in the attributes, pass grpc.StaticMethodCallOption to Invoke
	// or NewStream. Note that when using protobuf generated clients, this
	// CallOption is included automatically.
	MethodAttributeFilter func(string) bool

	// OptionalLabels specifies a list of optional labels to enable on any
	// metrics that support them.
	OptionalLabels []string

	// pluginOption is used to get labels to attach to certain metrics, if set.
	// It is assigned through the otelinternal.SetPluginOption hook.
	pluginOption otelinternal.PluginOption
}
   108  
   109  // DialOption returns a dial option which enables OpenTelemetry instrumentation
   110  // code for a grpc.ClientConn.
   111  //
   112  // Client applications interested in instrumenting their grpc.ClientConn should
   113  // pass the dial option returned from this function as a dial option to
   114  // grpc.NewClient().
   115  //
   116  // For the metrics supported by this instrumentation code, specify the client
   117  // metrics to record in metrics options. Also provide an implementation of a
   118  // MeterProvider. If the passed in Meter Provider does not have the view
   119  // configured for an individual metric turned on, the API call in this component
   120  // will create a default view for that metric.
   121  //
   122  // For the traces supported by this instrumentation code, provide an
   123  // implementation of a TextMapPropagator and OpenTelemetry TracerProvider.
   124  func DialOption(o Options) grpc.DialOption {
   125  	var metricsOpts, tracingOpts []grpc.DialOption
   126  
   127  	if o.isMetricsEnabled() {
   128  		metricsHandler := &clientMetricsHandler{options: o}
   129  		metricsHandler.initializeMetrics()
   130  		metricsOpts = append(metricsOpts, grpc.WithChainUnaryInterceptor(metricsHandler.unaryInterceptor), grpc.WithChainStreamInterceptor(metricsHandler.streamInterceptor), grpc.WithStatsHandler(metricsHandler))
   131  	}
   132  	if o.isTracingEnabled() {
   133  		tracingHandler := &clientTracingHandler{options: o}
   134  		tracingHandler.initializeTraces()
   135  		tracingOpts = append(tracingOpts, grpc.WithChainUnaryInterceptor(tracingHandler.unaryInterceptor), grpc.WithChainStreamInterceptor(tracingHandler.streamInterceptor), grpc.WithStatsHandler(tracingHandler))
   136  	}
   137  	return joinDialOptions(append(metricsOpts, tracingOpts...)...)
   138  }
   139  
// joinServerOptions is the hook exposed through the grpc internal package for
// combining several server options into one. The single-value type assertion
// panics during package initialization if the hook does not have the asserted
// function type.
var joinServerOptions = internal.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption)
   141  
   142  // ServerOption returns a server option which enables OpenTelemetry
   143  // instrumentation code for a grpc.Server.
   144  //
   145  // Server applications interested in instrumenting their grpc.Server should pass
   146  // the server option returned from this function as an argument to
   147  // grpc.NewServer().
   148  //
   149  // For the metrics supported by this instrumentation code, specify the server
   150  // metrics to record in metrics options. Also provide an implementation of a
   151  // MeterProvider. If the passed in Meter Provider does not have the view
   152  // configured for an individual metric turned on, the API call in this component
   153  // will create a default view for that metric.
   154  //
   155  // For the traces supported by this instrumentation code, provide an
   156  // implementation of a TextMapPropagator and OpenTelemetry TracerProvider.
   157  func ServerOption(o Options) grpc.ServerOption {
   158  	var metricsOpts, tracingOpts []grpc.ServerOption
   159  
   160  	if o.isMetricsEnabled() {
   161  		metricsHandler := &serverMetricsHandler{options: o}
   162  		metricsHandler.initializeMetrics()
   163  		metricsOpts = append(metricsOpts, grpc.ChainUnaryInterceptor(metricsHandler.unaryInterceptor), grpc.ChainStreamInterceptor(metricsHandler.streamInterceptor), grpc.StatsHandler(metricsHandler))
   164  	}
   165  	if o.isTracingEnabled() {
   166  		tracingHandler := &serverTracingHandler{options: o}
   167  		tracingHandler.initializeTraces()
   168  		tracingOpts = append(tracingOpts, grpc.StatsHandler(tracingHandler))
   169  	}
   170  	return joinServerOptions(append(metricsOpts, tracingOpts...)...)
   171  }
   172  
// callInfo is information pertaining to the lifespan of the RPC client side.
// It is carried in the context via setCallInfo and read back with getCallInfo.
type callInfo struct {
	// NOTE(review): presumably the target of the channel the call is made
	// on — confirm against the code that populates it.
	target string

	// NOTE(review): presumably the method name recorded for the call —
	// confirm against the code that populates it.
	method string

	// nameResolutionEventAdded is set when the resolver delay trace event
	// is added. Prevents duplicate events, since it is reported per-attempt.
	nameResolutionEventAdded atomic.Bool
}
   183  
// callInfoKey is the context key type under which a *callInfo is stored by
// setCallInfo and looked up by getCallInfo.
type callInfoKey struct{}
   185  
   186  func setCallInfo(ctx context.Context, ci *callInfo) context.Context {
   187  	return context.WithValue(ctx, callInfoKey{}, ci)
   188  }
   189  
   190  // getCallInfo returns the callInfo stored in the context, or nil
   191  // if there isn't one.
   192  func getCallInfo(ctx context.Context) *callInfo {
   193  	ci, _ := ctx.Value(callInfoKey{}).(*callInfo)
   194  	return ci
   195  }
   196  
// rpcInfo is RPC information scoped to the RPC attempt life span client side,
// and the RPC life span server side. It is carried in the context via
// setRPCInfo and read back with getRPCInfo.
type rpcInfo struct {
	// ai is the per-attempt bookkeeping attached to this RPC.
	ai *attemptInfo
}
   202  
// rpcInfoKey is the context key type under which a *rpcInfo is stored by
// setRPCInfo and looked up by getRPCInfo.
type rpcInfoKey struct{}
   204  
   205  func setRPCInfo(ctx context.Context, ri *rpcInfo) context.Context {
   206  	return context.WithValue(ctx, rpcInfoKey{}, ri)
   207  }
   208  
   209  // getRPCInfo returns the rpcInfo stored in the context, or nil
   210  // if there isn't one.
   211  func getRPCInfo(ctx context.Context) *rpcInfo {
   212  	ri, _ := ctx.Value(rpcInfoKey{}).(*rpcInfo)
   213  	return ri
   214  }
   215  
   216  func removeLeadingSlash(mn string) string {
   217  	return strings.TrimLeft(mn, "/")
   218  }
   219  
// attemptInfo is RPC information scoped to the RPC attempt life span client
// side, and the RPC life span server side.
type attemptInfo struct {
	// access these counts atomically for hedging in the future:
	// sentCompressedBytes is the number of bytes after compression (within
	// each message) sent from this side (client || server).
	sentCompressedBytes int64
	// recvCompressedBytes is the number of compressed bytes (within each
	// message) received on this side (client || server).
	recvCompressedBytes int64

	// NOTE(review): presumably the time the attempt (client) or RPC (server)
	// started and the method name used as a metric attribute — confirm
	// against the stats handlers that populate these fields.
	startTime time.Time
	method    string

	pluginOptionLabels map[string]string // pluginOptionLabels to attach to metrics emitted
	// NOTE(review): presumably labels supplied by xDS to attach to emitted
	// metrics — confirm against the code that populates it.
	xdsLabels          map[string]string

	// traceSpan is data used for recording traces.
	traceSpan trace.Span
	// message counters for sent and received messages (used for
	// generating message IDs), and the number of previous RPC attempts for the
	// associated call.
	countSentMsg        uint32
	countRecvMsg        uint32
	previousRPCAttempts uint32
}
   246  
// clientMetrics groups the OpenTelemetry instruments used for client-side
// per-call metrics. The comment on each field gives the name of the metric
// the instrument corresponds to.
type clientMetrics struct {
	// "grpc.client.attempt.started"
	attemptStarted otelmetric.Int64Counter
	// "grpc.client.attempt.duration"
	attemptDuration otelmetric.Float64Histogram
	// "grpc.client.attempt.sent_total_compressed_message_size"
	attemptSentTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.client.attempt.rcvd_total_compressed_message_size"
	attemptRcvdTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.client.call.duration"
	callDuration otelmetric.Float64Histogram
}
   259  
// serverMetrics groups the OpenTelemetry instruments used for server-side
// per-call metrics. The comment on each field gives the name of the metric
// the instrument corresponds to.
type serverMetrics struct {
	// "grpc.server.call.started"
	callStarted otelmetric.Int64Counter
	// "grpc.server.call.sent_total_compressed_message_size"
	callSentTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.server.call.rcvd_total_compressed_message_size"
	callRcvdTotalCompressedMessageSize otelmetric.Int64Histogram
	// "grpc.server.call.duration"
	callDuration otelmetric.Float64Histogram
}
   270  
   271  func createInt64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64CounterOption) otelmetric.Int64Counter {
   272  	if _, ok := setOfMetrics[metricName]; !ok {
   273  		return noop.Int64Counter{}
   274  	}
   275  	ret, err := meter.Int64Counter(string(metricName), options...)
   276  	if err != nil {
   277  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   278  		return noop.Int64Counter{}
   279  	}
   280  	return ret
   281  }
   282  
   283  func createFloat64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64CounterOption) otelmetric.Float64Counter {
   284  	if _, ok := setOfMetrics[metricName]; !ok {
   285  		return noop.Float64Counter{}
   286  	}
   287  	ret, err := meter.Float64Counter(string(metricName), options...)
   288  	if err != nil {
   289  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   290  		return noop.Float64Counter{}
   291  	}
   292  	return ret
   293  }
   294  
   295  func createInt64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64HistogramOption) otelmetric.Int64Histogram {
   296  	if _, ok := setOfMetrics[metricName]; !ok {
   297  		return noop.Int64Histogram{}
   298  	}
   299  	ret, err := meter.Int64Histogram(string(metricName), options...)
   300  	if err != nil {
   301  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   302  		return noop.Int64Histogram{}
   303  	}
   304  	return ret
   305  }
   306  
   307  func createFloat64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64HistogramOption) otelmetric.Float64Histogram {
   308  	if _, ok := setOfMetrics[metricName]; !ok {
   309  		return noop.Float64Histogram{}
   310  	}
   311  	ret, err := meter.Float64Histogram(string(metricName), options...)
   312  	if err != nil {
   313  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   314  		return noop.Float64Histogram{}
   315  	}
   316  	return ret
   317  }
   318  
   319  func createInt64Gauge(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64GaugeOption) otelmetric.Int64Gauge {
   320  	if _, ok := setOfMetrics[metricName]; !ok {
   321  		return noop.Int64Gauge{}
   322  	}
   323  	ret, err := meter.Int64Gauge(string(metricName), options...)
   324  	if err != nil {
   325  		logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err)
   326  		return noop.Int64Gauge{}
   327  	}
   328  	return ret
   329  }
   330  
   331  func optionFromLabels(labelKeys []string, optionalLabelKeys []string, optionalLabels []string, labelVals ...string) otelmetric.MeasurementOption {
   332  	var attributes []otelattribute.KeyValue
   333  
   334  	// Once it hits here lower level has guaranteed length of labelVals matches
   335  	// labelKeys + optionalLabelKeys.
   336  	for i, label := range labelKeys {
   337  		attributes = append(attributes, otelattribute.String(label, labelVals[i]))
   338  	}
   339  
   340  	for i, label := range optionalLabelKeys {
   341  		for _, optLabel := range optionalLabels { // o(n) could build out a set but n is currently capped at < 5
   342  			if label == optLabel {
   343  				attributes = append(attributes, otelattribute.String(label, labelVals[i+len(labelKeys)]))
   344  			}
   345  		}
   346  	}
   347  	return otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...))
   348  }
   349  
// registryMetrics implements MetricsRecorder for the client and server stats
// handlers.
//
// The maps are (re)created by registerMetrics and hold one instrument per
// registered metric descriptor, grouped by instrument type. The Record*
// methods look the instrument up by the handle's descriptor.
type registryMetrics struct {
	intCounts   map[*estats.MetricDescriptor]otelmetric.Int64Counter
	floatCounts map[*estats.MetricDescriptor]otelmetric.Float64Counter
	intHistos   map[*estats.MetricDescriptor]otelmetric.Int64Histogram
	floatHistos map[*estats.MetricDescriptor]otelmetric.Float64Histogram
	intGauges   map[*estats.MetricDescriptor]otelmetric.Int64Gauge

	// optionalLabels are the optional label keys to attach when recording;
	// they are passed to optionFromLabels on every Record* call.
	optionalLabels []string
}
   361  
   362  func (rm *registryMetrics) registerMetrics(metrics *stats.MetricSet, meter otelmetric.Meter) {
   363  	rm.intCounts = make(map[*estats.MetricDescriptor]otelmetric.Int64Counter)
   364  	rm.floatCounts = make(map[*estats.MetricDescriptor]otelmetric.Float64Counter)
   365  	rm.intHistos = make(map[*estats.MetricDescriptor]otelmetric.Int64Histogram)
   366  	rm.floatHistos = make(map[*estats.MetricDescriptor]otelmetric.Float64Histogram)
   367  	rm.intGauges = make(map[*estats.MetricDescriptor]otelmetric.Int64Gauge)
   368  
   369  	for metric := range metrics.Metrics() {
   370  		desc := estats.DescriptorForMetric(metric)
   371  		if desc == nil {
   372  			// Either the metric was per call or the metric is not registered.
   373  			// Thus, if this component ever receives the desc as a handle in
   374  			// record it will be a no-op.
   375  			continue
   376  		}
   377  		switch desc.Type {
   378  		case estats.MetricTypeIntCount:
   379  			rm.intCounts[desc] = createInt64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description))
   380  		case estats.MetricTypeFloatCount:
   381  			rm.floatCounts[desc] = createFloat64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description))
   382  		case estats.MetricTypeIntHisto:
   383  			rm.intHistos[desc] = createInt64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...))
   384  		case estats.MetricTypeFloatHisto:
   385  			rm.floatHistos[desc] = createFloat64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...))
   386  		case estats.MetricTypeIntGauge:
   387  			rm.intGauges[desc] = createInt64Gauge(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description))
   388  		}
   389  	}
   390  }
   391  
   392  func (rm *registryMetrics) RecordInt64Count(handle *estats.Int64CountHandle, incr int64, labels ...string) {
   393  	desc := handle.Descriptor()
   394  	if ic, ok := rm.intCounts[desc]; ok {
   395  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   396  		ic.Add(context.TODO(), incr, ao)
   397  	}
   398  }
   399  
   400  func (rm *registryMetrics) RecordFloat64Count(handle *estats.Float64CountHandle, incr float64, labels ...string) {
   401  	desc := handle.Descriptor()
   402  	if fc, ok := rm.floatCounts[desc]; ok {
   403  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   404  		fc.Add(context.TODO(), incr, ao)
   405  	}
   406  }
   407  
   408  func (rm *registryMetrics) RecordInt64Histo(handle *estats.Int64HistoHandle, incr int64, labels ...string) {
   409  	desc := handle.Descriptor()
   410  	if ih, ok := rm.intHistos[desc]; ok {
   411  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   412  		ih.Record(context.TODO(), incr, ao)
   413  	}
   414  }
   415  
   416  func (rm *registryMetrics) RecordFloat64Histo(handle *estats.Float64HistoHandle, incr float64, labels ...string) {
   417  	desc := handle.Descriptor()
   418  	if fh, ok := rm.floatHistos[desc]; ok {
   419  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   420  		fh.Record(context.TODO(), incr, ao)
   421  	}
   422  }
   423  
   424  func (rm *registryMetrics) RecordInt64Gauge(handle *estats.Int64GaugeHandle, incr int64, labels ...string) {
   425  	desc := handle.Descriptor()
   426  	if ig, ok := rm.intGauges[desc]; ok {
   427  		ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...)
   428  		ig.Record(context.TODO(), incr, ao)
   429  	}
   430  }
   431  
// Users of this component should configure these bucket boundaries on the SDK
// MeterProvider they pass in. This component also sends them as "advice" to
// the API, which works, but the stability of that mechanism is not guaranteed,
// so for safety the SDK MeterProvider should set these bounds for the
// corresponding metrics itself.
var (
	// DefaultLatencyBounds are the default bounds for latency metrics.
	// NOTE(review): presumably expressed in seconds — confirm against the
	// unit declared for the duration histograms.
	DefaultLatencyBounds = []float64{0, 0.00001, 0.00005, 0.0001, 0.0003, 0.0006, 0.0008, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.008, 0.01, 0.013, 0.016, 0.02, 0.025, 0.03, 0.04, 0.05, 0.065, 0.08, 0.1, 0.13, 0.16, 0.2, 0.25, 0.3, 0.4, 0.5, 0.65, 0.8, 1, 2, 5, 10, 20, 50, 100} // provide "advice" through API, SDK should set this too
	// DefaultSizeBounds are the default bounds for metrics which record size.
	// NOTE(review): presumably expressed in bytes — confirm against the unit
	// declared for the message-size histograms.
	DefaultSizeBounds = []float64{0, 1024, 2048, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824, 4294967296}
	// defaultPerCallMetrics are the default metrics provided by this module:
	// the client attempt/call metrics and the server call metrics named below.
	defaultPerCallMetrics = stats.NewMetricSet(ClientAttemptStartedMetricName, ClientAttemptDurationMetricName, ClientAttemptSentCompressedTotalMessageSizeMetricName, ClientAttemptRcvdCompressedTotalMessageSizeMetricName, ClientCallDurationMetricName, ServerCallStartedMetricName, ServerCallSentCompressedTotalMessageSizeMetricName, ServerCallRcvdCompressedTotalMessageSizeMetricName, ServerCallDurationMetricName)
)
   445  
   446  // DefaultMetrics returns a set of default OpenTelemetry metrics.
   447  //
   448  // This should only be invoked after init time.
   449  func DefaultMetrics() *stats.MetricSet {
   450  	return defaultPerCallMetrics.Join(estats.DefaultMetrics)
   451  }