google.golang.org/grpc@v1.74.2/stats/opentelemetry/opentelemetry.go (about) 1 /* 2 * Copyright 2024 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package opentelemetry implements opentelemetry instrumentation code for 18 // gRPC-Go clients and servers. 19 // 20 // For details on configuring opentelemetry and various instruments that this 21 // package creates, see 22 // [gRPC OpenTelemetry Metrics](https://grpc.io/docs/guides/opentelemetry-metrics/). 23 package opentelemetry 24 25 import ( 26 "context" 27 "strings" 28 "sync/atomic" 29 "time" 30 31 otelattribute "go.opentelemetry.io/otel/attribute" 32 otelmetric "go.opentelemetry.io/otel/metric" 33 "go.opentelemetry.io/otel/metric/noop" 34 "go.opentelemetry.io/otel/trace" 35 "google.golang.org/grpc" 36 "google.golang.org/grpc/codes" 37 experimental "google.golang.org/grpc/experimental/opentelemetry" 38 estats "google.golang.org/grpc/experimental/stats" 39 "google.golang.org/grpc/grpclog" 40 "google.golang.org/grpc/internal" 41 "google.golang.org/grpc/stats" 42 otelinternal "google.golang.org/grpc/stats/opentelemetry/internal" 43 ) 44 45 func init() { 46 otelinternal.SetPluginOption = func(o *Options, po otelinternal.PluginOption) { 47 o.MetricsOptions.pluginOption = po 48 // Log an error if one of the options is missing. 49 if (o.TraceOptions.TextMapPropagator == nil) != (o.TraceOptions.TracerProvider == nil) { 50 logger.Warning("Tracing will not be recorded because traceOptions are not set properly: one of TextMapPropagator or TracerProvider is missing") 51 } 52 } 53 } 54 55 var ( 56 logger = grpclog.Component("otel-plugin") 57 canonicalString = internal.CanonicalString.(func(codes.Code) string) 58 joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption) 59 ) 60 61 // Options are the options for OpenTelemetry instrumentation. 62 type Options struct { 63 // MetricsOptions are the metrics options for OpenTelemetry instrumentation. 64 MetricsOptions MetricsOptions 65 // TraceOptions are the tracing options for OpenTelemetry instrumentation. 66 TraceOptions experimental.TraceOptions 67 } 68 69 func (o *Options) isMetricsEnabled() bool { 70 return o.MetricsOptions.MeterProvider != nil 71 } 72 73 func (o *Options) isTracingEnabled() bool { 74 return o.TraceOptions.TracerProvider != nil 75 } 76 77 // MetricsOptions are the metrics options for OpenTelemetry instrumentation. 78 type MetricsOptions struct { 79 // MeterProvider is the MeterProvider instance that will be used to create 80 // instruments. To enable metrics collection, set a meter provider. If 81 // unset, no metrics will be recorded. 82 MeterProvider otelmetric.MeterProvider 83 84 // Metrics are the metrics to instrument. Will create instrument and record telemetry 85 // for corresponding metric supported by the client and server 86 // instrumentation components if applicable. If not set, the default metrics 87 // will be recorded. 88 Metrics *stats.MetricSet 89 90 // MethodAttributeFilter is a function that determines whether to record the 91 // method name of RPCs as an attribute, or to bucket into "other". Take care 92 // to limit the values allowed, as allowing too many will increase 93 // cardinality and could cause severe memory or performance problems. 94 // 95 // This only applies for server-side metrics. For clients, to record the 96 // method name in the attributes, pass grpc.StaticMethodCallOption to Invoke 97 // or NewStream. Note that when using protobuf generated clients, this 98 // CallOption is included automatically. 99 MethodAttributeFilter func(string) bool 100 101 // OptionalLabels specifies a list of optional labels to enable on any 102 // metrics that support them. 103 OptionalLabels []string 104 105 // pluginOption is used to get labels to attach to certain metrics, if set. 106 pluginOption otelinternal.PluginOption 107 } 108 109 // DialOption returns a dial option which enables OpenTelemetry instrumentation 110 // code for a grpc.ClientConn. 111 // 112 // Client applications interested in instrumenting their grpc.ClientConn should 113 // pass the dial option returned from this function as a dial option to 114 // grpc.NewClient(). 115 // 116 // For the metrics supported by this instrumentation code, specify the client 117 // metrics to record in metrics options. Also provide an implementation of a 118 // MeterProvider. If the passed in Meter Provider does not have the view 119 // configured for an individual metric turned on, the API call in this component 120 // will create a default view for that metric. 121 // 122 // For the traces supported by this instrumentation code, provide an 123 // implementation of a TextMapPropagator and OpenTelemetry TracerProvider. 124 func DialOption(o Options) grpc.DialOption { 125 var metricsOpts, tracingOpts []grpc.DialOption 126 127 if o.isMetricsEnabled() { 128 metricsHandler := &clientMetricsHandler{options: o} 129 metricsHandler.initializeMetrics() 130 metricsOpts = append(metricsOpts, grpc.WithChainUnaryInterceptor(metricsHandler.unaryInterceptor), grpc.WithChainStreamInterceptor(metricsHandler.streamInterceptor), grpc.WithStatsHandler(metricsHandler)) 131 } 132 if o.isTracingEnabled() { 133 tracingHandler := &clientTracingHandler{options: o} 134 tracingHandler.initializeTraces() 135 tracingOpts = append(tracingOpts, grpc.WithChainUnaryInterceptor(tracingHandler.unaryInterceptor), grpc.WithChainStreamInterceptor(tracingHandler.streamInterceptor), grpc.WithStatsHandler(tracingHandler)) 136 } 137 return joinDialOptions(append(metricsOpts, tracingOpts...)...) 138 } 139 140 var joinServerOptions = internal.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption) 141 142 // ServerOption returns a server option which enables OpenTelemetry 143 // instrumentation code for a grpc.Server. 144 // 145 // Server applications interested in instrumenting their grpc.Server should pass 146 // the server option returned from this function as an argument to 147 // grpc.NewServer(). 148 // 149 // For the metrics supported by this instrumentation code, specify the server 150 // metrics to record in metrics options. Also provide an implementation of a 151 // MeterProvider. If the passed in Meter Provider does not have the view 152 // configured for an individual metric turned on, the API call in this component 153 // will create a default view for that metric. 154 // 155 // For the traces supported by this instrumentation code, provide an 156 // implementation of a TextMapPropagator and OpenTelemetry TracerProvider. 157 func ServerOption(o Options) grpc.ServerOption { 158 var metricsOpts, tracingOpts []grpc.ServerOption 159 160 if o.isMetricsEnabled() { 161 metricsHandler := &serverMetricsHandler{options: o} 162 metricsHandler.initializeMetrics() 163 metricsOpts = append(metricsOpts, grpc.ChainUnaryInterceptor(metricsHandler.unaryInterceptor), grpc.ChainStreamInterceptor(metricsHandler.streamInterceptor), grpc.StatsHandler(metricsHandler)) 164 } 165 if o.isTracingEnabled() { 166 tracingHandler := &serverTracingHandler{options: o} 167 tracingHandler.initializeTraces() 168 tracingOpts = append(tracingOpts, grpc.StatsHandler(tracingHandler)) 169 } 170 return joinServerOptions(append(metricsOpts, tracingOpts...)...) 171 } 172 173 // callInfo is information pertaining to the lifespan of the RPC client side. 174 type callInfo struct { 175 target string 176 177 method string 178 179 // nameResolutionEventAdded is set when the resolver delay trace event 180 // is added. Prevents duplicate events, since it is reported per-attempt. 181 nameResolutionEventAdded atomic.Bool 182 } 183 184 type callInfoKey struct{} 185 186 func setCallInfo(ctx context.Context, ci *callInfo) context.Context { 187 return context.WithValue(ctx, callInfoKey{}, ci) 188 } 189 190 // getCallInfo returns the callInfo stored in the context, or nil 191 // if there isn't one. 192 func getCallInfo(ctx context.Context) *callInfo { 193 ci, _ := ctx.Value(callInfoKey{}).(*callInfo) 194 return ci 195 } 196 197 // rpcInfo is RPC information scoped to the RPC attempt life span client side, 198 // and the RPC life span server side. 199 type rpcInfo struct { 200 ai *attemptInfo 201 } 202 203 type rpcInfoKey struct{} 204 205 func setRPCInfo(ctx context.Context, ri *rpcInfo) context.Context { 206 return context.WithValue(ctx, rpcInfoKey{}, ri) 207 } 208 209 // getRPCInfo returns the rpcInfo stored in the context, or nil 210 // if there isn't one. 211 func getRPCInfo(ctx context.Context) *rpcInfo { 212 ri, _ := ctx.Value(rpcInfoKey{}).(*rpcInfo) 213 return ri 214 } 215 216 func removeLeadingSlash(mn string) string { 217 return strings.TrimLeft(mn, "/") 218 } 219 220 // attemptInfo is RPC information scoped to the RPC attempt life span client 221 // side, and the RPC life span server side. 222 type attemptInfo struct { 223 // access these counts atomically for hedging in the future: 224 // number of bytes after compression (within each message) from side (client 225 // || server). 226 sentCompressedBytes int64 227 // number of compressed bytes received (within each message) received on 228 // side (client || server). 229 recvCompressedBytes int64 230 231 startTime time.Time 232 method string 233 234 pluginOptionLabels map[string]string // pluginOptionLabels to attach to metrics emitted 235 xdsLabels map[string]string 236 237 // traceSpan is data used for recording traces. 238 traceSpan trace.Span 239 // message counters for sent and received messages (used for 240 // generating message IDs), and the number of previous RPC attempts for the 241 // associated call. 242 countSentMsg uint32 243 countRecvMsg uint32 244 previousRPCAttempts uint32 245 } 246 247 type clientMetrics struct { 248 // "grpc.client.attempt.started" 249 attemptStarted otelmetric.Int64Counter 250 // "grpc.client.attempt.duration" 251 attemptDuration otelmetric.Float64Histogram 252 // "grpc.client.attempt.sent_total_compressed_message_size" 253 attemptSentTotalCompressedMessageSize otelmetric.Int64Histogram 254 // "grpc.client.attempt.rcvd_total_compressed_message_size" 255 attemptRcvdTotalCompressedMessageSize otelmetric.Int64Histogram 256 // "grpc.client.call.duration" 257 callDuration otelmetric.Float64Histogram 258 } 259 260 type serverMetrics struct { 261 // "grpc.server.call.started" 262 callStarted otelmetric.Int64Counter 263 // "grpc.server.call.sent_total_compressed_message_size" 264 callSentTotalCompressedMessageSize otelmetric.Int64Histogram 265 // "grpc.server.call.rcvd_total_compressed_message_size" 266 callRcvdTotalCompressedMessageSize otelmetric.Int64Histogram 267 // "grpc.server.call.duration" 268 callDuration otelmetric.Float64Histogram 269 } 270 271 func createInt64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64CounterOption) otelmetric.Int64Counter { 272 if _, ok := setOfMetrics[metricName]; !ok { 273 return noop.Int64Counter{} 274 } 275 ret, err := meter.Int64Counter(string(metricName), options...) 276 if err != nil { 277 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 278 return noop.Int64Counter{} 279 } 280 return ret 281 } 282 283 func createFloat64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64CounterOption) otelmetric.Float64Counter { 284 if _, ok := setOfMetrics[metricName]; !ok { 285 return noop.Float64Counter{} 286 } 287 ret, err := meter.Float64Counter(string(metricName), options...) 288 if err != nil { 289 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 290 return noop.Float64Counter{} 291 } 292 return ret 293 } 294 295 func createInt64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64HistogramOption) otelmetric.Int64Histogram { 296 if _, ok := setOfMetrics[metricName]; !ok { 297 return noop.Int64Histogram{} 298 } 299 ret, err := meter.Int64Histogram(string(metricName), options...) 300 if err != nil { 301 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 302 return noop.Int64Histogram{} 303 } 304 return ret 305 } 306 307 func createFloat64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64HistogramOption) otelmetric.Float64Histogram { 308 if _, ok := setOfMetrics[metricName]; !ok { 309 return noop.Float64Histogram{} 310 } 311 ret, err := meter.Float64Histogram(string(metricName), options...) 312 if err != nil { 313 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 314 return noop.Float64Histogram{} 315 } 316 return ret 317 } 318 319 func createInt64Gauge(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64GaugeOption) otelmetric.Int64Gauge { 320 if _, ok := setOfMetrics[metricName]; !ok { 321 return noop.Int64Gauge{} 322 } 323 ret, err := meter.Int64Gauge(string(metricName), options...) 324 if err != nil { 325 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 326 return noop.Int64Gauge{} 327 } 328 return ret 329 } 330 331 func optionFromLabels(labelKeys []string, optionalLabelKeys []string, optionalLabels []string, labelVals ...string) otelmetric.MeasurementOption { 332 var attributes []otelattribute.KeyValue 333 334 // Once it hits here lower level has guaranteed length of labelVals matches 335 // labelKeys + optionalLabelKeys. 336 for i, label := range labelKeys { 337 attributes = append(attributes, otelattribute.String(label, labelVals[i])) 338 } 339 340 for i, label := range optionalLabelKeys { 341 for _, optLabel := range optionalLabels { // o(n) could build out a set but n is currently capped at < 5 342 if label == optLabel { 343 attributes = append(attributes, otelattribute.String(label, labelVals[i+len(labelKeys)])) 344 } 345 } 346 } 347 return otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...)) 348 } 349 350 // registryMetrics implements MetricsRecorder for the client and server stats 351 // handlers. 352 type registryMetrics struct { 353 intCounts map[*estats.MetricDescriptor]otelmetric.Int64Counter 354 floatCounts map[*estats.MetricDescriptor]otelmetric.Float64Counter 355 intHistos map[*estats.MetricDescriptor]otelmetric.Int64Histogram 356 floatHistos map[*estats.MetricDescriptor]otelmetric.Float64Histogram 357 intGauges map[*estats.MetricDescriptor]otelmetric.Int64Gauge 358 359 optionalLabels []string 360 } 361 362 func (rm *registryMetrics) registerMetrics(metrics *stats.MetricSet, meter otelmetric.Meter) { 363 rm.intCounts = make(map[*estats.MetricDescriptor]otelmetric.Int64Counter) 364 rm.floatCounts = make(map[*estats.MetricDescriptor]otelmetric.Float64Counter) 365 rm.intHistos = make(map[*estats.MetricDescriptor]otelmetric.Int64Histogram) 366 rm.floatHistos = make(map[*estats.MetricDescriptor]otelmetric.Float64Histogram) 367 rm.intGauges = make(map[*estats.MetricDescriptor]otelmetric.Int64Gauge) 368 369 for metric := range metrics.Metrics() { 370 desc := estats.DescriptorForMetric(metric) 371 if desc == nil { 372 // Either the metric was per call or the metric is not registered. 373 // Thus, if this component ever receives the desc as a handle in 374 // record it will be a no-op. 375 continue 376 } 377 switch desc.Type { 378 case estats.MetricTypeIntCount: 379 rm.intCounts[desc] = createInt64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description)) 380 case estats.MetricTypeFloatCount: 381 rm.floatCounts[desc] = createFloat64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description)) 382 case estats.MetricTypeIntHisto: 383 rm.intHistos[desc] = createInt64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...)) 384 case estats.MetricTypeFloatHisto: 385 rm.floatHistos[desc] = createFloat64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...)) 386 case estats.MetricTypeIntGauge: 387 rm.intGauges[desc] = createInt64Gauge(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description)) 388 } 389 } 390 } 391 392 func (rm *registryMetrics) RecordInt64Count(handle *estats.Int64CountHandle, incr int64, labels ...string) { 393 desc := handle.Descriptor() 394 if ic, ok := rm.intCounts[desc]; ok { 395 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 396 ic.Add(context.TODO(), incr, ao) 397 } 398 } 399 400 func (rm *registryMetrics) RecordFloat64Count(handle *estats.Float64CountHandle, incr float64, labels ...string) { 401 desc := handle.Descriptor() 402 if fc, ok := rm.floatCounts[desc]; ok { 403 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 404 fc.Add(context.TODO(), incr, ao) 405 } 406 } 407 408 func (rm *registryMetrics) RecordInt64Histo(handle *estats.Int64HistoHandle, incr int64, labels ...string) { 409 desc := handle.Descriptor() 410 if ih, ok := rm.intHistos[desc]; ok { 411 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 412 ih.Record(context.TODO(), incr, ao) 413 } 414 } 415 416 func (rm *registryMetrics) RecordFloat64Histo(handle *estats.Float64HistoHandle, incr float64, labels ...string) { 417 desc := handle.Descriptor() 418 if fh, ok := rm.floatHistos[desc]; ok { 419 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 420 fh.Record(context.TODO(), incr, ao) 421 } 422 } 423 424 func (rm *registryMetrics) RecordInt64Gauge(handle *estats.Int64GaugeHandle, incr int64, labels ...string) { 425 desc := handle.Descriptor() 426 if ig, ok := rm.intGauges[desc]; ok { 427 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 428 ig.Record(context.TODO(), incr, ao) 429 } 430 } 431 432 // Users of this component should use these bucket boundaries as part of their 433 // SDK MeterProvider passed in. This component sends this as "advice" to the 434 // API, which works, however this stability is not guaranteed, so for safety the 435 // SDK Meter Provider provided should set these bounds for corresponding 436 // metrics. 437 var ( 438 // DefaultLatencyBounds are the default bounds for latency metrics. 439 DefaultLatencyBounds = []float64{0, 0.00001, 0.00005, 0.0001, 0.0003, 0.0006, 0.0008, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.008, 0.01, 0.013, 0.016, 0.02, 0.025, 0.03, 0.04, 0.05, 0.065, 0.08, 0.1, 0.13, 0.16, 0.2, 0.25, 0.3, 0.4, 0.5, 0.65, 0.8, 1, 2, 5, 10, 20, 50, 100} // provide "advice" through API, SDK should set this too 440 // DefaultSizeBounds are the default bounds for metrics which record size. 441 DefaultSizeBounds = []float64{0, 1024, 2048, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824, 4294967296} 442 // defaultPerCallMetrics are the default metrics provided by this module. 443 defaultPerCallMetrics = stats.NewMetricSet(ClientAttemptStartedMetricName, ClientAttemptDurationMetricName, ClientAttemptSentCompressedTotalMessageSizeMetricName, ClientAttemptRcvdCompressedTotalMessageSizeMetricName, ClientCallDurationMetricName, ServerCallStartedMetricName, ServerCallSentCompressedTotalMessageSizeMetricName, ServerCallRcvdCompressedTotalMessageSizeMetricName, ServerCallDurationMetricName) 444 ) 445 446 // DefaultMetrics returns a set of default OpenTelemetry metrics. 447 // 448 // This should only be invoked after init time. 449 func DefaultMetrics() *stats.MetricSet { 450 return defaultPerCallMetrics.Join(estats.DefaultMetrics) 451 }