google.golang.org/grpc@v1.72.2/stats/opentelemetry/opentelemetry.go (about) 1 /* 2 * Copyright 2024 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package opentelemetry implements opentelemetry instrumentation code for 18 // gRPC-Go clients and servers. 19 // 20 // For details on configuring opentelemetry and various instruments that this 21 // package creates, see 22 // [gRPC OpenTelemetry Metrics](https://grpc.io/docs/guides/opentelemetry-metrics/). 23 package opentelemetry 24 25 import ( 26 "context" 27 "strings" 28 "time" 29 30 otelattribute "go.opentelemetry.io/otel/attribute" 31 otelmetric "go.opentelemetry.io/otel/metric" 32 "go.opentelemetry.io/otel/metric/noop" 33 "go.opentelemetry.io/otel/trace" 34 "google.golang.org/grpc" 35 "google.golang.org/grpc/codes" 36 experimental "google.golang.org/grpc/experimental/opentelemetry" 37 estats "google.golang.org/grpc/experimental/stats" 38 "google.golang.org/grpc/grpclog" 39 "google.golang.org/grpc/internal" 40 "google.golang.org/grpc/stats" 41 otelinternal "google.golang.org/grpc/stats/opentelemetry/internal" 42 ) 43 44 func init() { 45 otelinternal.SetPluginOption = func(o *Options, po otelinternal.PluginOption) { 46 o.MetricsOptions.pluginOption = po 47 // Log an error if one of the options is missing. 48 if (o.TraceOptions.TextMapPropagator == nil) != (o.TraceOptions.TracerProvider == nil) { 49 logger.Warning("Tracing will not be recorded because traceOptions are not set properly: one of TextMapPropagator or TracerProvider is missing") 50 } 51 } 52 } 53 54 var ( 55 logger = grpclog.Component("otel-plugin") 56 canonicalString = internal.CanonicalString.(func(codes.Code) string) 57 joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption) 58 ) 59 60 // Options are the options for OpenTelemetry instrumentation. 61 type Options struct { 62 // MetricsOptions are the metrics options for OpenTelemetry instrumentation. 63 MetricsOptions MetricsOptions 64 // TraceOptions are the tracing options for OpenTelemetry instrumentation. 65 TraceOptions experimental.TraceOptions 66 } 67 68 func (o *Options) isMetricsEnabled() bool { 69 return o.MetricsOptions.MeterProvider != nil 70 } 71 72 func (o *Options) isTracingEnabled() bool { 73 return o.TraceOptions.TracerProvider != nil 74 } 75 76 // MetricsOptions are the metrics options for OpenTelemetry instrumentation. 77 type MetricsOptions struct { 78 // MeterProvider is the MeterProvider instance that will be used to create 79 // instruments. To enable metrics collection, set a meter provider. If 80 // unset, no metrics will be recorded. 81 MeterProvider otelmetric.MeterProvider 82 83 // Metrics are the metrics to instrument. Will create instrument and record telemetry 84 // for corresponding metric supported by the client and server 85 // instrumentation components if applicable. If not set, the default metrics 86 // will be recorded. 87 Metrics *stats.MetricSet 88 89 // MethodAttributeFilter is a function that determines whether to record the 90 // method name of RPCs as an attribute, or to bucket into "other". Take care 91 // to limit the values allowed, as allowing too many will increase 92 // cardinality and could cause severe memory or performance problems. 93 // 94 // This only applies for server-side metrics. For clients, to record the 95 // method name in the attributes, pass grpc.StaticMethodCallOption to Invoke 96 // or NewStream. Note that when using protobuf generated clients, this 97 // CallOption is included automatically. 98 MethodAttributeFilter func(string) bool 99 100 // OptionalLabels specifies a list of optional labels to enable on any 101 // metrics that support them. 102 OptionalLabels []string 103 104 // pluginOption is used to get labels to attach to certain metrics, if set. 105 pluginOption otelinternal.PluginOption 106 } 107 108 // DialOption returns a dial option which enables OpenTelemetry instrumentation 109 // code for a grpc.ClientConn. 110 // 111 // Client applications interested in instrumenting their grpc.ClientConn should 112 // pass the dial option returned from this function as a dial option to 113 // grpc.NewClient(). 114 // 115 // For the metrics supported by this instrumentation code, specify the client 116 // metrics to record in metrics options. Also provide an implementation of a 117 // MeterProvider. If the passed in Meter Provider does not have the view 118 // configured for an individual metric turned on, the API call in this component 119 // will create a default view for that metric. 120 func DialOption(o Options) grpc.DialOption { 121 csh := &clientStatsHandler{options: o} 122 csh.initializeMetrics() 123 return joinDialOptions(grpc.WithChainUnaryInterceptor(csh.unaryInterceptor), grpc.WithChainStreamInterceptor(csh.streamInterceptor), grpc.WithStatsHandler(csh)) 124 } 125 126 var joinServerOptions = internal.JoinServerOptions.(func(...grpc.ServerOption) grpc.ServerOption) 127 128 // ServerOption returns a server option which enables OpenTelemetry 129 // instrumentation code for a grpc.Server. 130 // 131 // Server applications interested in instrumenting their grpc.Server should pass 132 // the server option returned from this function as an argument to 133 // grpc.NewServer(). 134 // 135 // For the metrics supported by this instrumentation code, specify the server 136 // metrics to record in metrics options. Also provide an implementation of a 137 // MeterProvider. If the passed in Meter Provider does not have the view 138 // configured for an individual metric turned on, the API call in this component 139 // will create a default view for that metric. 140 func ServerOption(o Options) grpc.ServerOption { 141 ssh := &serverStatsHandler{options: o} 142 ssh.initializeMetrics() 143 return joinServerOptions(grpc.ChainUnaryInterceptor(ssh.unaryInterceptor), grpc.ChainStreamInterceptor(ssh.streamInterceptor), grpc.StatsHandler(ssh)) 144 } 145 146 // callInfo is information pertaining to the lifespan of the RPC client side. 147 type callInfo struct { 148 target string 149 150 method string 151 } 152 153 type callInfoKey struct{} 154 155 func setCallInfo(ctx context.Context, ci *callInfo) context.Context { 156 return context.WithValue(ctx, callInfoKey{}, ci) 157 } 158 159 // getCallInfo returns the callInfo stored in the context, or nil 160 // if there isn't one. 161 func getCallInfo(ctx context.Context) *callInfo { 162 ci, _ := ctx.Value(callInfoKey{}).(*callInfo) 163 return ci 164 } 165 166 // rpcInfo is RPC information scoped to the RPC attempt life span client side, 167 // and the RPC life span server side. 168 type rpcInfo struct { 169 ai *attemptInfo 170 } 171 172 type rpcInfoKey struct{} 173 174 func setRPCInfo(ctx context.Context, ri *rpcInfo) context.Context { 175 return context.WithValue(ctx, rpcInfoKey{}, ri) 176 } 177 178 // getRPCInfo returns the rpcInfo stored in the context, or nil 179 // if there isn't one. 180 func getRPCInfo(ctx context.Context) *rpcInfo { 181 ri, _ := ctx.Value(rpcInfoKey{}).(*rpcInfo) 182 return ri 183 } 184 185 func removeLeadingSlash(mn string) string { 186 return strings.TrimLeft(mn, "/") 187 } 188 189 // attemptInfo is RPC information scoped to the RPC attempt life span client 190 // side, and the RPC life span server side. 191 type attemptInfo struct { 192 // access these counts atomically for hedging in the future: 193 // number of bytes after compression (within each message) from side (client 194 // || server). 195 sentCompressedBytes int64 196 // number of compressed bytes received (within each message) received on 197 // side (client || server). 198 recvCompressedBytes int64 199 200 startTime time.Time 201 method string 202 203 pluginOptionLabels map[string]string // pluginOptionLabels to attach to metrics emitted 204 xdsLabels map[string]string 205 206 // traceSpan is data used for recording traces. 207 traceSpan trace.Span 208 // message counters for sent and received messages (used for 209 // generating message IDs), and the number of previous RPC attempts for the 210 // associated call. 211 countSentMsg uint32 212 countRecvMsg uint32 213 previousRPCAttempts uint32 214 } 215 216 type clientMetrics struct { 217 // "grpc.client.attempt.started" 218 attemptStarted otelmetric.Int64Counter 219 // "grpc.client.attempt.duration" 220 attemptDuration otelmetric.Float64Histogram 221 // "grpc.client.attempt.sent_total_compressed_message_size" 222 attemptSentTotalCompressedMessageSize otelmetric.Int64Histogram 223 // "grpc.client.attempt.rcvd_total_compressed_message_size" 224 attemptRcvdTotalCompressedMessageSize otelmetric.Int64Histogram 225 // "grpc.client.call.duration" 226 callDuration otelmetric.Float64Histogram 227 } 228 229 type serverMetrics struct { 230 // "grpc.server.call.started" 231 callStarted otelmetric.Int64Counter 232 // "grpc.server.call.sent_total_compressed_message_size" 233 callSentTotalCompressedMessageSize otelmetric.Int64Histogram 234 // "grpc.server.call.rcvd_total_compressed_message_size" 235 callRcvdTotalCompressedMessageSize otelmetric.Int64Histogram 236 // "grpc.server.call.duration" 237 callDuration otelmetric.Float64Histogram 238 } 239 240 func createInt64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64CounterOption) otelmetric.Int64Counter { 241 if _, ok := setOfMetrics[metricName]; !ok { 242 return noop.Int64Counter{} 243 } 244 ret, err := meter.Int64Counter(string(metricName), options...) 245 if err != nil { 246 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 247 return noop.Int64Counter{} 248 } 249 return ret 250 } 251 252 func createFloat64Counter(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64CounterOption) otelmetric.Float64Counter { 253 if _, ok := setOfMetrics[metricName]; !ok { 254 return noop.Float64Counter{} 255 } 256 ret, err := meter.Float64Counter(string(metricName), options...) 257 if err != nil { 258 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 259 return noop.Float64Counter{} 260 } 261 return ret 262 } 263 264 func createInt64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64HistogramOption) otelmetric.Int64Histogram { 265 if _, ok := setOfMetrics[metricName]; !ok { 266 return noop.Int64Histogram{} 267 } 268 ret, err := meter.Int64Histogram(string(metricName), options...) 269 if err != nil { 270 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 271 return noop.Int64Histogram{} 272 } 273 return ret 274 } 275 276 func createFloat64Histogram(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Float64HistogramOption) otelmetric.Float64Histogram { 277 if _, ok := setOfMetrics[metricName]; !ok { 278 return noop.Float64Histogram{} 279 } 280 ret, err := meter.Float64Histogram(string(metricName), options...) 281 if err != nil { 282 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 283 return noop.Float64Histogram{} 284 } 285 return ret 286 } 287 288 func createInt64Gauge(setOfMetrics map[string]bool, metricName string, meter otelmetric.Meter, options ...otelmetric.Int64GaugeOption) otelmetric.Int64Gauge { 289 if _, ok := setOfMetrics[metricName]; !ok { 290 return noop.Int64Gauge{} 291 } 292 ret, err := meter.Int64Gauge(string(metricName), options...) 293 if err != nil { 294 logger.Errorf("failed to register metric \"%v\", will not record: %v", metricName, err) 295 return noop.Int64Gauge{} 296 } 297 return ret 298 } 299 300 func optionFromLabels(labelKeys []string, optionalLabelKeys []string, optionalLabels []string, labelVals ...string) otelmetric.MeasurementOption { 301 var attributes []otelattribute.KeyValue 302 303 // Once it hits here lower level has guaranteed length of labelVals matches 304 // labelKeys + optionalLabelKeys. 305 for i, label := range labelKeys { 306 attributes = append(attributes, otelattribute.String(label, labelVals[i])) 307 } 308 309 for i, label := range optionalLabelKeys { 310 for _, optLabel := range optionalLabels { // o(n) could build out a set but n is currently capped at < 5 311 if label == optLabel { 312 attributes = append(attributes, otelattribute.String(label, labelVals[i+len(labelKeys)])) 313 } 314 } 315 } 316 return otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...)) 317 } 318 319 // registryMetrics implements MetricsRecorder for the client and server stats 320 // handlers. 321 type registryMetrics struct { 322 intCounts map[*estats.MetricDescriptor]otelmetric.Int64Counter 323 floatCounts map[*estats.MetricDescriptor]otelmetric.Float64Counter 324 intHistos map[*estats.MetricDescriptor]otelmetric.Int64Histogram 325 floatHistos map[*estats.MetricDescriptor]otelmetric.Float64Histogram 326 intGauges map[*estats.MetricDescriptor]otelmetric.Int64Gauge 327 328 optionalLabels []string 329 } 330 331 func (rm *registryMetrics) registerMetrics(metrics *stats.MetricSet, meter otelmetric.Meter) { 332 rm.intCounts = make(map[*estats.MetricDescriptor]otelmetric.Int64Counter) 333 rm.floatCounts = make(map[*estats.MetricDescriptor]otelmetric.Float64Counter) 334 rm.intHistos = make(map[*estats.MetricDescriptor]otelmetric.Int64Histogram) 335 rm.floatHistos = make(map[*estats.MetricDescriptor]otelmetric.Float64Histogram) 336 rm.intGauges = make(map[*estats.MetricDescriptor]otelmetric.Int64Gauge) 337 338 for metric := range metrics.Metrics() { 339 desc := estats.DescriptorForMetric(metric) 340 if desc == nil { 341 // Either the metric was per call or the metric is not registered. 342 // Thus, if this component ever receives the desc as a handle in 343 // record it will be a no-op. 344 continue 345 } 346 switch desc.Type { 347 case estats.MetricTypeIntCount: 348 rm.intCounts[desc] = createInt64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description)) 349 case estats.MetricTypeFloatCount: 350 rm.floatCounts[desc] = createFloat64Counter(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description)) 351 case estats.MetricTypeIntHisto: 352 rm.intHistos[desc] = createInt64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...)) 353 case estats.MetricTypeFloatHisto: 354 rm.floatHistos[desc] = createFloat64Histogram(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description), otelmetric.WithExplicitBucketBoundaries(desc.Bounds...)) 355 case estats.MetricTypeIntGauge: 356 rm.intGauges[desc] = createInt64Gauge(metrics.Metrics(), desc.Name, meter, otelmetric.WithUnit(desc.Unit), otelmetric.WithDescription(desc.Description)) 357 } 358 } 359 } 360 361 func (rm *registryMetrics) RecordInt64Count(handle *estats.Int64CountHandle, incr int64, labels ...string) { 362 desc := handle.Descriptor() 363 if ic, ok := rm.intCounts[desc]; ok { 364 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 365 ic.Add(context.TODO(), incr, ao) 366 } 367 } 368 369 func (rm *registryMetrics) RecordFloat64Count(handle *estats.Float64CountHandle, incr float64, labels ...string) { 370 desc := handle.Descriptor() 371 if fc, ok := rm.floatCounts[desc]; ok { 372 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 373 fc.Add(context.TODO(), incr, ao) 374 } 375 } 376 377 func (rm *registryMetrics) RecordInt64Histo(handle *estats.Int64HistoHandle, incr int64, labels ...string) { 378 desc := handle.Descriptor() 379 if ih, ok := rm.intHistos[desc]; ok { 380 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 381 ih.Record(context.TODO(), incr, ao) 382 } 383 } 384 385 func (rm *registryMetrics) RecordFloat64Histo(handle *estats.Float64HistoHandle, incr float64, labels ...string) { 386 desc := handle.Descriptor() 387 if fh, ok := rm.floatHistos[desc]; ok { 388 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 389 fh.Record(context.TODO(), incr, ao) 390 } 391 } 392 393 func (rm *registryMetrics) RecordInt64Gauge(handle *estats.Int64GaugeHandle, incr int64, labels ...string) { 394 desc := handle.Descriptor() 395 if ig, ok := rm.intGauges[desc]; ok { 396 ao := optionFromLabels(desc.Labels, desc.OptionalLabels, rm.optionalLabels, labels...) 397 ig.Record(context.TODO(), incr, ao) 398 } 399 } 400 401 // Users of this component should use these bucket boundaries as part of their 402 // SDK MeterProvider passed in. This component sends this as "advice" to the 403 // API, which works, however this stability is not guaranteed, so for safety the 404 // SDK Meter Provider provided should set these bounds for corresponding 405 // metrics. 406 var ( 407 // DefaultLatencyBounds are the default bounds for latency metrics. 408 DefaultLatencyBounds = []float64{0, 0.00001, 0.00005, 0.0001, 0.0003, 0.0006, 0.0008, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.008, 0.01, 0.013, 0.016, 0.02, 0.025, 0.03, 0.04, 0.05, 0.065, 0.08, 0.1, 0.13, 0.16, 0.2, 0.25, 0.3, 0.4, 0.5, 0.65, 0.8, 1, 2, 5, 10, 20, 50, 100} // provide "advice" through API, SDK should set this too 409 // DefaultSizeBounds are the default bounds for metrics which record size. 410 DefaultSizeBounds = []float64{0, 1024, 2048, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824, 4294967296} 411 // defaultPerCallMetrics are the default metrics provided by this module. 412 defaultPerCallMetrics = stats.NewMetricSet(ClientAttemptStartedMetricName, ClientAttemptDurationMetricName, ClientAttemptSentCompressedTotalMessageSizeMetricName, ClientAttemptRcvdCompressedTotalMessageSizeMetricName, ClientCallDurationMetricName, ServerCallStartedMetricName, ServerCallSentCompressedTotalMessageSizeMetricName, ServerCallRcvdCompressedTotalMessageSizeMetricName, ServerCallDurationMetricName) 413 ) 414 415 // DefaultMetrics returns a set of default OpenTelemetry metrics. 416 // 417 // This should only be invoked after init time. 418 func DefaultMetrics() *stats.MetricSet { 419 return defaultPerCallMetrics.Join(estats.DefaultMetrics) 420 }