google.golang.org/grpc@v1.72.2/stats/opentelemetry/client_metrics.go (about) 1 /* 2 * Copyright 2024 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package opentelemetry 18 19 import ( 20 "context" 21 "sync/atomic" 22 "time" 23 24 otelcodes "go.opentelemetry.io/otel/codes" 25 "go.opentelemetry.io/otel/trace" 26 "google.golang.org/grpc" 27 grpccodes "google.golang.org/grpc/codes" 28 estats "google.golang.org/grpc/experimental/stats" 29 istats "google.golang.org/grpc/internal/stats" 30 "google.golang.org/grpc/metadata" 31 "google.golang.org/grpc/stats" 32 "google.golang.org/grpc/status" 33 34 otelattribute "go.opentelemetry.io/otel/attribute" 35 otelmetric "go.opentelemetry.io/otel/metric" 36 ) 37 38 type clientStatsHandler struct { 39 estats.MetricsRecorder 40 options Options 41 clientMetrics clientMetrics 42 } 43 44 func (h *clientStatsHandler) initializeMetrics() { 45 // Will set no metrics to record, logically making this stats handler a 46 // no-op. 47 if h.options.MetricsOptions.MeterProvider == nil { 48 return 49 } 50 51 meter := h.options.MetricsOptions.MeterProvider.Meter("grpc-go", otelmetric.WithInstrumentationVersion(grpc.Version)) 52 if meter == nil { 53 return 54 } 55 56 metrics := h.options.MetricsOptions.Metrics 57 if metrics == nil { 58 metrics = DefaultMetrics() 59 } 60 61 h.clientMetrics.attemptStarted = createInt64Counter(metrics.Metrics(), "grpc.client.attempt.started", meter, otelmetric.WithUnit("attempt"), otelmetric.WithDescription("Number of client call attempts started.")) 62 h.clientMetrics.attemptDuration = createFloat64Histogram(metrics.Metrics(), "grpc.client.attempt.duration", meter, otelmetric.WithUnit("s"), otelmetric.WithDescription("End-to-end time taken to complete a client call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultLatencyBounds...)) 63 h.clientMetrics.attemptSentTotalCompressedMessageSize = createInt64Histogram(metrics.Metrics(), "grpc.client.attempt.sent_total_compressed_message_size", meter, otelmetric.WithUnit("By"), otelmetric.WithDescription("Compressed message bytes sent per client call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultSizeBounds...)) 64 h.clientMetrics.attemptRcvdTotalCompressedMessageSize = createInt64Histogram(metrics.Metrics(), "grpc.client.attempt.rcvd_total_compressed_message_size", meter, otelmetric.WithUnit("By"), otelmetric.WithDescription("Compressed message bytes received per call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultSizeBounds...)) 65 h.clientMetrics.callDuration = createFloat64Histogram(metrics.Metrics(), "grpc.client.call.duration", meter, otelmetric.WithUnit("s"), otelmetric.WithDescription("Time taken by gRPC to complete an RPC from application's perspective."), otelmetric.WithExplicitBucketBoundaries(DefaultLatencyBounds...)) 66 67 rm := ®istryMetrics{ 68 optionalLabels: h.options.MetricsOptions.OptionalLabels, 69 } 70 h.MetricsRecorder = rm 71 rm.registerMetrics(metrics, meter) 72 } 73 74 func (h *clientStatsHandler) unaryInterceptor(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { 75 ci := &callInfo{ 76 target: cc.CanonicalTarget(), 77 method: h.determineMethod(method, opts...), 78 } 79 ctx = setCallInfo(ctx, ci) 80 81 if h.options.MetricsOptions.pluginOption != nil { 82 md := h.options.MetricsOptions.pluginOption.GetMetadata() 83 for k, vs := range md { 84 for _, v := range vs { 85 ctx = metadata.AppendToOutgoingContext(ctx, k, v) 86 } 87 } 88 } 89 90 startTime := time.Now() 91 var span trace.Span 92 if h.options.isTracingEnabled() { 93 ctx, span = h.createCallTraceSpan(ctx, method) 94 } 95 err := invoker(ctx, method, req, reply, cc, opts...) 96 h.perCallTracesAndMetrics(ctx, err, startTime, ci, span) 97 return err 98 } 99 100 // determineMethod determines the method to record attributes with. This will be 101 // "other" if StaticMethod isn't specified or if method filter is set and 102 // specifies, the method name as is otherwise. 103 func (h *clientStatsHandler) determineMethod(method string, opts ...grpc.CallOption) string { 104 for _, opt := range opts { 105 if _, ok := opt.(grpc.StaticMethodCallOption); ok { 106 return removeLeadingSlash(method) 107 } 108 } 109 return "other" 110 } 111 112 func (h *clientStatsHandler) streamInterceptor(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { 113 ci := &callInfo{ 114 target: cc.CanonicalTarget(), 115 method: h.determineMethod(method, opts...), 116 } 117 ctx = setCallInfo(ctx, ci) 118 119 if h.options.MetricsOptions.pluginOption != nil { 120 md := h.options.MetricsOptions.pluginOption.GetMetadata() 121 for k, vs := range md { 122 for _, v := range vs { 123 ctx = metadata.AppendToOutgoingContext(ctx, k, v) 124 } 125 } 126 } 127 128 startTime := time.Now() 129 var span trace.Span 130 if h.options.isTracingEnabled() { 131 ctx, span = h.createCallTraceSpan(ctx, method) 132 } 133 callback := func(err error) { 134 h.perCallTracesAndMetrics(ctx, err, startTime, ci, span) 135 } 136 opts = append([]grpc.CallOption{grpc.OnFinish(callback)}, opts...) 137 return streamer(ctx, desc, cc, method, opts...) 138 } 139 140 // perCallTracesAndMetrics records per call trace spans and metrics. 141 func (h *clientStatsHandler) perCallTracesAndMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo, ts trace.Span) { 142 if h.options.isTracingEnabled() { 143 s := status.Convert(err) 144 if s.Code() == grpccodes.OK { 145 ts.SetStatus(otelcodes.Ok, s.Message()) 146 } else { 147 ts.SetStatus(otelcodes.Error, s.Message()) 148 } 149 ts.End() 150 } 151 if h.options.isMetricsEnabled() { 152 callLatency := float64(time.Since(startTime)) / float64(time.Second) 153 attrs := otelmetric.WithAttributeSet(otelattribute.NewSet( 154 otelattribute.String("grpc.method", ci.method), 155 otelattribute.String("grpc.target", ci.target), 156 otelattribute.String("grpc.status", canonicalString(status.Code(err))), 157 )) 158 h.clientMetrics.callDuration.Record(ctx, callLatency, attrs) 159 } 160 } 161 162 // TagConn exists to satisfy stats.Handler. 163 func (h *clientStatsHandler) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context { 164 return ctx 165 } 166 167 // HandleConn exists to satisfy stats.Handler. 168 func (h *clientStatsHandler) HandleConn(context.Context, stats.ConnStats) {} 169 170 // TagRPC implements per RPC attempt context management. 171 func (h *clientStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo) context.Context { 172 // Numerous stats handlers can be used for the same channel. The cluster 173 // impl balancer which writes to this will only write once, thus have this 174 // stats handler's per attempt scoped context point to the same optional 175 // labels map if set. 176 var labels *istats.Labels 177 if labels = istats.GetLabels(ctx); labels == nil { 178 labels = &istats.Labels{ 179 // The defaults for all the per call labels from a plugin that 180 // executes on the callpath that this OpenTelemetry component 181 // currently supports. 182 TelemetryLabels: map[string]string{ 183 "grpc.lb.locality": "", 184 }, 185 } 186 ctx = istats.SetLabels(ctx, labels) 187 } 188 ai := &attemptInfo{ 189 startTime: time.Now(), 190 xdsLabels: labels.TelemetryLabels, 191 method: removeLeadingSlash(info.FullMethodName), 192 } 193 if h.options.isTracingEnabled() { 194 ctx, ai = h.traceTagRPC(ctx, ai) 195 } 196 return setRPCInfo(ctx, &rpcInfo{ 197 ai: ai, 198 }) 199 } 200 201 func (h *clientStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) { 202 ri := getRPCInfo(ctx) 203 if ri == nil { 204 logger.Error("ctx passed into client side stats handler metrics event handling has no client attempt data present") 205 return 206 } 207 if h.options.isMetricsEnabled() { 208 h.processRPCEvent(ctx, rs, ri.ai) 209 } 210 if h.options.isTracingEnabled() { 211 populateSpan(rs, ri.ai) 212 } 213 } 214 215 func (h *clientStatsHandler) processRPCEvent(ctx context.Context, s stats.RPCStats, ai *attemptInfo) { 216 switch st := s.(type) { 217 case *stats.Begin: 218 ci := getCallInfo(ctx) 219 if ci == nil { 220 logger.Error("ctx passed into client side stats handler metrics event handling has no metrics data present") 221 return 222 } 223 224 attrs := otelmetric.WithAttributeSet(otelattribute.NewSet( 225 otelattribute.String("grpc.method", ci.method), 226 otelattribute.String("grpc.target", ci.target), 227 )) 228 h.clientMetrics.attemptStarted.Add(ctx, 1, attrs) 229 case *stats.OutPayload: 230 atomic.AddInt64(&ai.sentCompressedBytes, int64(st.CompressedLength)) 231 case *stats.InPayload: 232 atomic.AddInt64(&ai.recvCompressedBytes, int64(st.CompressedLength)) 233 case *stats.InHeader: 234 h.setLabelsFromPluginOption(ai, st.Header) 235 case *stats.InTrailer: 236 h.setLabelsFromPluginOption(ai, st.Trailer) 237 case *stats.End: 238 h.processRPCEnd(ctx, ai, st) 239 default: 240 } 241 } 242 243 func (h *clientStatsHandler) setLabelsFromPluginOption(ai *attemptInfo, incomingMetadata metadata.MD) { 244 if ai.pluginOptionLabels == nil && h.options.MetricsOptions.pluginOption != nil { 245 labels := h.options.MetricsOptions.pluginOption.GetLabels(incomingMetadata) 246 if labels == nil { 247 labels = map[string]string{} // Shouldn't return a nil map. Make it empty if so to ignore future Get Calls for this Attempt. 248 } 249 ai.pluginOptionLabels = labels 250 } 251 } 252 253 func (h *clientStatsHandler) processRPCEnd(ctx context.Context, ai *attemptInfo, e *stats.End) { 254 ci := getCallInfo(ctx) 255 if ci == nil { 256 logger.Error("ctx passed into client side stats handler metrics event handling has no metrics data present") 257 return 258 } 259 latency := float64(time.Since(ai.startTime)) / float64(time.Second) 260 st := "OK" 261 if e.Error != nil { 262 s, _ := status.FromError(e.Error) 263 st = canonicalString(s.Code()) 264 } 265 266 attributes := []otelattribute.KeyValue{ 267 otelattribute.String("grpc.method", ci.method), 268 otelattribute.String("grpc.target", ci.target), 269 otelattribute.String("grpc.status", st), 270 } 271 272 for k, v := range ai.pluginOptionLabels { 273 attributes = append(attributes, otelattribute.String(k, v)) 274 } 275 276 for _, o := range h.options.MetricsOptions.OptionalLabels { 277 // TODO: Add a filter for converting to unknown if not present in the 278 // CSM Plugin Option layer by adding an optional labels API. 279 if val, ok := ai.xdsLabels[o]; ok { 280 attributes = append(attributes, otelattribute.String(o, val)) 281 } 282 } 283 284 // Allocate vararg slice once. 285 opts := []otelmetric.RecordOption{otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...))} 286 h.clientMetrics.attemptDuration.Record(ctx, latency, opts...) 287 h.clientMetrics.attemptSentTotalCompressedMessageSize.Record(ctx, atomic.LoadInt64(&ai.sentCompressedBytes), opts...) 288 h.clientMetrics.attemptRcvdTotalCompressedMessageSize.Record(ctx, atomic.LoadInt64(&ai.recvCompressedBytes), opts...) 289 } 290 291 const ( 292 // ClientAttemptStartedMetricName is the number of client call attempts 293 // started. 294 ClientAttemptStartedMetricName string = "grpc.client.attempt.started" 295 // ClientAttemptDurationMetricName is the end-to-end time taken to complete 296 // a client call attempt. 297 ClientAttemptDurationMetricName string = "grpc.client.attempt.duration" 298 // ClientAttemptSentCompressedTotalMessageSizeMetricName is the compressed 299 // message bytes sent per client call attempt. 300 ClientAttemptSentCompressedTotalMessageSizeMetricName string = "grpc.client.attempt.sent_total_compressed_message_size" 301 // ClientAttemptRcvdCompressedTotalMessageSizeMetricName is the compressed 302 // message bytes received per call attempt. 303 ClientAttemptRcvdCompressedTotalMessageSizeMetricName string = "grpc.client.attempt.rcvd_total_compressed_message_size" 304 // ClientCallDurationMetricName is the time taken by gRPC to complete an RPC 305 // from application's perspective. 306 ClientCallDurationMetricName string = "grpc.client.call.duration" 307 )