google.golang.org/grpc@v1.74.2/stats/opentelemetry/client_metrics.go (about)

     1  /*
     2   * Copyright 2024 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package opentelemetry
    18  
    19  import (
    20  	"context"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	otelattribute "go.opentelemetry.io/otel/attribute"
    25  	otelmetric "go.opentelemetry.io/otel/metric"
    26  	"google.golang.org/grpc"
    27  	estats "google.golang.org/grpc/experimental/stats"
    28  	istats "google.golang.org/grpc/internal/stats"
    29  	"google.golang.org/grpc/metadata"
    30  	"google.golang.org/grpc/stats"
    31  	"google.golang.org/grpc/status"
    32  )
    33  
// clientMetricsHandler holds the client-side metrics state used by the
// interceptors and stats.Handler methods in this file: the user-supplied
// options, the per-call and per-attempt instruments, and an embedded
// estats.MetricsRecorder.
type clientMetricsHandler struct {
	// MetricsRecorder is assigned by initializeMetrics. It is left untouched
	// when initializeMetrics returns early.
	estats.MetricsRecorder
	options       Options
	clientMetrics clientMetrics
}
    39  
    40  func (h *clientMetricsHandler) initializeMetrics() {
    41  	// Will set no metrics to record, logically making this stats handler a
    42  	// no-op.
    43  	if h.options.MetricsOptions.MeterProvider == nil {
    44  		return
    45  	}
    46  
    47  	meter := h.options.MetricsOptions.MeterProvider.Meter("grpc-go", otelmetric.WithInstrumentationVersion(grpc.Version))
    48  	if meter == nil {
    49  		return
    50  	}
    51  
    52  	metrics := h.options.MetricsOptions.Metrics
    53  	if metrics == nil {
    54  		metrics = DefaultMetrics()
    55  	}
    56  
    57  	h.clientMetrics.attemptStarted = createInt64Counter(metrics.Metrics(), "grpc.client.attempt.started", meter, otelmetric.WithUnit("attempt"), otelmetric.WithDescription("Number of client call attempts started."))
    58  	h.clientMetrics.attemptDuration = createFloat64Histogram(metrics.Metrics(), "grpc.client.attempt.duration", meter, otelmetric.WithUnit("s"), otelmetric.WithDescription("End-to-end time taken to complete a client call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultLatencyBounds...))
    59  	h.clientMetrics.attemptSentTotalCompressedMessageSize = createInt64Histogram(metrics.Metrics(), "grpc.client.attempt.sent_total_compressed_message_size", meter, otelmetric.WithUnit("By"), otelmetric.WithDescription("Compressed message bytes sent per client call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultSizeBounds...))
    60  	h.clientMetrics.attemptRcvdTotalCompressedMessageSize = createInt64Histogram(metrics.Metrics(), "grpc.client.attempt.rcvd_total_compressed_message_size", meter, otelmetric.WithUnit("By"), otelmetric.WithDescription("Compressed message bytes received per call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultSizeBounds...))
    61  	h.clientMetrics.callDuration = createFloat64Histogram(metrics.Metrics(), "grpc.client.call.duration", meter, otelmetric.WithUnit("s"), otelmetric.WithDescription("Time taken by gRPC to complete an RPC from application's perspective."), otelmetric.WithExplicitBucketBoundaries(DefaultLatencyBounds...))
    62  
    63  	rm := &registryMetrics{
    64  		optionalLabels: h.options.MetricsOptions.OptionalLabels,
    65  	}
    66  	h.MetricsRecorder = rm
    67  	rm.registerMetrics(metrics, meter)
    68  }
    69  
    70  // getOrCreateCallInfo returns the existing callInfo from context if present,
    71  // or creates and attaches a new one.
    72  func getOrCreateCallInfo(ctx context.Context, cc *grpc.ClientConn, method string, opts ...grpc.CallOption) (context.Context, *callInfo) {
    73  	ci := getCallInfo(ctx)
    74  	if ci == nil {
    75  		if logger.V(2) {
    76  			logger.Info("Creating new CallInfo since its not present in context")
    77  		}
    78  		ci = &callInfo{
    79  			target: cc.CanonicalTarget(),
    80  			method: determineMethod(method, opts...),
    81  		}
    82  		ctx = setCallInfo(ctx, ci)
    83  	}
    84  	return ctx, ci
    85  }
    86  
    87  func (h *clientMetricsHandler) unaryInterceptor(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
    88  	ctx, ci := getOrCreateCallInfo(ctx, cc, method, opts...)
    89  
    90  	if h.options.MetricsOptions.pluginOption != nil {
    91  		md := h.options.MetricsOptions.pluginOption.GetMetadata()
    92  		for k, vs := range md {
    93  			for _, v := range vs {
    94  				ctx = metadata.AppendToOutgoingContext(ctx, k, v)
    95  			}
    96  		}
    97  	}
    98  
    99  	startTime := time.Now()
   100  	err := invoker(ctx, method, req, reply, cc, opts...)
   101  	h.perCallMetrics(ctx, err, startTime, ci)
   102  	return err
   103  }
   104  
   105  // determineMethod determines the method to record attributes with. This will be
   106  // "other" if StaticMethod isn't specified or if method filter is set and
   107  // specifies, the method name as is otherwise.
   108  func determineMethod(method string, opts ...grpc.CallOption) string {
   109  	for _, opt := range opts {
   110  		if _, ok := opt.(grpc.StaticMethodCallOption); ok {
   111  			return removeLeadingSlash(method)
   112  		}
   113  	}
   114  	return "other"
   115  }
   116  
   117  func (h *clientMetricsHandler) streamInterceptor(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
   118  	ctx, ci := getOrCreateCallInfo(ctx, cc, method, opts...)
   119  
   120  	if h.options.MetricsOptions.pluginOption != nil {
   121  		md := h.options.MetricsOptions.pluginOption.GetMetadata()
   122  		for k, vs := range md {
   123  			for _, v := range vs {
   124  				ctx = metadata.AppendToOutgoingContext(ctx, k, v)
   125  			}
   126  		}
   127  	}
   128  
   129  	startTime := time.Now()
   130  	callback := func(err error) {
   131  		h.perCallMetrics(ctx, err, startTime, ci)
   132  	}
   133  	opts = append([]grpc.CallOption{grpc.OnFinish(callback)}, opts...)
   134  	return streamer(ctx, desc, cc, method, opts...)
   135  }
   136  
   137  // perCallMetrics records per call metrics for both unary and stream calls.
   138  func (h *clientMetricsHandler) perCallMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo) {
   139  	callLatency := float64(time.Since(startTime)) / float64(time.Second)
   140  	attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
   141  		otelattribute.String("grpc.method", ci.method),
   142  		otelattribute.String("grpc.target", ci.target),
   143  		otelattribute.String("grpc.status", canonicalString(status.Code(err))),
   144  	))
   145  	h.clientMetrics.callDuration.Record(ctx, callLatency, attrs)
   146  }
   147  
// TagConn exists to satisfy stats.Handler. It attaches nothing and returns ctx
// exactly as it was passed in; the connection tag info is ignored.
func (h *clientMetricsHandler) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context {
	return ctx
}
   152  
// HandleConn exists to satisfy stats.Handler. Its body is empty, so
// connection-level stats passed to it are ignored.
func (h *clientMetricsHandler) HandleConn(context.Context, stats.ConnStats) {}
   155  
   156  // getOrCreateRPCAttemptInfo retrieves or creates an rpc attemptInfo object
   157  // and ensures it is set in the context along with the rpcInfo.
   158  func getOrCreateRPCAttemptInfo(ctx context.Context) (context.Context, *attemptInfo) {
   159  	ri := getRPCInfo(ctx)
   160  	if ri != nil {
   161  		return ctx, ri.ai
   162  	}
   163  	ri = &rpcInfo{ai: &attemptInfo{}}
   164  	return setRPCInfo(ctx, ri), ri.ai
   165  }
   166  
   167  // TagRPC implements per RPC attempt context management for metrics.
   168  func (h *clientMetricsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo) context.Context {
   169  	// Numerous stats handlers can be used for the same channel. The cluster
   170  	// impl balancer which writes to this will only write once, thus have this
   171  	// stats handler's per attempt scoped context point to the same optional
   172  	// labels map if set.
   173  	var labels *istats.Labels
   174  	if labels = istats.GetLabels(ctx); labels == nil {
   175  		labels = &istats.Labels{
   176  			// The defaults for all the per call labels from a plugin that
   177  			// executes on the callpath that this OpenTelemetry component
   178  			// currently supports.
   179  			TelemetryLabels: map[string]string{
   180  				"grpc.lb.locality": "",
   181  			},
   182  		}
   183  		ctx = istats.SetLabels(ctx, labels)
   184  	}
   185  	ctx, ai := getOrCreateRPCAttemptInfo(ctx)
   186  	ai.startTime = time.Now()
   187  	ai.xdsLabels = labels.TelemetryLabels
   188  	ai.method = removeLeadingSlash(info.FullMethodName)
   189  
   190  	return setRPCInfo(ctx, &rpcInfo{ai: ai})
   191  }
   192  
   193  // HandleRPC handles per RPC stats implementation.
   194  func (h *clientMetricsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
   195  	ri := getRPCInfo(ctx)
   196  	if ri == nil {
   197  		logger.Error("ctx passed into client side stats handler metrics event handling has no client attempt data present")
   198  		return
   199  	}
   200  	h.processRPCEvent(ctx, rs, ri.ai)
   201  }
   202  
   203  func (h *clientMetricsHandler) processRPCEvent(ctx context.Context, s stats.RPCStats, ai *attemptInfo) {
   204  	switch st := s.(type) {
   205  	case *stats.Begin:
   206  		ci := getCallInfo(ctx)
   207  		if ci == nil {
   208  			logger.Error("ctx passed into client side stats handler metrics event handling has no metrics data present")
   209  			return
   210  		}
   211  
   212  		attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
   213  			otelattribute.String("grpc.method", ci.method),
   214  			otelattribute.String("grpc.target", ci.target),
   215  		))
   216  		h.clientMetrics.attemptStarted.Add(ctx, 1, attrs)
   217  	case *stats.OutPayload:
   218  		atomic.AddInt64(&ai.sentCompressedBytes, int64(st.CompressedLength))
   219  	case *stats.InPayload:
   220  		atomic.AddInt64(&ai.recvCompressedBytes, int64(st.CompressedLength))
   221  	case *stats.InHeader:
   222  		h.setLabelsFromPluginOption(ai, st.Header)
   223  	case *stats.InTrailer:
   224  		h.setLabelsFromPluginOption(ai, st.Trailer)
   225  	case *stats.End:
   226  		h.processRPCEnd(ctx, ai, st)
   227  	default:
   228  	}
   229  }
   230  
   231  func (h *clientMetricsHandler) setLabelsFromPluginOption(ai *attemptInfo, incomingMetadata metadata.MD) {
   232  	if ai.pluginOptionLabels == nil && h.options.MetricsOptions.pluginOption != nil {
   233  		labels := h.options.MetricsOptions.pluginOption.GetLabels(incomingMetadata)
   234  		if labels == nil {
   235  			labels = map[string]string{} // Shouldn't return a nil map. Make it empty if so to ignore future Get Calls for this Attempt.
   236  		}
   237  		ai.pluginOptionLabels = labels
   238  	}
   239  }
   240  
   241  func (h *clientMetricsHandler) processRPCEnd(ctx context.Context, ai *attemptInfo, e *stats.End) {
   242  	ci := getCallInfo(ctx)
   243  	if ci == nil {
   244  		logger.Error("ctx passed into client side stats handler metrics event handling has no metrics data present")
   245  		return
   246  	}
   247  	latency := float64(time.Since(ai.startTime)) / float64(time.Second)
   248  	st := "OK"
   249  	if e.Error != nil {
   250  		s, _ := status.FromError(e.Error)
   251  		st = canonicalString(s.Code())
   252  	}
   253  
   254  	attributes := []otelattribute.KeyValue{
   255  		otelattribute.String("grpc.method", ci.method),
   256  		otelattribute.String("grpc.target", ci.target),
   257  		otelattribute.String("grpc.status", st),
   258  	}
   259  
   260  	for k, v := range ai.pluginOptionLabels {
   261  		attributes = append(attributes, otelattribute.String(k, v))
   262  	}
   263  
   264  	for _, o := range h.options.MetricsOptions.OptionalLabels {
   265  		// TODO: Add a filter for converting to unknown if not present in the
   266  		// CSM Plugin Option layer by adding an optional labels API.
   267  		if val, ok := ai.xdsLabels[o]; ok {
   268  			attributes = append(attributes, otelattribute.String(o, val))
   269  		}
   270  	}
   271  
   272  	// Allocate vararg slice once.
   273  	opts := []otelmetric.RecordOption{otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...))}
   274  	h.clientMetrics.attemptDuration.Record(ctx, latency, opts...)
   275  	h.clientMetrics.attemptSentTotalCompressedMessageSize.Record(ctx, atomic.LoadInt64(&ai.sentCompressedBytes), opts...)
   276  	h.clientMetrics.attemptRcvdTotalCompressedMessageSize.Record(ctx, atomic.LoadInt64(&ai.recvCompressedBytes), opts...)
   277  }
   278  
// Names of the client-side metrics. The values match the instrument names
// passed to the create helpers in initializeMetrics.
const (
	// ClientAttemptStartedMetricName is the name of the metric counting the
	// number of client call attempts started.
	ClientAttemptStartedMetricName string = "grpc.client.attempt.started"
	// ClientAttemptDurationMetricName is the name of the metric recording the
	// end-to-end time taken to complete a client call attempt.
	ClientAttemptDurationMetricName string = "grpc.client.attempt.duration"
	// ClientAttemptSentCompressedTotalMessageSizeMetricName is the name of the
	// metric recording the compressed message bytes sent per client call
	// attempt.
	ClientAttemptSentCompressedTotalMessageSizeMetricName string = "grpc.client.attempt.sent_total_compressed_message_size"
	// ClientAttemptRcvdCompressedTotalMessageSizeMetricName is the name of the
	// metric recording the compressed message bytes received per call attempt.
	ClientAttemptRcvdCompressedTotalMessageSizeMetricName string = "grpc.client.attempt.rcvd_total_compressed_message_size"
	// ClientCallDurationMetricName is the name of the metric recording the
	// time taken by gRPC to complete an RPC from the application's
	// perspective.
	ClientCallDurationMetricName string = "grpc.client.call.duration"
)