google.golang.org/grpc@v1.72.2/stats/opentelemetry/client_metrics.go (about)

     1  /*
     2   * Copyright 2024 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package opentelemetry
    18  
    19  import (
    20  	"context"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	otelcodes "go.opentelemetry.io/otel/codes"
    25  	"go.opentelemetry.io/otel/trace"
    26  	"google.golang.org/grpc"
    27  	grpccodes "google.golang.org/grpc/codes"
    28  	estats "google.golang.org/grpc/experimental/stats"
    29  	istats "google.golang.org/grpc/internal/stats"
    30  	"google.golang.org/grpc/metadata"
    31  	"google.golang.org/grpc/stats"
    32  	"google.golang.org/grpc/status"
    33  
    34  	otelattribute "go.opentelemetry.io/otel/attribute"
    35  	otelmetric "go.opentelemetry.io/otel/metric"
    36  )
    37  
// clientStatsHandler is the client-side handler of this package. Its methods
// provide the unary and stream interceptors as well as the stats.Handler
// callbacks (TagConn, HandleConn, TagRPC, HandleRPC) that record per-call and
// per-attempt metrics and traces.
type clientStatsHandler struct {
	// MetricsRecorder is assigned a *registryMetrics by initializeMetrics.
	// initializeMetrics leaves it untouched when no MeterProvider is
	// configured.
	estats.MetricsRecorder
	// options is the configuration consulted on every RPC to decide whether
	// metrics and/or tracing are enabled.
	options       Options
	// clientMetrics holds the instruments created by initializeMetrics.
	clientMetrics clientMetrics
}
    43  
    44  func (h *clientStatsHandler) initializeMetrics() {
    45  	// Will set no metrics to record, logically making this stats handler a
    46  	// no-op.
    47  	if h.options.MetricsOptions.MeterProvider == nil {
    48  		return
    49  	}
    50  
    51  	meter := h.options.MetricsOptions.MeterProvider.Meter("grpc-go", otelmetric.WithInstrumentationVersion(grpc.Version))
    52  	if meter == nil {
    53  		return
    54  	}
    55  
    56  	metrics := h.options.MetricsOptions.Metrics
    57  	if metrics == nil {
    58  		metrics = DefaultMetrics()
    59  	}
    60  
    61  	h.clientMetrics.attemptStarted = createInt64Counter(metrics.Metrics(), "grpc.client.attempt.started", meter, otelmetric.WithUnit("attempt"), otelmetric.WithDescription("Number of client call attempts started."))
    62  	h.clientMetrics.attemptDuration = createFloat64Histogram(metrics.Metrics(), "grpc.client.attempt.duration", meter, otelmetric.WithUnit("s"), otelmetric.WithDescription("End-to-end time taken to complete a client call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultLatencyBounds...))
    63  	h.clientMetrics.attemptSentTotalCompressedMessageSize = createInt64Histogram(metrics.Metrics(), "grpc.client.attempt.sent_total_compressed_message_size", meter, otelmetric.WithUnit("By"), otelmetric.WithDescription("Compressed message bytes sent per client call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultSizeBounds...))
    64  	h.clientMetrics.attemptRcvdTotalCompressedMessageSize = createInt64Histogram(metrics.Metrics(), "grpc.client.attempt.rcvd_total_compressed_message_size", meter, otelmetric.WithUnit("By"), otelmetric.WithDescription("Compressed message bytes received per call attempt."), otelmetric.WithExplicitBucketBoundaries(DefaultSizeBounds...))
    65  	h.clientMetrics.callDuration = createFloat64Histogram(metrics.Metrics(), "grpc.client.call.duration", meter, otelmetric.WithUnit("s"), otelmetric.WithDescription("Time taken by gRPC to complete an RPC from application's perspective."), otelmetric.WithExplicitBucketBoundaries(DefaultLatencyBounds...))
    66  
    67  	rm := &registryMetrics{
    68  		optionalLabels: h.options.MetricsOptions.OptionalLabels,
    69  	}
    70  	h.MetricsRecorder = rm
    71  	rm.registerMetrics(metrics, meter)
    72  }
    73  
    74  func (h *clientStatsHandler) unaryInterceptor(ctx context.Context, method string, req, reply any, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
    75  	ci := &callInfo{
    76  		target: cc.CanonicalTarget(),
    77  		method: h.determineMethod(method, opts...),
    78  	}
    79  	ctx = setCallInfo(ctx, ci)
    80  
    81  	if h.options.MetricsOptions.pluginOption != nil {
    82  		md := h.options.MetricsOptions.pluginOption.GetMetadata()
    83  		for k, vs := range md {
    84  			for _, v := range vs {
    85  				ctx = metadata.AppendToOutgoingContext(ctx, k, v)
    86  			}
    87  		}
    88  	}
    89  
    90  	startTime := time.Now()
    91  	var span trace.Span
    92  	if h.options.isTracingEnabled() {
    93  		ctx, span = h.createCallTraceSpan(ctx, method)
    94  	}
    95  	err := invoker(ctx, method, req, reply, cc, opts...)
    96  	h.perCallTracesAndMetrics(ctx, err, startTime, ci, span)
    97  	return err
    98  }
    99  
   100  // determineMethod determines the method to record attributes with. This will be
   101  // "other" if StaticMethod isn't specified or if method filter is set and
   102  // specifies, the method name as is otherwise.
   103  func (h *clientStatsHandler) determineMethod(method string, opts ...grpc.CallOption) string {
   104  	for _, opt := range opts {
   105  		if _, ok := opt.(grpc.StaticMethodCallOption); ok {
   106  			return removeLeadingSlash(method)
   107  		}
   108  	}
   109  	return "other"
   110  }
   111  
   112  func (h *clientStatsHandler) streamInterceptor(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
   113  	ci := &callInfo{
   114  		target: cc.CanonicalTarget(),
   115  		method: h.determineMethod(method, opts...),
   116  	}
   117  	ctx = setCallInfo(ctx, ci)
   118  
   119  	if h.options.MetricsOptions.pluginOption != nil {
   120  		md := h.options.MetricsOptions.pluginOption.GetMetadata()
   121  		for k, vs := range md {
   122  			for _, v := range vs {
   123  				ctx = metadata.AppendToOutgoingContext(ctx, k, v)
   124  			}
   125  		}
   126  	}
   127  
   128  	startTime := time.Now()
   129  	var span trace.Span
   130  	if h.options.isTracingEnabled() {
   131  		ctx, span = h.createCallTraceSpan(ctx, method)
   132  	}
   133  	callback := func(err error) {
   134  		h.perCallTracesAndMetrics(ctx, err, startTime, ci, span)
   135  	}
   136  	opts = append([]grpc.CallOption{grpc.OnFinish(callback)}, opts...)
   137  	return streamer(ctx, desc, cc, method, opts...)
   138  }
   139  
   140  // perCallTracesAndMetrics records per call trace spans and metrics.
   141  func (h *clientStatsHandler) perCallTracesAndMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo, ts trace.Span) {
   142  	if h.options.isTracingEnabled() {
   143  		s := status.Convert(err)
   144  		if s.Code() == grpccodes.OK {
   145  			ts.SetStatus(otelcodes.Ok, s.Message())
   146  		} else {
   147  			ts.SetStatus(otelcodes.Error, s.Message())
   148  		}
   149  		ts.End()
   150  	}
   151  	if h.options.isMetricsEnabled() {
   152  		callLatency := float64(time.Since(startTime)) / float64(time.Second)
   153  		attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
   154  			otelattribute.String("grpc.method", ci.method),
   155  			otelattribute.String("grpc.target", ci.target),
   156  			otelattribute.String("grpc.status", canonicalString(status.Code(err))),
   157  		))
   158  		h.clientMetrics.callDuration.Record(ctx, callLatency, attrs)
   159  	}
   160  }
   161  
   162  // TagConn exists to satisfy stats.Handler.
   163  func (h *clientStatsHandler) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context {
   164  	return ctx
   165  }
   166  
   167  // HandleConn exists to satisfy stats.Handler.
   168  func (h *clientStatsHandler) HandleConn(context.Context, stats.ConnStats) {}
   169  
   170  // TagRPC implements per RPC attempt context management.
   171  func (h *clientStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo) context.Context {
   172  	// Numerous stats handlers can be used for the same channel. The cluster
   173  	// impl balancer which writes to this will only write once, thus have this
   174  	// stats handler's per attempt scoped context point to the same optional
   175  	// labels map if set.
   176  	var labels *istats.Labels
   177  	if labels = istats.GetLabels(ctx); labels == nil {
   178  		labels = &istats.Labels{
   179  			// The defaults for all the per call labels from a plugin that
   180  			// executes on the callpath that this OpenTelemetry component
   181  			// currently supports.
   182  			TelemetryLabels: map[string]string{
   183  				"grpc.lb.locality": "",
   184  			},
   185  		}
   186  		ctx = istats.SetLabels(ctx, labels)
   187  	}
   188  	ai := &attemptInfo{
   189  		startTime: time.Now(),
   190  		xdsLabels: labels.TelemetryLabels,
   191  		method:    removeLeadingSlash(info.FullMethodName),
   192  	}
   193  	if h.options.isTracingEnabled() {
   194  		ctx, ai = h.traceTagRPC(ctx, ai)
   195  	}
   196  	return setRPCInfo(ctx, &rpcInfo{
   197  		ai: ai,
   198  	})
   199  }
   200  
   201  func (h *clientStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
   202  	ri := getRPCInfo(ctx)
   203  	if ri == nil {
   204  		logger.Error("ctx passed into client side stats handler metrics event handling has no client attempt data present")
   205  		return
   206  	}
   207  	if h.options.isMetricsEnabled() {
   208  		h.processRPCEvent(ctx, rs, ri.ai)
   209  	}
   210  	if h.options.isTracingEnabled() {
   211  		populateSpan(rs, ri.ai)
   212  	}
   213  }
   214  
   215  func (h *clientStatsHandler) processRPCEvent(ctx context.Context, s stats.RPCStats, ai *attemptInfo) {
   216  	switch st := s.(type) {
   217  	case *stats.Begin:
   218  		ci := getCallInfo(ctx)
   219  		if ci == nil {
   220  			logger.Error("ctx passed into client side stats handler metrics event handling has no metrics data present")
   221  			return
   222  		}
   223  
   224  		attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
   225  			otelattribute.String("grpc.method", ci.method),
   226  			otelattribute.String("grpc.target", ci.target),
   227  		))
   228  		h.clientMetrics.attemptStarted.Add(ctx, 1, attrs)
   229  	case *stats.OutPayload:
   230  		atomic.AddInt64(&ai.sentCompressedBytes, int64(st.CompressedLength))
   231  	case *stats.InPayload:
   232  		atomic.AddInt64(&ai.recvCompressedBytes, int64(st.CompressedLength))
   233  	case *stats.InHeader:
   234  		h.setLabelsFromPluginOption(ai, st.Header)
   235  	case *stats.InTrailer:
   236  		h.setLabelsFromPluginOption(ai, st.Trailer)
   237  	case *stats.End:
   238  		h.processRPCEnd(ctx, ai, st)
   239  	default:
   240  	}
   241  }
   242  
   243  func (h *clientStatsHandler) setLabelsFromPluginOption(ai *attemptInfo, incomingMetadata metadata.MD) {
   244  	if ai.pluginOptionLabels == nil && h.options.MetricsOptions.pluginOption != nil {
   245  		labels := h.options.MetricsOptions.pluginOption.GetLabels(incomingMetadata)
   246  		if labels == nil {
   247  			labels = map[string]string{} // Shouldn't return a nil map. Make it empty if so to ignore future Get Calls for this Attempt.
   248  		}
   249  		ai.pluginOptionLabels = labels
   250  	}
   251  }
   252  
   253  func (h *clientStatsHandler) processRPCEnd(ctx context.Context, ai *attemptInfo, e *stats.End) {
   254  	ci := getCallInfo(ctx)
   255  	if ci == nil {
   256  		logger.Error("ctx passed into client side stats handler metrics event handling has no metrics data present")
   257  		return
   258  	}
   259  	latency := float64(time.Since(ai.startTime)) / float64(time.Second)
   260  	st := "OK"
   261  	if e.Error != nil {
   262  		s, _ := status.FromError(e.Error)
   263  		st = canonicalString(s.Code())
   264  	}
   265  
   266  	attributes := []otelattribute.KeyValue{
   267  		otelattribute.String("grpc.method", ci.method),
   268  		otelattribute.String("grpc.target", ci.target),
   269  		otelattribute.String("grpc.status", st),
   270  	}
   271  
   272  	for k, v := range ai.pluginOptionLabels {
   273  		attributes = append(attributes, otelattribute.String(k, v))
   274  	}
   275  
   276  	for _, o := range h.options.MetricsOptions.OptionalLabels {
   277  		// TODO: Add a filter for converting to unknown if not present in the
   278  		// CSM Plugin Option layer by adding an optional labels API.
   279  		if val, ok := ai.xdsLabels[o]; ok {
   280  			attributes = append(attributes, otelattribute.String(o, val))
   281  		}
   282  	}
   283  
   284  	// Allocate vararg slice once.
   285  	opts := []otelmetric.RecordOption{otelmetric.WithAttributeSet(otelattribute.NewSet(attributes...))}
   286  	h.clientMetrics.attemptDuration.Record(ctx, latency, opts...)
   287  	h.clientMetrics.attemptSentTotalCompressedMessageSize.Record(ctx, atomic.LoadInt64(&ai.sentCompressedBytes), opts...)
   288  	h.clientMetrics.attemptRcvdTotalCompressedMessageSize.Record(ctx, atomic.LoadInt64(&ai.recvCompressedBytes), opts...)
   289  }
   290  
// Names of the client-side metrics. Each value equals the instrument name
// that initializeMetrics passes when creating the corresponding counter or
// histogram.
const (
	// ClientAttemptStartedMetricName is the number of client call attempts
	// started.
	ClientAttemptStartedMetricName string = "grpc.client.attempt.started"
	// ClientAttemptDurationMetricName is the end-to-end time taken to complete
	// a client call attempt.
	ClientAttemptDurationMetricName string = "grpc.client.attempt.duration"
	// ClientAttemptSentCompressedTotalMessageSizeMetricName is the compressed
	// message bytes sent per client call attempt.
	ClientAttemptSentCompressedTotalMessageSizeMetricName string = "grpc.client.attempt.sent_total_compressed_message_size"
	// ClientAttemptRcvdCompressedTotalMessageSizeMetricName is the compressed
	// message bytes received per call attempt.
	ClientAttemptRcvdCompressedTotalMessageSizeMetricName string = "grpc.client.attempt.rcvd_total_compressed_message_size"
	// ClientCallDurationMetricName is the time taken by gRPC to complete an RPC
	// from application's perspective.
	ClientCallDurationMetricName string = "grpc.client.call.duration"
)