github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/grpc/grpc_metrics.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package grpc
    22  
    23  import (
    24  	"context"
    25  	"io"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/m3db/m3/src/x/instrument"
    30  
    31  	"github.com/uber-go/tally"
    32  	"google.golang.org/grpc"
    33  	"google.golang.org/grpc/codes"
    34  	"google.golang.org/grpc/status"
    35  )
    36  
    37  var (
    38  	grpcTypeUnary        = "unary"
    39  	grpcTypeClientStream = "client_stream"
    40  	grpcTypeServerStream = "server_stream"
    41  	grpcTypeBidiStream   = "bidi_stream"
    42  )
    43  
// InterceptorInstrumentOptions is a set of options for instrumented interceptors.
// It is the caller-facing form; resolve converts it into the internal form
// with defaults applied.
type InterceptorInstrumentOptions struct {
	// Scope, required. All interceptor metrics are emitted under a "grpc"
	// subscope of this scope.
	Scope tally.Scope
	// TimerOptions, optional and if not set will use defaults
	// (see DefaultTimerOptions).
	TimerOptions *instrument.TimerOptions
}
    51  
// interceptorInstrumentOptions is the resolved form of
// InterceptorInstrumentOptions: TimerOptions is held by value, so it is always
// populated (either the caller's value or the package default).
type interceptorInstrumentOptions struct {
	// Scope is copied as-is from the public options.
	Scope        tally.Scope
	// TimerOptions is used for every timer created by the interceptors.
	TimerOptions instrument.TimerOptions
}
    56  
    57  func (o InterceptorInstrumentOptions) resolve() interceptorInstrumentOptions {
    58  	result := interceptorInstrumentOptions{Scope: o.Scope}
    59  	if o.TimerOptions == nil {
    60  		result.TimerOptions = DefaultTimerOptions()
    61  	} else {
    62  		result.TimerOptions = *o.TimerOptions
    63  	}
    64  	return result
    65  }
    66  
    67  // UnaryClientInterceptor provides tally metrics for client unary calls.
    68  func UnaryClientInterceptor(
    69  	opts InterceptorInstrumentOptions,
    70  ) func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
    71  	resolvedOpts := opts.resolve()
    72  	return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
    73  		m := newClientMetrics(grpcTypeUnary, method, resolvedOpts)
    74  		err := invoker(ctx, method, req, reply, cc, opts...)
    75  		st, _ := status.FromError(err)
    76  		m.Handled(st.Code())
    77  		return err
    78  	}
    79  }
    80  
    81  // StreamClientInterceptor provides tally metrics for client streams.
    82  func StreamClientInterceptor(
    83  	opts InterceptorInstrumentOptions,
    84  ) func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
    85  	resolvedOpts := opts.resolve()
    86  	return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) {
    87  		m := newClientMetrics(rpcTypeFromStreamDesc(desc), method, resolvedOpts)
    88  		stream, err := streamer(ctx, desc, cc, method, opts...)
    89  		if err != nil {
    90  			st, _ := status.FromError(err)
    91  			m.Handled(st.Code())
    92  			return nil, err
    93  		}
    94  		return &monitoredClientStream{ClientStream: stream, metrics: m}, nil
    95  	}
    96  }
    97  
// monitoredClientStream wraps a grpc.ClientStream and records metrics around
// its SendMsg and RecvMsg calls. All other ClientStream methods are served by
// the embedded stream unchanged.
type monitoredClientStream struct {
	grpc.ClientStream
	// metrics holds the per-call metrics created when the stream was opened.
	metrics clientMetrics
}
   102  
   103  func (s *monitoredClientStream) SendMsg(m interface{}) error {
   104  	timer := s.metrics.SendMessageTimer()
   105  	err := s.ClientStream.SendMsg(m)
   106  	timer.Stop()
   107  	if err == nil {
   108  		s.metrics.SentMessage()
   109  	}
   110  	return err
   111  }
   112  
   113  func (s *monitoredClientStream) RecvMsg(m interface{}) error {
   114  	timer := s.metrics.ReceiveMessageTimer()
   115  	err := s.ClientStream.RecvMsg(m)
   116  	timer.Stop()
   117  
   118  	if err == nil {
   119  		s.metrics.ReceivedMessage()
   120  	} else if err == io.EOF {
   121  		s.metrics.Handled(codes.OK)
   122  	} else {
   123  		st, _ := status.FromError(err)
   124  		s.metrics.Handled(st.Code())
   125  	}
   126  	return err
   127  }
   128  
// clientMetrics bundles the metrics recorded for a single client RPC. A new
// value is created per call by newClientMetrics.
type clientMetrics struct {
	// scope is the "grpc" subscope tagged with the call's type, service and
	// method.
	scope                     tally.Scope
	// startTime is when the metrics value was created; Handled measures the
	// call duration from it.
	startTime                 time.Time
	// tags is the tag map that newClientMetrics populated and passed to the
	// scope.
	tags                      map[string]string
	// clientStartedCounter is incremented once on creation.
	clientStartedCounter      tally.Counter
	// clientHandledHistogram records the overall call duration.
	clientHandledHistogram    tally.Timer
	// clientStreamRecvHistogram times individual stream receives.
	clientStreamRecvHistogram tally.Timer
	// clientStreamMsgReceived counts messages received on a stream.
	clientStreamMsgReceived   tally.Counter
	// clientStreamSendHistogram times individual stream sends.
	clientStreamSendHistogram tally.Timer
	// clientStreamMsgSent counts messages sent on a stream.
	clientStreamMsgSent       tally.Counter
}
   140  
   141  func newClientMetrics(
   142  	rpcType string,
   143  	fullMethod string,
   144  	opts interceptorInstrumentOptions,
   145  ) clientMetrics {
   146  	var (
   147  		name            = strings.TrimPrefix(fullMethod, "/")
   148  		service, method = "unknown", "unknown"
   149  	)
   150  	if i := strings.Index(name, "/"); i >= 0 {
   151  		service, method = name[:i], name[i+1:]
   152  	}
   153  
   154  	tags := map[string]string{
   155  		"grpc_type":    rpcType,
   156  		"grpc_service": service,
   157  		"grpc_method":  method,
   158  	}
   159  
   160  	scope := opts.Scope.SubScope("grpc").Tagged(tags)
   161  
   162  	m := clientMetrics{
   163  		scope:                scope,
   164  		startTime:            time.Now(),
   165  		tags:                 tags, // Reuse tags for later subscoping.
   166  		clientStartedCounter: scope.Counter("client_started_total"),
   167  		clientHandledHistogram: instrument.NewTimer(scope,
   168  			"client_handling_seconds", opts.TimerOptions),
   169  		clientStreamRecvHistogram: instrument.NewTimer(scope,
   170  			"client_msg_recv_handling_seconds", opts.TimerOptions),
   171  		clientStreamMsgReceived: scope.Counter("client_msg_received_total"),
   172  		clientStreamSendHistogram: instrument.NewTimer(scope,
   173  			"client_msg_send_handling_seconds", opts.TimerOptions),
   174  		clientStreamMsgSent: scope.Counter("client_msg_sent_total"),
   175  	}
   176  	m.clientStartedCounter.Inc(1)
   177  	return m
   178  }
   179  
// ReceiveMessageTimer starts the per-message receive timer and returns its
// stopwatch; the caller stops it once the receive has completed.
func (m clientMetrics) ReceiveMessageTimer() tally.Stopwatch {
	return m.clientStreamRecvHistogram.Start()
}
   183  
// ReceivedMessage counts one message received on the stream.
func (m clientMetrics) ReceivedMessage() {
	m.clientStreamMsgReceived.Inc(1)
}
   187  
// SendMessageTimer starts the per-message send timer and returns its
// stopwatch; the caller stops it once the send has completed.
func (m clientMetrics) SendMessageTimer() tally.Stopwatch {
	return m.clientStreamSendHistogram.Start()
}
   191  
// SentMessage counts one message sent on the stream.
func (m clientMetrics) SentMessage() {
	m.clientStreamMsgSent.Inc(1)
}
   195  
   196  func (m clientMetrics) Handled(code codes.Code) {
   197  	// Reuse tags map.
   198  	for k := range m.tags {
   199  		delete(m.tags, k)
   200  	}
   201  	m.tags["grpc_code"] = code.String()
   202  	subscope := m.scope.Tagged(m.tags)
   203  	subscope.Counter("client_handled_total").Inc(1)
   204  	m.clientHandledHistogram.Record(time.Since(m.startTime))
   205  }
   206  
   207  func rpcTypeFromStreamDesc(desc *grpc.StreamDesc) string {
   208  	if desc.ClientStreams && !desc.ServerStreams {
   209  		return grpcTypeClientStream
   210  	} else if !desc.ClientStreams && desc.ServerStreams {
   211  		return grpcTypeServerStream
   212  	}
   213  	return grpcTypeBidiStream
   214  }
   215  
   216  // DefaultTimerOptions returns a sane default timer options with buckets from
   217  // 1ms to 10mins.
   218  func DefaultTimerOptions() instrument.TimerOptions {
   219  	return instrument.NewHistogramTimerOptions(instrument.HistogramTimerOptions{
   220  		HistogramBuckets: tally.DurationBuckets{
   221  			0,
   222  			time.Millisecond,
   223  			2 * time.Millisecond,
   224  			3 * time.Millisecond,
   225  			4 * time.Millisecond,
   226  			5 * time.Millisecond,
   227  			6 * time.Millisecond,
   228  			7 * time.Millisecond,
   229  			8 * time.Millisecond,
   230  			9 * time.Millisecond,
   231  			10 * time.Millisecond,
   232  			20 * time.Millisecond,
   233  			40 * time.Millisecond,
   234  			60 * time.Millisecond,
   235  			80 * time.Millisecond,
   236  			100 * time.Millisecond,
   237  			200 * time.Millisecond,
   238  			400 * time.Millisecond,
   239  			600 * time.Millisecond,
   240  			800 * time.Millisecond,
   241  			time.Second,
   242  			time.Second + 500*time.Millisecond,
   243  			2 * time.Second,
   244  			2*time.Second + 500*time.Millisecond,
   245  			3 * time.Second,
   246  			3*time.Second + 500*time.Millisecond,
   247  			4 * time.Second,
   248  			4*time.Second + 500*time.Millisecond,
   249  			5 * time.Second,
   250  			5*time.Second + 500*time.Millisecond,
   251  			6 * time.Second,
   252  			6*time.Second + 500*time.Millisecond,
   253  			7 * time.Second,
   254  			7*time.Second + 500*time.Millisecond,
   255  			8 * time.Second,
   256  			8*time.Second + 500*time.Millisecond,
   257  			9 * time.Second,
   258  			9*time.Second + 500*time.Millisecond,
   259  			10 * time.Second,
   260  			15 * time.Second,
   261  			20 * time.Second,
   262  			25 * time.Second,
   263  			30 * time.Second,
   264  			35 * time.Second,
   265  			40 * time.Second,
   266  			45 * time.Second,
   267  			50 * time.Second,
   268  			55 * time.Second,
   269  			60 * time.Second,
   270  			150 * time.Second,
   271  			300 * time.Second,
   272  			450 * time.Second,
   273  			600 * time.Second,
   274  		},
   275  	})
   276  }