github.com/m3db/m3@v1.5.0/src/x/grpc/grpc_metrics.go (about) 1 // Copyright (c) 2020 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package grpc 22 23 import ( 24 "context" 25 "io" 26 "strings" 27 "time" 28 29 "github.com/m3db/m3/src/x/instrument" 30 31 "github.com/uber-go/tally" 32 "google.golang.org/grpc" 33 "google.golang.org/grpc/codes" 34 "google.golang.org/grpc/status" 35 ) 36 37 var ( 38 grpcTypeUnary = "unary" 39 grpcTypeClientStream = "client_stream" 40 grpcTypeServerStream = "server_stream" 41 grpcTypeBidiStream = "bidi_stream" 42 ) 43 44 // InterceptorInstrumentOptions is a set of options for instrumented interceptors. 45 type InterceptorInstrumentOptions struct { 46 // Scope, required. 47 Scope tally.Scope 48 // TimerOptions, optional and if not set will use defaults. 49 TimerOptions *instrument.TimerOptions 50 } 51 52 type interceptorInstrumentOptions struct { 53 Scope tally.Scope 54 TimerOptions instrument.TimerOptions 55 } 56 57 func (o InterceptorInstrumentOptions) resolve() interceptorInstrumentOptions { 58 result := interceptorInstrumentOptions{Scope: o.Scope} 59 if o.TimerOptions == nil { 60 result.TimerOptions = DefaultTimerOptions() 61 } else { 62 result.TimerOptions = *o.TimerOptions 63 } 64 return result 65 } 66 67 // UnaryClientInterceptor provides tally metrics for client unary calls. 68 func UnaryClientInterceptor( 69 opts InterceptorInstrumentOptions, 70 ) func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { 71 resolvedOpts := opts.resolve() 72 return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { 73 m := newClientMetrics(grpcTypeUnary, method, resolvedOpts) 74 err := invoker(ctx, method, req, reply, cc, opts...) 75 st, _ := status.FromError(err) 76 m.Handled(st.Code()) 77 return err 78 } 79 } 80 81 // StreamClientInterceptor provides tally metrics for client streams. 82 func StreamClientInterceptor( 83 opts InterceptorInstrumentOptions, 84 ) func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { 85 resolvedOpts := opts.resolve() 86 return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption) (grpc.ClientStream, error) { 87 m := newClientMetrics(rpcTypeFromStreamDesc(desc), method, resolvedOpts) 88 stream, err := streamer(ctx, desc, cc, method, opts...) 89 if err != nil { 90 st, _ := status.FromError(err) 91 m.Handled(st.Code()) 92 return nil, err 93 } 94 return &monitoredClientStream{ClientStream: stream, metrics: m}, nil 95 } 96 } 97 98 type monitoredClientStream struct { 99 grpc.ClientStream 100 metrics clientMetrics 101 } 102 103 func (s *monitoredClientStream) SendMsg(m interface{}) error { 104 timer := s.metrics.SendMessageTimer() 105 err := s.ClientStream.SendMsg(m) 106 timer.Stop() 107 if err == nil { 108 s.metrics.SentMessage() 109 } 110 return err 111 } 112 113 func (s *monitoredClientStream) RecvMsg(m interface{}) error { 114 timer := s.metrics.ReceiveMessageTimer() 115 err := s.ClientStream.RecvMsg(m) 116 timer.Stop() 117 118 if err == nil { 119 s.metrics.ReceivedMessage() 120 } else if err == io.EOF { 121 s.metrics.Handled(codes.OK) 122 } else { 123 st, _ := status.FromError(err) 124 s.metrics.Handled(st.Code()) 125 } 126 return err 127 } 128 129 type clientMetrics struct { 130 scope tally.Scope 131 startTime time.Time 132 tags map[string]string 133 clientStartedCounter tally.Counter 134 clientHandledHistogram tally.Timer 135 clientStreamRecvHistogram tally.Timer 136 clientStreamMsgReceived tally.Counter 137 clientStreamSendHistogram tally.Timer 138 clientStreamMsgSent tally.Counter 139 } 140 141 func newClientMetrics( 142 rpcType string, 143 fullMethod string, 144 opts interceptorInstrumentOptions, 145 ) clientMetrics { 146 var ( 147 name = strings.TrimPrefix(fullMethod, "/") 148 service, method = "unknown", "unknown" 149 ) 150 if i := strings.Index(name, "/"); i >= 0 { 151 service, method = name[:i], name[i+1:] 152 } 153 154 tags := map[string]string{ 155 "grpc_type": rpcType, 156 "grpc_service": service, 157 "grpc_method": method, 158 } 159 160 scope := opts.Scope.SubScope("grpc").Tagged(tags) 161 162 m := clientMetrics{ 163 scope: scope, 164 startTime: time.Now(), 165 tags: tags, // Reuse tags for later subscoping. 166 clientStartedCounter: scope.Counter("client_started_total"), 167 clientHandledHistogram: instrument.NewTimer(scope, 168 "client_handling_seconds", opts.TimerOptions), 169 clientStreamRecvHistogram: instrument.NewTimer(scope, 170 "client_msg_recv_handling_seconds", opts.TimerOptions), 171 clientStreamMsgReceived: scope.Counter("client_msg_received_total"), 172 clientStreamSendHistogram: instrument.NewTimer(scope, 173 "client_msg_send_handling_seconds", opts.TimerOptions), 174 clientStreamMsgSent: scope.Counter("client_msg_sent_total"), 175 } 176 m.clientStartedCounter.Inc(1) 177 return m 178 } 179 180 func (m clientMetrics) ReceiveMessageTimer() tally.Stopwatch { 181 return m.clientStreamRecvHistogram.Start() 182 } 183 184 func (m clientMetrics) ReceivedMessage() { 185 m.clientStreamMsgReceived.Inc(1) 186 } 187 188 func (m clientMetrics) SendMessageTimer() tally.Stopwatch { 189 return m.clientStreamSendHistogram.Start() 190 } 191 192 func (m clientMetrics) SentMessage() { 193 m.clientStreamMsgSent.Inc(1) 194 } 195 196 func (m clientMetrics) Handled(code codes.Code) { 197 // Reuse tags map. 198 for k := range m.tags { 199 delete(m.tags, k) 200 } 201 m.tags["grpc_code"] = code.String() 202 subscope := m.scope.Tagged(m.tags) 203 subscope.Counter("client_handled_total").Inc(1) 204 m.clientHandledHistogram.Record(time.Since(m.startTime)) 205 } 206 207 func rpcTypeFromStreamDesc(desc *grpc.StreamDesc) string { 208 if desc.ClientStreams && !desc.ServerStreams { 209 return grpcTypeClientStream 210 } else if !desc.ClientStreams && desc.ServerStreams { 211 return grpcTypeServerStream 212 } 213 return grpcTypeBidiStream 214 } 215 216 // DefaultTimerOptions returns a sane default timer options with buckets from 217 // 1ms to 10mins. 218 func DefaultTimerOptions() instrument.TimerOptions { 219 return instrument.NewHistogramTimerOptions(instrument.HistogramTimerOptions{ 220 HistogramBuckets: tally.DurationBuckets{ 221 0, 222 time.Millisecond, 223 2 * time.Millisecond, 224 3 * time.Millisecond, 225 4 * time.Millisecond, 226 5 * time.Millisecond, 227 6 * time.Millisecond, 228 7 * time.Millisecond, 229 8 * time.Millisecond, 230 9 * time.Millisecond, 231 10 * time.Millisecond, 232 20 * time.Millisecond, 233 40 * time.Millisecond, 234 60 * time.Millisecond, 235 80 * time.Millisecond, 236 100 * time.Millisecond, 237 200 * time.Millisecond, 238 400 * time.Millisecond, 239 600 * time.Millisecond, 240 800 * time.Millisecond, 241 time.Second, 242 time.Second + 500*time.Millisecond, 243 2 * time.Second, 244 2*time.Second + 500*time.Millisecond, 245 3 * time.Second, 246 3*time.Second + 500*time.Millisecond, 247 4 * time.Second, 248 4*time.Second + 500*time.Millisecond, 249 5 * time.Second, 250 5*time.Second + 500*time.Millisecond, 251 6 * time.Second, 252 6*time.Second + 500*time.Millisecond, 253 7 * time.Second, 254 7*time.Second + 500*time.Millisecond, 255 8 * time.Second, 256 8*time.Second + 500*time.Millisecond, 257 9 * time.Second, 258 9*time.Second + 500*time.Millisecond, 259 10 * time.Second, 260 15 * time.Second, 261 20 * time.Second, 262 25 * time.Second, 263 30 * time.Second, 264 35 * time.Second, 265 40 * time.Second, 266 45 * time.Second, 267 50 * time.Second, 268 55 * time.Second, 269 60 * time.Second, 270 150 * time.Second, 271 300 * time.Second, 272 450 * time.Second, 273 600 * time.Second, 274 }, 275 }) 276 }