go.uber.org/yarpc@v1.72.1/internal/observability/middleware.go (about)

     1  // Copyright (c) 2022 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package observability
    22  
    23  import (
    24  	"context"
    25  	"sync"
    26  
    27  	"go.uber.org/net/metrics"
    28  	"go.uber.org/yarpc/api/transport"
    29  	"go.uber.org/yarpc/yarpcerrors"
    30  	"go.uber.org/zap"
    31  	"go.uber.org/zap/zapcore"
    32  )
    33  
    34  var _writerPool = sync.Pool{New: func() interface{} {
    35  	return &writer{}
    36  }}
    37  
    38  // writer wraps a transport.ResponseWriter and a transport.ApplicationErrorMetaSetter so the observing middleware can
    39  // detect application errors and their metadata.
    40  type writer struct {
    41  	transport.ResponseWriter
    42  
    43  	isApplicationError   bool
    44  	applicationErrorMeta *transport.ApplicationErrorMeta
    45  
    46  	responseSize int
    47  }
    48  
    49  func newWriter(rw transport.ResponseWriter) *writer {
    50  	w := _writerPool.Get().(*writer)
    51  	*w = writer{ResponseWriter: rw} // reset
    52  	return w
    53  }
    54  
    55  func (w *writer) SetApplicationError() {
    56  	w.isApplicationError = true
    57  	w.ResponseWriter.SetApplicationError()
    58  }
    59  
    60  func (w *writer) SetApplicationErrorMeta(applicationErrorMeta *transport.ApplicationErrorMeta) {
    61  	if applicationErrorMeta == nil {
    62  		return
    63  	}
    64  
    65  	w.applicationErrorMeta = applicationErrorMeta
    66  	if appErrMetaSetter, ok := w.ResponseWriter.(transport.ApplicationErrorMetaSetter); ok {
    67  		appErrMetaSetter.SetApplicationErrorMeta(applicationErrorMeta)
    68  	}
    69  }
    70  
    71  func (w *writer) Write(p []byte) (n int, err error) {
    72  	w.responseSize += len(p)
    73  	return w.ResponseWriter.Write(p)
    74  }
    75  
    76  func (w *writer) free() {
    77  	_writerPool.Put(w)
    78  }
    79  
    80  // Middleware is logging and metrics middleware for all RPC types.
    81  type Middleware struct {
    82  	graph graph
    83  }
    84  
    85  // Config configures the observability middleware.
    86  type Config struct {
    87  	// Logger to which messages will be logged.
    88  	Logger *zap.Logger
    89  
    90  	// Scope to which metrics are emitted.
    91  	Scope *metrics.Scope
    92  
    93  	// MetricTagsBlocklist of metric tags being suppressed from being tagged on
    94  	// metrics emitted by the middleware.
    95  	MetricTagsBlocklist []string
    96  
    97  	// ContextExtractor Extracts request-scoped information from the context for logging.
    98  	ContextExtractor ContextExtractor
    99  
   100  	// Levels specify log levels for various classes of requests.
   101  	Levels LevelsConfig
   102  }
   103  
   104  // LevelsConfig specifies log level overrides for inbound traffic, outbound
   105  // traffic, or the defaults for either.
   106  type LevelsConfig struct {
   107  	Default  DirectionalLevelsConfig
   108  	Inbound  DirectionalLevelsConfig
   109  	Outbound DirectionalLevelsConfig
   110  }
   111  
   112  // DirectionalLevelsConfig may override the log levels for any combination of
   113  // successes, failures, and application errors.
   114  type DirectionalLevelsConfig struct {
   115  	// Log level used to log successful calls.
   116  	//
   117  	// Defaults to DebugLevel.
   118  	Success *zapcore.Level
   119  
   120  	// Log level used to log failed calls.
   121  	// This includes low-level network errors, TChannel error frames, etc.
   122  	//
   123  	// Defaults to ErrorLevel.
   124  	// Deprecated in favor of ServerError and ClientError.
   125  	Failure *zapcore.Level
   126  
   127  	// Log level used to log calls that failed with an application error.
   128  	// All Thrift exceptions are considered application errors.
   129  	//
   130  	// Defaults to ErrorLevel.
   131  	// Deprecated in favor of ServerError and ClientError.
   132  	ApplicationError *zapcore.Level
   133  
   134  	// Log level used to log calls that failed with an server error.
   135  	//
   136  	// Defaults to ErrorLevel.
   137  	ServerError *zapcore.Level
   138  
   139  	// Log level used to log calls that failed with an client error.
   140  	// All Thrift exceptions are considered application errors if
   141  	// there are not annotated with the option rpc.code.
   142  	//
   143  	// Defaults to ErrorLevel.
   144  	ClientError *zapcore.Level
   145  }
   146  
   147  // NewMiddleware constructs an observability middleware with the provided
   148  // configuration.
   149  func NewMiddleware(cfg Config) *Middleware {
   150  	m := &Middleware{newGraph(cfg.Scope, cfg.Logger, cfg.ContextExtractor, cfg.MetricTagsBlocklist)}
   151  
   152  	// Apply the default levels
   153  	applyLogLevelsConfig(&m.graph.inboundLevels, &cfg.Levels.Default)
   154  	applyLogLevelsConfig(&m.graph.outboundLevels, &cfg.Levels.Default)
   155  	// Override with direction-specific levels
   156  	applyLogLevelsConfig(&m.graph.inboundLevels, &cfg.Levels.Inbound)
   157  	applyLogLevelsConfig(&m.graph.outboundLevels, &cfg.Levels.Outbound)
   158  
   159  	return m
   160  }
   161  
   162  func applyLogLevelsConfig(dst *levels, src *DirectionalLevelsConfig) {
   163  	if level := src.Success; level != nil {
   164  		dst.success = *src.Success
   165  	}
   166  	if level := src.Failure; level != nil {
   167  		dst.useApplicationErrorFailureLevels = true
   168  		dst.failure = *src.Failure
   169  	}
   170  	if level := src.ApplicationError; level != nil {
   171  		dst.useApplicationErrorFailureLevels = true
   172  		dst.applicationError = *src.ApplicationError
   173  	}
   174  	if level := src.ServerError; level != nil {
   175  		dst.serverError = *src.ServerError
   176  	}
   177  	if level := src.ClientError; level != nil {
   178  		dst.clientError = *src.ClientError
   179  	}
   180  }
   181  
   182  // Handle implements middleware.UnaryInbound.
   183  func (m *Middleware) Handle(ctx context.Context, req *transport.Request, w transport.ResponseWriter, h transport.UnaryHandler) error {
   184  	call := m.graph.begin(ctx, transport.Unary, _directionInbound, req)
   185  	defer m.handlePanicForCall(call, transport.Unary)
   186  
   187  	wrappedWriter := newWriter(w)
   188  	err := h.Handle(ctx, req, wrappedWriter)
   189  	ctxErr := ctxErrOverride(ctx, req)
   190  
   191  	call.EndHandleWithAppError(
   192  		callResult{
   193  			err:                  err,
   194  			ctxOverrideErr:       ctxErr,
   195  			isApplicationError:   wrappedWriter.isApplicationError,
   196  			applicationErrorMeta: wrappedWriter.applicationErrorMeta,
   197  			requestSize:          req.BodySize,
   198  			responseSize:         wrappedWriter.responseSize,
   199  		})
   200  
   201  	if ctxErr != nil {
   202  		err = ctxErr
   203  	}
   204  	wrappedWriter.free()
   205  	return err
   206  }
   207  
   208  // Call implements middleware.UnaryOutbound.
   209  func (m *Middleware) Call(ctx context.Context, req *transport.Request, out transport.UnaryOutbound) (*transport.Response, error) {
   210  	call := m.graph.begin(ctx, transport.Unary, _directionOutbound, req)
   211  	defer m.handlePanicForCall(call, transport.Unary)
   212  
   213  	res, err := out.Call(ctx, req)
   214  
   215  	isApplicationError := false
   216  	var applicationErrorMeta *transport.ApplicationErrorMeta
   217  	var responseSize int
   218  	if res != nil {
   219  		isApplicationError = res.ApplicationError
   220  		applicationErrorMeta = res.ApplicationErrorMeta
   221  		responseSize = res.BodySize
   222  	}
   223  	callRes := callResult{
   224  		err:                  err,
   225  		isApplicationError:   isApplicationError,
   226  		applicationErrorMeta: applicationErrorMeta,
   227  		requestSize:          req.BodySize,
   228  		responseSize:         responseSize,
   229  	}
   230  	call.EndCallWithAppError(callRes)
   231  	return res, err
   232  }
   233  
   234  // HandleOneway implements middleware.OnewayInbound.
   235  func (m *Middleware) HandleOneway(ctx context.Context, req *transport.Request, h transport.OnewayHandler) error {
   236  	call := m.graph.begin(ctx, transport.Oneway, _directionInbound, req)
   237  	defer m.handlePanicForCall(call, transport.Oneway)
   238  
   239  	err := h.HandleOneway(ctx, req)
   240  	call.End(callResult{err: err, requestSize: req.BodySize})
   241  	return err
   242  }
   243  
   244  // CallOneway implements middleware.OnewayOutbound.
   245  func (m *Middleware) CallOneway(ctx context.Context, req *transport.Request, out transport.OnewayOutbound) (transport.Ack, error) {
   246  	call := m.graph.begin(ctx, transport.Oneway, _directionOutbound, req)
   247  	defer m.handlePanicForCall(call, transport.Oneway)
   248  
   249  	ack, err := out.CallOneway(ctx, req)
   250  	call.End(callResult{err: err, requestSize: req.BodySize})
   251  	return ack, err
   252  }
   253  
   254  // HandleStream implements middleware.StreamInbound.
   255  func (m *Middleware) HandleStream(serverStream *transport.ServerStream, h transport.StreamHandler) error {
   256  	call := m.graph.begin(serverStream.Context(), transport.Streaming, _directionInbound, serverStream.Request().Meta.ToRequest())
   257  	defer m.handlePanicForCall(call, transport.Streaming)
   258  
   259  	call.EndStreamHandshake()
   260  	err := h.HandleStream(call.WrapServerStream(serverStream))
   261  	call.EndStream(err)
   262  	return err
   263  }
   264  
   265  // CallStream implements middleware.StreamOutbound.
   266  func (m *Middleware) CallStream(ctx context.Context, request *transport.StreamRequest, out transport.StreamOutbound) (*transport.ClientStream, error) {
   267  	call := m.graph.begin(ctx, transport.Streaming, _directionOutbound, request.Meta.ToRequest())
   268  	defer m.handlePanicForCall(call, transport.Streaming)
   269  
   270  	// TODO: metrics for outbound stream and inbound stream are not reported
   271  	// in the same way. HandleStream increases the metric calls by 1 before passing
   272  	// the request while CallStream does it after passing the request.
   273  	// In case of panics in CallStream, the metrics calls will not be increased
   274  	// while it will in HandleStream.
   275  	clientStream, err := out.CallStream(ctx, request)
   276  	call.EndStreamHandshakeWithError(err)
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  	return call.WrapClientStream(clientStream), nil
   281  }
   282  
   283  func ctxErrOverride(ctx context.Context, req *transport.Request) (ctxErr error) {
   284  	if ctx.Err() == context.DeadlineExceeded {
   285  		return yarpcerrors.DeadlineExceededErrorf(
   286  			"call to procedure %q of service %q from caller %q timed out",
   287  			req.Procedure, req.Service, req.Caller)
   288  	}
   289  
   290  	if ctx.Err() == context.Canceled {
   291  		return yarpcerrors.CancelledErrorf(
   292  			"call to procedure %q of service %q from caller %q was canceled",
   293  			req.Procedure, req.Service, req.Caller)
   294  	}
   295  
   296  	return nil
   297  }
   298  
   299  // handlePanicForCall checks for a panic without actually recovering from it
   300  // it must be called in defer otherwise recover will act as a no-op
   301  // The only action this method takes is to emit panic metrics.
   302  func (m *Middleware) handlePanicForCall(call call, transportType transport.Type) {
   303  	// We only want to emit panic metrics without actually recovering from it
   304  	// Actual recovery from a panic happens at top of the stack in transport's Handler Invoker
   305  	// As this middleware is the one and only one with Metrics responsibility, we just panic again after
   306  	// checking for panic without actually recovering from it.
   307  	if e := recover(); e != nil {
   308  		err := yarpcerrors.InternalErrorf("panic %v", e)
   309  		// Emit only the panic metrics
   310  		if transportType == transport.Streaming {
   311  			call.EndStreamWithPanic(err)
   312  		} else {
   313  			call.EndWithPanic(err)
   314  		}
   315  		panic(e)
   316  	}
   317  }