go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/grpc/grpcmon/server.go (about)

     1  // Copyright 2016 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package grpcmon
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"time"
    21  
    22  	gcode "google.golang.org/genproto/googleapis/rpc/code"
    23  	"google.golang.org/grpc"
    24  	"google.golang.org/grpc/codes"
    25  	"google.golang.org/grpc/status"
    26  
    27  	"go.chromium.org/luci/common/clock"
    28  	"go.chromium.org/luci/common/tsmon/distribution"
    29  	"go.chromium.org/luci/common/tsmon/field"
    30  	"go.chromium.org/luci/common/tsmon/metric"
    31  	"go.chromium.org/luci/common/tsmon/types"
    32  )
    33  
    34  var (
    35  	grpcServerCount = metric.NewCounter(
    36  		"grpc/server/count",
    37  		"Total number of RPCs.",
    38  		nil,
    39  		field.String("method"),         // full name of the grpc method
    40  		field.Int("code"),              // status.Code of the result
    41  		field.String("canonical_code")) // String representation of the code above
    42  
    43  	grpcServerDuration = metric.NewCumulativeDistribution(
    44  		"grpc/server/duration",
    45  		"Distribution of server-side RPC duration (in milliseconds).",
    46  		&types.MetricMetadata{Units: types.Milliseconds},
    47  		distribution.DefaultBucketer,
    48  		field.String("method"),         // full name of the grpc method
    49  		field.Int("code"),              // status.Code of the result
    50  		field.String("canonical_code")) // String representation of the code above
    51  )
    52  
    53  // UnaryServerInterceptor is a grpc.UnaryServerInterceptor that gathers RPC
    54  // handler metrics and sends them to tsmon.
    55  //
    56  // It assumes the RPC context has tsmon initialized already.
    57  func UnaryServerInterceptor(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp any, err error) {
    58  	started := clock.Now(ctx)
    59  	panicking := true
    60  	defer func() {
    61  		// We don't want to recover anything, but we want to log Internal error
    62  		// in case of a panic. We pray here reportServerRPCMetrics is very
    63  		// lightweight and it doesn't panic itself.
    64  		code := codes.OK
    65  		switch {
    66  		case err != nil:
    67  			code = status.Code(err)
    68  		case panicking:
    69  			code = codes.Internal
    70  		}
    71  		reportServerRPCMetrics(ctx, info.FullMethod, code, clock.Now(ctx).Sub(started))
    72  	}()
    73  	resp, err = handler(ctx, req)
    74  	panicking = false // normal exit, no panic happened, disarms defer
    75  	return
    76  }
    77  
    78  // StreamServerInterceptor is a grpc.StreamServerInterceptor that gathers RPC
    79  // handler metrics and sends them to tsmon.
    80  //
    81  // It assumes the RPC context has tsmon initialized already.
    82  //
    83  // TODO(vadimsh): Report the number of messages streamed. This will make this
    84  // interceptor sufficiently different from UnaryServerInterceptor. That's the
    85  // reason there's no UnifiedServerInterceptor exposed, even though right now
    86  // implementations of unary and stream interceptors are identical.
    87  func StreamServerInterceptor(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) (err error) {
    88  	ctx := ss.Context()
    89  	started := clock.Now(ctx)
    90  	panicking := true
    91  	defer func() {
    92  		// We don't want to recover anything, but we want to log Internal error
    93  		// in case of a panic. We pray here reportServerRPCMetrics is very
    94  		// lightweight and it doesn't panic itself.
    95  		code := codes.OK
    96  		switch {
    97  		case err != nil:
    98  			code = status.Code(err)
    99  		case panicking:
   100  			code = codes.Internal
   101  		}
   102  		reportServerRPCMetrics(ctx, info.FullMethod, code, clock.Now(ctx).Sub(started))
   103  	}()
   104  	err = handler(srv, ss)
   105  	panicking = false // normal exit, no panic happened, disarms defer
   106  	return
   107  }
   108  
   109  // reportServerRPCMetrics sends metrics after RPC handler has finished.
   110  func reportServerRPCMetrics(ctx context.Context, method string, code codes.Code, dur time.Duration) {
   111  	canon, ok := gcode.Code_name[int32(code)]
   112  	if !ok {
   113  		canon = fmt.Sprintf("Code(%d)", int64(code))
   114  	}
   115  
   116  	grpcServerCount.Add(ctx, 1, method, int(code), canon)
   117  	grpcServerDuration.Add(ctx, float64(dur.Nanoseconds()/1e6), method, int(code), canon)
   118  }