github.com/weaveworks/common@v0.0.0-20230728070032-dd9e68f319d5/instrument/instrument.go

github.com/weaveworks/common@v0.0.0-20230728070032-dd9e68f319d5/instrument/instrument.go (about)

     1  package instrument
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/opentracing/opentracing-go"
     8  	"github.com/opentracing/opentracing-go/ext"
     9  	otlog "github.com/opentracing/opentracing-go/log"
    10  	"github.com/prometheus/client_golang/prometheus"
    11  	oldcontext "golang.org/x/net/context"
    12  
    13  	"github.com/weaveworks/common/grpc"
    14  	"github.com/weaveworks/common/tracing"
    15  	"github.com/weaveworks/common/user"
    16  )
    17  
    18  // DefBuckets are histogram buckets for the response time (in seconds)
    19  // of a network service, including one that is responding very slowly.
    20  var DefBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25, 50, 100}
    21  
    22  // Collector describes something that collects data before and/or after a task.
    23  type Collector interface {
    24  	Register()
    25  	Before(ctx context.Context, method string, start time.Time)
    26  	After(ctx context.Context, method, statusCode string, start time.Time)
    27  }
    28  
// HistogramCollector collects the duration of a request
// into a Prometheus histogram vector.
type HistogramCollector struct {
	// metric receives one observation per finished request. After skips
	// recording when it is nil.
	metric *prometheus.HistogramVec
}
    33  
    34  // HistogramCollectorBuckets define the buckets when passing the metric
    35  var HistogramCollectorBuckets = []string{"operation", "status_code"}
    36  
    37  // NewHistogramCollectorFromOpts creates a Collector from histogram options.
    38  // It makes sure that the buckets are named properly and should be preferred over
    39  // NewHistogramCollector().
    40  func NewHistogramCollectorFromOpts(opts prometheus.HistogramOpts) *HistogramCollector {
    41  	metric := prometheus.NewHistogramVec(opts, HistogramCollectorBuckets)
    42  	return &HistogramCollector{metric}
    43  }
    44  
    45  // NewHistogramCollector creates a Collector from a metric.
    46  func NewHistogramCollector(metric *prometheus.HistogramVec) *HistogramCollector {
    47  	return &HistogramCollector{metric}
    48  }
    49  
    50  // Register registers metrics.
    51  func (c *HistogramCollector) Register() {
    52  	prometheus.MustRegister(c.metric)
    53  }
    54  
// Before collects for the upcoming request.
// It does nothing: this collector records its observation in After.
func (c *HistogramCollector) Before(ctx context.Context, method string, start time.Time) {
}
    58  
    59  // After collects when the request is done.
    60  func (c *HistogramCollector) After(ctx context.Context, method, statusCode string, start time.Time) {
    61  	if c.metric != nil {
    62  		ObserveWithExemplar(ctx, c.metric.WithLabelValues(method, statusCode), time.Since(start).Seconds())
    63  	}
    64  }
    65  
    66  // ObserveWithExemplar adds a sample to a histogram, and adds an exemplar if the context has a sampled trace.
    67  // 'histogram' parameter must be castable to prometheus.ExemplarObserver or function will panic
    68  // (this will always work for a HistogramVec).
    69  func ObserveWithExemplar(ctx context.Context, histogram prometheus.Observer, seconds float64) {
    70  	if traceID, ok := tracing.ExtractSampledTraceID(ctx); ok {
    71  		histogram.(prometheus.ExemplarObserver).ObserveWithExemplar(
    72  			seconds,
    73  			prometheus.Labels{"traceID": traceID},
    74  		)
    75  		return
    76  	}
    77  	histogram.Observe(seconds)
    78  }
    79  
    80  // JobCollector collects metrics for jobs. Designed for batch jobs which run on a regular,
    81  // not-too-frequent, non-overlapping interval. We can afford to measure duration directly
    82  // with gauges, and compute quantile with quantile_over_time.
    83  type JobCollector struct {
    84  	start, end, duration *prometheus.GaugeVec
    85  	started, completed   *prometheus.CounterVec
    86  }
    87  
    88  // NewJobCollector instantiates JobCollector which creates its metrics.
    89  func NewJobCollector(namespace string) *JobCollector {
    90  	return &JobCollector{
    91  		start: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    92  			Namespace: namespace,
    93  			Subsystem: "job",
    94  			Name:      "latest_start_timestamp",
    95  			Help:      "Unix UTC timestamp of most recent job start time",
    96  		}, []string{"operation"}),
    97  		end: prometheus.NewGaugeVec(prometheus.GaugeOpts{
    98  			Namespace: namespace,
    99  			Subsystem: "job",
   100  			Name:      "latest_end_timestamp",
   101  			Help:      "Unix UTC timestamp of most recent job end time",
   102  		}, []string{"operation", "status_code"}),
   103  		duration: prometheus.NewGaugeVec(prometheus.GaugeOpts{
   104  			Namespace: namespace,
   105  			Subsystem: "job",
   106  			Name:      "latest_duration_seconds",
   107  			Help:      "duration of most recent job",
   108  		}, []string{"operation", "status_code"}),
   109  		started: prometheus.NewCounterVec(prometheus.CounterOpts{
   110  			Namespace: namespace,
   111  			Subsystem: "job",
   112  			Name:      "started_total",
   113  			Help:      "Number of jobs started",
   114  		}, []string{"operation"}),
   115  		completed: prometheus.NewCounterVec(prometheus.CounterOpts{
   116  			Namespace: namespace,
   117  			Subsystem: "job",
   118  			Name:      "completed_total",
   119  			Help:      "Number of jobs completed",
   120  		}, []string{"operation", "status_code"}),
   121  	}
   122  }
   123  
   124  // Register registers metrics.
   125  func (c *JobCollector) Register() {
   126  	prometheus.MustRegister(c.start)
   127  	prometheus.MustRegister(c.end)
   128  	prometheus.MustRegister(c.duration)
   129  	prometheus.MustRegister(c.started)
   130  	prometheus.MustRegister(c.completed)
   131  }
   132  
   133  // Before collects for the upcoming request.
   134  func (c *JobCollector) Before(ctx context.Context, method string, start time.Time) {
   135  	c.start.WithLabelValues(method).Set(float64(start.UTC().Unix()))
   136  	c.started.WithLabelValues(method).Inc()
   137  }
   138  
   139  // After collects when the request is done.
   140  func (c *JobCollector) After(ctx context.Context, method, statusCode string, start time.Time) {
   141  	end := time.Now()
   142  	c.end.WithLabelValues(method, statusCode).Set(float64(end.UTC().Unix()))
   143  	c.duration.WithLabelValues(method, statusCode).Set(end.Sub(start).Seconds())
   144  	c.completed.WithLabelValues(method, statusCode).Inc()
   145  }
   146  
   147  // CollectedRequest runs a tracked request. It uses the given Collector to monitor requests.
   148  //
   149  // If `f` returns no error we log "200" as status code, otherwise "500". Pass in a function
   150  // for `toStatusCode` to overwrite this behaviour. It will also emit an OpenTracing span if
   151  // you have a global tracer configured.
   152  func CollectedRequest(ctx context.Context, method string, col Collector, toStatusCode func(error) string, f func(context.Context) error) error {
   153  	if toStatusCode == nil {
   154  		toStatusCode = ErrorCode
   155  	}
   156  	sp, newCtx := opentracing.StartSpanFromContext(ctx, method)
   157  	ext.SpanKindRPCClient.Set(sp)
   158  	if userID, err := user.ExtractUserID(ctx); err == nil {
   159  		sp.SetTag("user", userID)
   160  	}
   161  	if orgID, err := user.ExtractOrgID(ctx); err == nil {
   162  		sp.SetTag("organization", orgID)
   163  	}
   164  
   165  	start := time.Now()
   166  	col.Before(newCtx, method, start)
   167  	err := f(newCtx)
   168  	col.After(newCtx, method, toStatusCode(err), start)
   169  
   170  	if err != nil {
   171  		if !grpc.IsCanceled(err) {
   172  			ext.Error.Set(sp, true)
   173  		}
   174  		sp.LogFields(otlog.Error(err))
   175  	}
   176  	sp.Finish()
   177  
   178  	return err
   179  }
   180  
   181  // ErrorCode converts an error into an HTTP status code
   182  func ErrorCode(err error) string {
   183  	if err == nil {
   184  		return "200"
   185  	}
   186  	return "500"
   187  }
   188  
   189  // TimeRequestHistogram runs 'f' and records how long it took in the given Prometheus
   190  // histogram metric. If 'f' returns successfully, record a "200". Otherwise, record
   191  // "500".  It will also emit an OpenTracing span if you have a global tracer configured.
   192  //
   193  // Deprecated: Use CollectedRequest()
   194  func TimeRequestHistogram(ctx oldcontext.Context, method string, metric *prometheus.HistogramVec, f func(context.Context) error) error {
   195  	return CollectedRequest(ctx, method, NewHistogramCollector(metric), ErrorCode, f)
   196  }
   197  
   198  // TimeRequestHistogramStatus runs 'f' and records how long it took in the given Prometheus
   199  // histogram metric. If 'f' returns successfully, record a "200". Otherwise, record
   200  // "500".  It will also emit an OpenTracing span if you have a global tracer configured.
   201  //
   202  // Deprecated: Use CollectedRequest()
   203  func TimeRequestHistogramStatus(ctx oldcontext.Context, method string, metric *prometheus.HistogramVec, toStatusCode func(error) string, f func(context.Context) error) error {
   204  	return CollectedRequest(ctx, method, NewHistogramCollector(metric), toStatusCode, f)
   205  }