github.com/weaveworks/common@v0.0.0-20230728070032-dd9e68f319d5/instrument/instrument.go (about) 1 package instrument 2 3 import ( 4 "context" 5 "time" 6 7 "github.com/opentracing/opentracing-go" 8 "github.com/opentracing/opentracing-go/ext" 9 otlog "github.com/opentracing/opentracing-go/log" 10 "github.com/prometheus/client_golang/prometheus" 11 oldcontext "golang.org/x/net/context" 12 13 "github.com/weaveworks/common/grpc" 14 "github.com/weaveworks/common/tracing" 15 "github.com/weaveworks/common/user" 16 ) 17 18 // DefBuckets are histogram buckets for the response time (in seconds) 19 // of a network service, including one that is responding very slowly. 20 var DefBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 25, 50, 100} 21 22 // Collector describes something that collects data before and/or after a task. 23 type Collector interface { 24 Register() 25 Before(ctx context.Context, method string, start time.Time) 26 After(ctx context.Context, method, statusCode string, start time.Time) 27 } 28 29 // HistogramCollector collects the duration of a request 30 type HistogramCollector struct { 31 metric *prometheus.HistogramVec 32 } 33 34 // HistogramCollectorBuckets define the buckets when passing the metric 35 var HistogramCollectorBuckets = []string{"operation", "status_code"} 36 37 // NewHistogramCollectorFromOpts creates a Collector from histogram options. 38 // It makes sure that the buckets are named properly and should be preferred over 39 // NewHistogramCollector(). 40 func NewHistogramCollectorFromOpts(opts prometheus.HistogramOpts) *HistogramCollector { 41 metric := prometheus.NewHistogramVec(opts, HistogramCollectorBuckets) 42 return &HistogramCollector{metric} 43 } 44 45 // NewHistogramCollector creates a Collector from a metric. 46 func NewHistogramCollector(metric *prometheus.HistogramVec) *HistogramCollector { 47 return &HistogramCollector{metric} 48 } 49 50 // Register registers metrics. 51 func (c *HistogramCollector) Register() { 52 prometheus.MustRegister(c.metric) 53 } 54 55 // Before collects for the upcoming request. 56 func (c *HistogramCollector) Before(ctx context.Context, method string, start time.Time) { 57 } 58 59 // After collects when the request is done. 60 func (c *HistogramCollector) After(ctx context.Context, method, statusCode string, start time.Time) { 61 if c.metric != nil { 62 ObserveWithExemplar(ctx, c.metric.WithLabelValues(method, statusCode), time.Since(start).Seconds()) 63 } 64 } 65 66 // ObserveWithExemplar adds a sample to a histogram, and adds an exemplar if the context has a sampled trace. 67 // 'histogram' parameter must be castable to prometheus.ExemplarObserver or function will panic 68 // (this will always work for a HistogramVec). 69 func ObserveWithExemplar(ctx context.Context, histogram prometheus.Observer, seconds float64) { 70 if traceID, ok := tracing.ExtractSampledTraceID(ctx); ok { 71 histogram.(prometheus.ExemplarObserver).ObserveWithExemplar( 72 seconds, 73 prometheus.Labels{"traceID": traceID}, 74 ) 75 return 76 } 77 histogram.Observe(seconds) 78 } 79 80 // JobCollector collects metrics for jobs. Designed for batch jobs which run on a regular, 81 // not-too-frequent, non-overlapping interval. We can afford to measure duration directly 82 // with gauges, and compute quantile with quantile_over_time. 83 type JobCollector struct { 84 start, end, duration *prometheus.GaugeVec 85 started, completed *prometheus.CounterVec 86 } 87 88 // NewJobCollector instantiates JobCollector which creates its metrics. 89 func NewJobCollector(namespace string) *JobCollector { 90 return &JobCollector{ 91 start: prometheus.NewGaugeVec(prometheus.GaugeOpts{ 92 Namespace: namespace, 93 Subsystem: "job", 94 Name: "latest_start_timestamp", 95 Help: "Unix UTC timestamp of most recent job start time", 96 }, []string{"operation"}), 97 end: prometheus.NewGaugeVec(prometheus.GaugeOpts{ 98 Namespace: namespace, 99 Subsystem: "job", 100 Name: "latest_end_timestamp", 101 Help: "Unix UTC timestamp of most recent job end time", 102 }, []string{"operation", "status_code"}), 103 duration: prometheus.NewGaugeVec(prometheus.GaugeOpts{ 104 Namespace: namespace, 105 Subsystem: "job", 106 Name: "latest_duration_seconds", 107 Help: "duration of most recent job", 108 }, []string{"operation", "status_code"}), 109 started: prometheus.NewCounterVec(prometheus.CounterOpts{ 110 Namespace: namespace, 111 Subsystem: "job", 112 Name: "started_total", 113 Help: "Number of jobs started", 114 }, []string{"operation"}), 115 completed: prometheus.NewCounterVec(prometheus.CounterOpts{ 116 Namespace: namespace, 117 Subsystem: "job", 118 Name: "completed_total", 119 Help: "Number of jobs completed", 120 }, []string{"operation", "status_code"}), 121 } 122 } 123 124 // Register registers metrics. 125 func (c *JobCollector) Register() { 126 prometheus.MustRegister(c.start) 127 prometheus.MustRegister(c.end) 128 prometheus.MustRegister(c.duration) 129 prometheus.MustRegister(c.started) 130 prometheus.MustRegister(c.completed) 131 } 132 133 // Before collects for the upcoming request. 134 func (c *JobCollector) Before(ctx context.Context, method string, start time.Time) { 135 c.start.WithLabelValues(method).Set(float64(start.UTC().Unix())) 136 c.started.WithLabelValues(method).Inc() 137 } 138 139 // After collects when the request is done. 140 func (c *JobCollector) After(ctx context.Context, method, statusCode string, start time.Time) { 141 end := time.Now() 142 c.end.WithLabelValues(method, statusCode).Set(float64(end.UTC().Unix())) 143 c.duration.WithLabelValues(method, statusCode).Set(end.Sub(start).Seconds()) 144 c.completed.WithLabelValues(method, statusCode).Inc() 145 } 146 147 // CollectedRequest runs a tracked request. It uses the given Collector to monitor requests. 148 // 149 // If `f` returns no error we log "200" as status code, otherwise "500". Pass in a function 150 // for `toStatusCode` to overwrite this behaviour. It will also emit an OpenTracing span if 151 // you have a global tracer configured. 152 func CollectedRequest(ctx context.Context, method string, col Collector, toStatusCode func(error) string, f func(context.Context) error) error { 153 if toStatusCode == nil { 154 toStatusCode = ErrorCode 155 } 156 sp, newCtx := opentracing.StartSpanFromContext(ctx, method) 157 ext.SpanKindRPCClient.Set(sp) 158 if userID, err := user.ExtractUserID(ctx); err == nil { 159 sp.SetTag("user", userID) 160 } 161 if orgID, err := user.ExtractOrgID(ctx); err == nil { 162 sp.SetTag("organization", orgID) 163 } 164 165 start := time.Now() 166 col.Before(newCtx, method, start) 167 err := f(newCtx) 168 col.After(newCtx, method, toStatusCode(err), start) 169 170 if err != nil { 171 if !grpc.IsCanceled(err) { 172 ext.Error.Set(sp, true) 173 } 174 sp.LogFields(otlog.Error(err)) 175 } 176 sp.Finish() 177 178 return err 179 } 180 181 // ErrorCode converts an error into an HTTP status code 182 func ErrorCode(err error) string { 183 if err == nil { 184 return "200" 185 } 186 return "500" 187 } 188 189 // TimeRequestHistogram runs 'f' and records how long it took in the given Prometheus 190 // histogram metric. If 'f' returns successfully, record a "200". Otherwise, record 191 // "500". It will also emit an OpenTracing span if you have a global tracer configured. 192 // 193 // Deprecated: Use CollectedRequest() 194 func TimeRequestHistogram(ctx oldcontext.Context, method string, metric *prometheus.HistogramVec, f func(context.Context) error) error { 195 return CollectedRequest(ctx, method, NewHistogramCollector(metric), ErrorCode, f) 196 } 197 198 // TimeRequestHistogramStatus runs 'f' and records how long it took in the given Prometheus 199 // histogram metric. If 'f' returns successfully, record a "200". Otherwise, record 200 // "500". It will also emit an OpenTracing span if you have a global tracer configured. 201 // 202 // Deprecated: Use CollectedRequest() 203 func TimeRequestHistogramStatus(ctx oldcontext.Context, method string, metric *prometheus.HistogramVec, toStatusCode func(error) string, f func(context.Context) error) error { 204 return CollectedRequest(ctx, method, NewHistogramCollector(metric), toStatusCode, f) 205 }