github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/metrics/metrics.go (about) 1 package metrics 2 3 import ( 4 "path" 5 "runtime" 6 "strings" 7 "sync" 8 "time" 9 10 units "github.com/docker/go-units" 11 "github.com/prometheus/client_golang/prometheus" 12 ) 13 14 // TODO The metrics code should probably be reorganized at some point. 15 // The current setup provides an easy way to collect metrics for both external and internal PFS/Storage APIs. 16 17 type metrics struct { 18 requestCounter *prometheus.CounterVec 19 requestSummary, requestSummaryThroughput *prometheus.SummaryVec 20 } 21 22 var ( 23 subsystems = make(map[string]*metrics) 24 mu sync.Mutex 25 ) 26 27 const ( 28 trimPrefix = "github.com/pachyderm/pachyderm/src/" 29 ) 30 31 // ReportRequest reports a request to Prometheus. 32 // This function automatically registers a metric (if one does not already 33 // exist) with the default register. 34 // The calling function's package name is used as the subsystem name and the 35 // function name is used for the operation label. 36 // This function also labels the request as successful or not, and records 37 // the time spent in a separate metric. 38 func ReportRequest(f func() error, skip ...int) (retErr error) { 39 ci := retrieveCallInfo(skip...) 40 ms, err := maybeRegisterSubsystem(ci.packageName) 41 if err != nil { 42 return err 43 } 44 operation := ci.funcName 45 start := time.Now() 46 defer func() { 47 result := "success" 48 if retErr != nil { 49 result = retErr.Error() 50 } 51 ms.requestCounter.WithLabelValues(operation, result).Inc() 52 ms.requestSummary.WithLabelValues(operation).Observe(time.Since(start).Seconds()) 53 }() 54 return f() 55 } 56 57 // ReportRequestWithThroughput functions the same as ReportRequest, but also 58 // reports the throughput in a separate metric. 59 func ReportRequestWithThroughput(f func() (int64, error)) error { 60 ci := retrieveCallInfo() 61 ms, err := maybeRegisterSubsystem(ci.packageName) 62 if err != nil { 63 return err 64 } 65 operation := ci.funcName 66 start := time.Now() 67 return ReportRequest(func() error { 68 bytesProcessed, err := f() 69 throughput := float64(bytesProcessed) / units.MB / time.Since(start).Seconds() 70 ms.requestSummaryThroughput.WithLabelValues(operation).Observe(throughput) 71 return err 72 }, 1) 73 } 74 75 type callInfo struct { 76 packageName string 77 fileName string 78 funcName string 79 line int 80 } 81 82 func retrieveCallInfo(skip ...int) *callInfo { 83 skipFrames := 2 84 if len(skip) > 0 { 85 skipFrames += skip[0] 86 } 87 pc, file, line, _ := runtime.Caller(skipFrames) 88 _, fileName := path.Split(file) 89 parts := strings.Split(runtime.FuncForPC(pc).Name(), ".") 90 pl := len(parts) 91 packageName := "" 92 funcName := parts[pl-1] 93 94 if parts[pl-2][0] == '(' { 95 funcName = parts[pl-2] + "." + funcName 96 packageName = strings.Join(parts[0:pl-2], ".") 97 } else { 98 packageName = strings.Join(parts[0:pl-1], ".") 99 } 100 101 return &callInfo{ 102 packageName: packageName, 103 fileName: fileName, 104 funcName: funcName, 105 line: line, 106 } 107 } 108 109 func maybeRegisterSubsystem(packageName string) (*metrics, error) { 110 subsystem := strings.ReplaceAll(strings.TrimPrefix(packageName, trimPrefix), "/", "_") 111 mu.Lock() 112 defer mu.Unlock() 113 if ms, ok := subsystems[subsystem]; ok { 114 return ms, nil 115 } 116 err := register(subsystem) 117 return subsystems[subsystem], err 118 } 119 120 func register(subsystem string) error { 121 ms := &metrics{ 122 requestCounter: newRequestCounter(subsystem), 123 requestSummary: newRequestSummary(subsystem), 124 requestSummaryThroughput: newRequestSummaryThroughput(subsystem), 125 } 126 for _, m := range []prometheus.Collector{ 127 ms.requestCounter, 128 ms.requestSummary, 129 ms.requestSummaryThroughput, 130 } { 131 if err := prometheus.Register(m); err != nil { 132 return err 133 } 134 } 135 subsystems[subsystem] = ms 136 return nil 137 } 138 139 func newRequestCounter(subsystem string) *prometheus.CounterVec { 140 return prometheus.NewCounterVec( 141 prometheus.CounterOpts{ 142 Namespace: "pachyderm", 143 Subsystem: subsystem, 144 Name: "request_results", 145 Help: subsystem + " operations, count by operation and result type", 146 }, 147 []string{"operation", "result"}, 148 ) 149 } 150 151 func newRequestSummary(subsystem string) *prometheus.SummaryVec { 152 return prometheus.NewSummaryVec( 153 prometheus.SummaryOpts{ 154 Namespace: "pachyderm", 155 Subsystem: subsystem, 156 Name: "request_time", 157 Help: "time spent on " + subsystem + " operations, histogram by duration (seconds)", 158 }, 159 []string{"operation"}, 160 ) 161 } 162 163 func newRequestSummaryThroughput(subsystem string) *prometheus.SummaryVec { 164 return prometheus.NewSummaryVec( 165 prometheus.SummaryOpts{ 166 Namespace: "pachyderm", 167 Subsystem: subsystem, 168 Name: "request_throughput", 169 Help: "throughput of " + subsystem + " operations, histogram by throughput (MB/s)", 170 }, 171 []string{"operation"}, 172 ) 173 }