github.com/wfusion/gofusion@v1.1.14/common/infra/asynq/asynqmon/metrics_handler.go (about) 1 package asynqmon 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io" 7 "net/http" 8 "net/url" 9 "strconv" 10 "strings" 11 "time" 12 ) 13 14 type getMetricsResponse struct { 15 QueueSize *json.RawMessage `json:"queue_size"` 16 QueueLatency *json.RawMessage `json:"queue_latency_seconds"` 17 QueueMemUsgApprox *json.RawMessage `json:"queue_memory_usage_approx_bytes"` 18 ProcessedPerSecond *json.RawMessage `json:"tasks_processed_per_second"` 19 FailedPerSecond *json.RawMessage `json:"tasks_failed_per_second"` 20 ErrorRate *json.RawMessage `json:"error_rate"` 21 PendingTasksByQueue *json.RawMessage `json:"pending_tasks_by_queue"` 22 RetryTasksByQueue *json.RawMessage `json:"retry_tasks_by_queue"` 23 ArchivedTasksByQueue *json.RawMessage `json:"archived_tasks_by_queue"` 24 } 25 26 type metricsFetchOptions struct { 27 // Specifies the number of seconds to scan for metrics. 28 duration time.Duration 29 30 // Specifies the end time when fetching metrics. 31 endTime time.Time 32 33 // Optional filter to speicify a list of queues to get metrics for. 34 // Empty list indicates no filter (i.e. get metrics for all queues). 35 queues []string 36 } 37 38 func newGetMetricsHandlerFunc(client *http.Client, prometheusAddr string) http.HandlerFunc { 39 // res is the result of calling a JSON API endpoint. 40 type res struct { 41 query string 42 msg *json.RawMessage 43 err error 44 } 45 46 // List of PromQLs. 47 // Strings are used as template to optionally insert queue filter specified by QUEUE_FILTER. 48 const ( 49 promQLQueueSize = "asynq_queue_size{QUEUE_FILTER}" 50 promQLQueueLatency = "asynq_queue_latency_seconds{QUEUE_FILTER}" 51 promQLMemUsage = "asynq_queue_memory_usage_approx_bytes{QUEUE_FILTER}" 52 promQLProcessedTasks = "rate(asynq_tasks_processed_total{QUEUE_FILTER}[5m])" 53 promQLFailedTasks = "rate(asynq_tasks_failed_total{QUEUE_FILTER}[5m])" 54 promQLErrorRate = "rate(asynq_tasks_failed_total{QUEUE_FILTER}[5m]) / rate(asynq_tasks_processed_total{QUEUE_FILTER}[5m])" 55 promQLPendingTasks = "asynq_tasks_enqueued_total{state=\"pending\",QUEUE_FILTER}" 56 promQLRetryTasks = "asynq_tasks_enqueued_total{state=\"retry\",QUEUE_FILTER}" 57 promQLArchivedTasks = "asynq_tasks_enqueued_total{state=\"archived\",QUEUE_FILTER}" 58 ) 59 60 // Optional query params: 61 // `duration_sec`: specifies the number of seconds to scan 62 // `end_time`: specifies the end_time in Unix time seconds 63 return func(w http.ResponseWriter, r *http.Request) { 64 opts, err := extractMetricsFetchOptions(r) 65 if err != nil { 66 http.Error(w, fmt.Sprintf("invalid query parameter: %v", err), http.StatusBadRequest) 67 return 68 } 69 // List of queries (i.e. promQL) to send to prometheus server. 70 queries := []string{ 71 promQLQueueSize, 72 promQLQueueLatency, 73 promQLMemUsage, 74 promQLProcessedTasks, 75 promQLFailedTasks, 76 promQLErrorRate, 77 promQLPendingTasks, 78 promQLRetryTasks, 79 promQLArchivedTasks, 80 } 81 resp := getMetricsResponse{} 82 // Make multiple API calls concurrently 83 n := len(queries) 84 ch := make(chan res, len(queries)) 85 for _, q := range queries { 86 go func(q string) { 87 url := buildPrometheusURL(prometheusAddr, q, opts) 88 msg, err := fetchPrometheusMetrics(client, url) 89 ch <- res{q, msg, err} 90 }(q) 91 } 92 for r := range ch { 93 n-- 94 if r.err != nil { 95 http.Error(w, fmt.Sprintf("failed to fetch %q: %v", r.query, r.err), http.StatusInternalServerError) 96 return 97 } 98 switch r.query { 99 case promQLQueueSize: 100 resp.QueueSize = r.msg 101 case promQLQueueLatency: 102 resp.QueueLatency = r.msg 103 case promQLMemUsage: 104 resp.QueueMemUsgApprox = r.msg 105 case promQLProcessedTasks: 106 resp.ProcessedPerSecond = r.msg 107 case promQLFailedTasks: 108 resp.FailedPerSecond = r.msg 109 case promQLErrorRate: 110 resp.ErrorRate = r.msg 111 case promQLPendingTasks: 112 resp.PendingTasksByQueue = r.msg 113 case promQLRetryTasks: 114 resp.RetryTasksByQueue = r.msg 115 case promQLArchivedTasks: 116 resp.ArchivedTasksByQueue = r.msg 117 } 118 if n == 0 { 119 break // fetched all metrics 120 } 121 } 122 bytes, err := json.Marshal(resp) 123 if err != nil { 124 http.Error(w, fmt.Sprintf("failed to marshal response into JSON: %v", err), http.StatusInternalServerError) 125 return 126 } 127 if _, err := w.Write(bytes); err != nil { 128 http.Error(w, fmt.Sprintf("failed to write to response: %v", err), http.StatusInternalServerError) 129 return 130 } 131 } 132 } 133 134 const prometheusAPIPath = "/api/v1/query_range" 135 136 func extractMetricsFetchOptions(r *http.Request) (*metricsFetchOptions, error) { 137 opts := &metricsFetchOptions{ 138 duration: 60 * time.Minute, 139 endTime: time.Now(), 140 } 141 q := r.URL.Query() 142 if d := q.Get("duration"); d != "" { 143 val, err := strconv.Atoi(d) 144 if err != nil { 145 return nil, fmt.Errorf("invalid value provided for duration: %q", d) 146 } 147 opts.duration = time.Duration(val) * time.Second 148 } 149 if t := q.Get("endtime"); t != "" { 150 val, err := strconv.Atoi(t) 151 if err != nil { 152 return nil, fmt.Errorf("invalid value provided for end_time: %q", t) 153 } 154 opts.endTime = time.Unix(int64(val), 0) 155 } 156 if qs := q.Get("queues"); qs != "" { 157 opts.queues = strings.Split(qs, ",") 158 } 159 return opts, nil 160 } 161 162 func buildPrometheusURL(baseAddr, promQL string, opts *metricsFetchOptions) string { 163 var b strings.Builder 164 b.WriteString(strings.TrimSuffix(baseAddr, "/")) 165 b.WriteString(prometheusAPIPath) 166 v := url.Values{} 167 v.Add("query", applyQueueFilter(promQL, opts.queues)) 168 v.Add("start", unixTimeString(opts.endTime.Add(-opts.duration))) 169 v.Add("end", unixTimeString(opts.endTime)) 170 v.Add("step", strconv.Itoa(int(step(opts).Seconds()))) 171 b.WriteString("?") 172 b.WriteString(v.Encode()) 173 return b.String() 174 } 175 176 func applyQueueFilter(promQL string, qnames []string) string { 177 if len(qnames) == 0 { 178 return strings.ReplaceAll(promQL, "QUEUE_FILTER", "") 179 } 180 var b strings.Builder 181 b.WriteString(`queue=~"`) 182 for i, q := range qnames { 183 if i != 0 { 184 b.WriteString("|") 185 } 186 b.WriteString(q) 187 } 188 b.WriteByte('"') 189 return strings.ReplaceAll(promQL, "QUEUE_FILTER", b.String()) 190 } 191 192 func fetchPrometheusMetrics(client *http.Client, url string) (*json.RawMessage, error) { 193 resp, err := client.Get(url) 194 if err != nil { 195 return nil, err 196 } 197 defer resp.Body.Close() 198 bytes, err := io.ReadAll(resp.Body) 199 if err != nil { 200 return nil, err 201 } 202 msg := json.RawMessage(bytes) 203 return &msg, err 204 } 205 206 // Returns step to use given the fetch options. 207 // In general, the longer the duration, longer the each step. 208 func step(opts *metricsFetchOptions) time.Duration { 209 if opts.duration <= 6*time.Hour { 210 // maximum number of data points to return: 6h / 10s = 2160 211 return 10 * time.Second 212 } 213 if opts.duration <= 24*time.Hour { 214 // maximum number of data points to return: 24h / 1m = 1440 215 return 1 * time.Minute 216 } 217 if opts.duration <= 8*24*time.Hour { 218 // maximum number of data points to return: (8*24)h / 3m = 3840 219 return 3 * time.Minute 220 } 221 if opts.duration <= 30*24*time.Hour { 222 // maximum number of data points to return: (30*24)h / 10m = 4320 223 return 10 * time.Minute 224 } 225 return opts.duration / 3000 226 } 227 228 func unixTimeString(t time.Time) string { 229 return strconv.Itoa(int(t.Unix())) 230 }