github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ruler/api.go (about) 1 package ruler 2 3 import ( 4 "encoding/json" 5 "io/ioutil" 6 "net/http" 7 "net/url" 8 "sort" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/gorilla/mux" 16 "github.com/pkg/errors" 17 v1 "github.com/prometheus/client_golang/api/prometheus/v1" 18 "github.com/prometheus/prometheus/pkg/labels" 19 "github.com/prometheus/prometheus/pkg/rulefmt" 20 "github.com/weaveworks/common/user" 21 "gopkg.in/yaml.v3" 22 23 "github.com/cortexproject/cortex/pkg/cortexpb" 24 "github.com/cortexproject/cortex/pkg/ruler/rulespb" 25 "github.com/cortexproject/cortex/pkg/ruler/rulestore" 26 "github.com/cortexproject/cortex/pkg/tenant" 27 util_log "github.com/cortexproject/cortex/pkg/util/log" 28 ) 29 30 // In order to reimplement the prometheus rules API, a large amount of code was copied over 31 // This is required because the prometheus api implementation does not allow us to return errors 32 // on rule lookups, which might fail in Cortex's case. 33 34 type response struct { 35 Status string `json:"status"` 36 Data interface{} `json:"data"` 37 ErrorType v1.ErrorType `json:"errorType"` 38 Error string `json:"error"` 39 } 40 41 // AlertDiscovery has info for all active alerts. 42 type AlertDiscovery struct { 43 Alerts []*Alert `json:"alerts"` 44 } 45 46 // Alert has info for an alert. 47 type Alert struct { 48 Labels labels.Labels `json:"labels"` 49 Annotations labels.Labels `json:"annotations"` 50 State string `json:"state"` 51 ActiveAt *time.Time `json:"activeAt"` 52 Value string `json:"value"` 53 } 54 55 // RuleDiscovery has info for all rules 56 type RuleDiscovery struct { 57 RuleGroups []*RuleGroup `json:"groups"` 58 } 59 60 // RuleGroup has info for rules which are part of a group 61 type RuleGroup struct { 62 Name string `json:"name"` 63 File string `json:"file"` 64 // In order to preserve rule ordering, while exposing type (alerting or recording) 65 // specific properties, both alerting and recording rules are exposed in the 66 // same array. 67 Rules []rule `json:"rules"` 68 Interval float64 `json:"interval"` 69 LastEvaluation time.Time `json:"lastEvaluation"` 70 EvaluationTime float64 `json:"evaluationTime"` 71 } 72 73 type rule interface{} 74 75 type alertingRule struct { 76 // State can be "pending", "firing", "inactive". 77 State string `json:"state"` 78 Name string `json:"name"` 79 Query string `json:"query"` 80 Duration float64 `json:"duration"` 81 Labels labels.Labels `json:"labels"` 82 Annotations labels.Labels `json:"annotations"` 83 Alerts []*Alert `json:"alerts"` 84 Health string `json:"health"` 85 LastError string `json:"lastError"` 86 Type v1.RuleType `json:"type"` 87 LastEvaluation time.Time `json:"lastEvaluation"` 88 EvaluationTime float64 `json:"evaluationTime"` 89 } 90 91 type recordingRule struct { 92 Name string `json:"name"` 93 Query string `json:"query"` 94 Labels labels.Labels `json:"labels"` 95 Health string `json:"health"` 96 LastError string `json:"lastError"` 97 Type v1.RuleType `json:"type"` 98 LastEvaluation time.Time `json:"lastEvaluation"` 99 EvaluationTime float64 `json:"evaluationTime"` 100 } 101 102 func respondError(logger log.Logger, w http.ResponseWriter, msg string) { 103 b, err := json.Marshal(&response{ 104 Status: "error", 105 ErrorType: v1.ErrServer, 106 Error: msg, 107 Data: nil, 108 }) 109 110 if err != nil { 111 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 112 http.Error(w, err.Error(), http.StatusInternalServerError) 113 return 114 } 115 116 w.WriteHeader(http.StatusInternalServerError) 117 if n, err := w.Write(b); err != nil { 118 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 119 } 120 } 121 122 // API is used to handle HTTP requests for the ruler service 123 type API struct { 124 ruler *Ruler 125 store rulestore.RuleStore 126 127 logger log.Logger 128 } 129 130 // NewAPI returns a new API struct with the provided ruler and rule store 131 func NewAPI(r *Ruler, s rulestore.RuleStore, logger log.Logger) *API { 132 return &API{ 133 ruler: r, 134 store: s, 135 logger: logger, 136 } 137 } 138 139 func (a *API) PrometheusRules(w http.ResponseWriter, req *http.Request) { 140 logger := util_log.WithContext(req.Context(), a.logger) 141 userID, err := tenant.TenantID(req.Context()) 142 if err != nil || userID == "" { 143 level.Error(logger).Log("msg", "error extracting org id from context", "err", err) 144 respondError(logger, w, "no valid org id found") 145 return 146 } 147 148 w.Header().Set("Content-Type", "application/json") 149 rgs, err := a.ruler.GetRules(req.Context()) 150 151 if err != nil { 152 respondError(logger, w, err.Error()) 153 return 154 } 155 156 groups := make([]*RuleGroup, 0, len(rgs)) 157 158 for _, g := range rgs { 159 grp := RuleGroup{ 160 Name: g.Group.Name, 161 File: g.Group.Namespace, 162 Rules: make([]rule, len(g.ActiveRules)), 163 Interval: g.Group.Interval.Seconds(), 164 LastEvaluation: g.GetEvaluationTimestamp(), 165 EvaluationTime: g.GetEvaluationDuration().Seconds(), 166 } 167 168 for i, rl := range g.ActiveRules { 169 if g.ActiveRules[i].Rule.Alert != "" { 170 alerts := make([]*Alert, 0, len(rl.Alerts)) 171 for _, a := range rl.Alerts { 172 alerts = append(alerts, &Alert{ 173 Labels: cortexpb.FromLabelAdaptersToLabels(a.Labels), 174 Annotations: cortexpb.FromLabelAdaptersToLabels(a.Annotations), 175 State: a.GetState(), 176 ActiveAt: &a.ActiveAt, 177 Value: strconv.FormatFloat(a.Value, 'e', -1, 64), 178 }) 179 } 180 grp.Rules[i] = alertingRule{ 181 State: rl.GetState(), 182 Name: rl.Rule.GetAlert(), 183 Query: rl.Rule.GetExpr(), 184 Duration: rl.Rule.For.Seconds(), 185 Labels: cortexpb.FromLabelAdaptersToLabels(rl.Rule.Labels), 186 Annotations: cortexpb.FromLabelAdaptersToLabels(rl.Rule.Annotations), 187 Alerts: alerts, 188 Health: rl.GetHealth(), 189 LastError: rl.GetLastError(), 190 LastEvaluation: rl.GetEvaluationTimestamp(), 191 EvaluationTime: rl.GetEvaluationDuration().Seconds(), 192 Type: v1.RuleTypeAlerting, 193 } 194 } else { 195 grp.Rules[i] = recordingRule{ 196 Name: rl.Rule.GetRecord(), 197 Query: rl.Rule.GetExpr(), 198 Labels: cortexpb.FromLabelAdaptersToLabels(rl.Rule.Labels), 199 Health: rl.GetHealth(), 200 LastError: rl.GetLastError(), 201 LastEvaluation: rl.GetEvaluationTimestamp(), 202 EvaluationTime: rl.GetEvaluationDuration().Seconds(), 203 Type: v1.RuleTypeRecording, 204 } 205 } 206 } 207 groups = append(groups, &grp) 208 } 209 210 // keep data.groups are in order 211 sort.Slice(groups, func(i, j int) bool { 212 return groups[i].File < groups[j].File 213 }) 214 215 b, err := json.Marshal(&response{ 216 Status: "success", 217 Data: &RuleDiscovery{RuleGroups: groups}, 218 }) 219 if err != nil { 220 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 221 respondError(logger, w, "unable to marshal the requested data") 222 return 223 } 224 w.Header().Set("Content-Type", "application/json") 225 w.WriteHeader(http.StatusOK) 226 if n, err := w.Write(b); err != nil { 227 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 228 } 229 } 230 231 func (a *API) PrometheusAlerts(w http.ResponseWriter, req *http.Request) { 232 logger := util_log.WithContext(req.Context(), a.logger) 233 userID, err := tenant.TenantID(req.Context()) 234 if err != nil || userID == "" { 235 level.Error(logger).Log("msg", "error extracting org id from context", "err", err) 236 respondError(logger, w, "no valid org id found") 237 return 238 } 239 240 w.Header().Set("Content-Type", "application/json") 241 rgs, err := a.ruler.GetRules(req.Context()) 242 243 if err != nil { 244 respondError(logger, w, err.Error()) 245 return 246 } 247 248 alerts := []*Alert{} 249 250 for _, g := range rgs { 251 for _, rl := range g.ActiveRules { 252 if rl.Rule.Alert != "" { 253 for _, a := range rl.Alerts { 254 alerts = append(alerts, &Alert{ 255 Labels: cortexpb.FromLabelAdaptersToLabels(a.Labels), 256 Annotations: cortexpb.FromLabelAdaptersToLabels(a.Annotations), 257 State: a.GetState(), 258 ActiveAt: &a.ActiveAt, 259 Value: strconv.FormatFloat(a.Value, 'e', -1, 64), 260 }) 261 } 262 } 263 } 264 } 265 266 b, err := json.Marshal(&response{ 267 Status: "success", 268 Data: &AlertDiscovery{Alerts: alerts}, 269 }) 270 if err != nil { 271 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 272 respondError(logger, w, "unable to marshal the requested data") 273 return 274 } 275 w.Header().Set("Content-Type", "application/json") 276 w.WriteHeader(http.StatusOK) 277 if n, err := w.Write(b); err != nil { 278 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 279 } 280 } 281 282 var ( 283 // ErrNoNamespace signals that no namespace was specified in the request 284 ErrNoNamespace = errors.New("a namespace must be provided in the request") 285 // ErrNoGroupName signals a group name url parameter was not found 286 ErrNoGroupName = errors.New("a matching group name must be provided in the request") 287 // ErrNoRuleGroups signals the rule group requested does not exist 288 ErrNoRuleGroups = errors.New("no rule groups found") 289 // ErrBadRuleGroup is returned when the provided rule group can not be unmarshalled 290 ErrBadRuleGroup = errors.New("unable to decoded rule group") 291 ) 292 293 func marshalAndSend(output interface{}, w http.ResponseWriter, logger log.Logger) { 294 d, err := yaml.Marshal(&output) 295 if err != nil { 296 level.Error(logger).Log("msg", "error marshalling yaml rule groups", "err", err) 297 http.Error(w, err.Error(), http.StatusInternalServerError) 298 return 299 } 300 301 w.Header().Set("Content-Type", "application/yaml") 302 if _, err := w.Write(d); err != nil { 303 level.Error(logger).Log("msg", "error writing yaml response", "err", err) 304 return 305 } 306 } 307 308 func respondAccepted(w http.ResponseWriter, logger log.Logger) { 309 b, err := json.Marshal(&response{ 310 Status: "success", 311 }) 312 if err != nil { 313 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 314 respondError(logger, w, "unable to marshal the requested data") 315 return 316 } 317 w.Header().Set("Content-Type", "application/json") 318 319 // Return a status accepted because the rule has been stored and queued for polling, but is not currently active 320 w.WriteHeader(http.StatusAccepted) 321 if n, err := w.Write(b); err != nil { 322 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 323 } 324 } 325 326 // parseNamespace parses the namespace from the provided set of params, in this 327 // api these params are derived from the url path 328 func parseNamespace(params map[string]string) (string, error) { 329 namespace, exists := params["namespace"] 330 if !exists { 331 return "", ErrNoNamespace 332 } 333 334 namespace, err := url.PathUnescape(namespace) 335 if err != nil { 336 return "", err 337 } 338 339 return namespace, nil 340 } 341 342 // parseGroupName parses the group name from the provided set of params, in this 343 // api these params are derived from the url path 344 func parseGroupName(params map[string]string) (string, error) { 345 groupName, exists := params["groupName"] 346 if !exists { 347 return "", ErrNoGroupName 348 } 349 350 groupName, err := url.PathUnescape(groupName) 351 if err != nil { 352 return "", err 353 } 354 355 return groupName, nil 356 } 357 358 // parseRequest parses the incoming request to parse out the userID, rules namespace, and rule group name 359 // and returns them in that order. It also allows users to require a namespace or group name and return 360 // an error if it they can not be parsed. 361 func parseRequest(req *http.Request, requireNamespace, requireGroup bool) (string, string, string, error) { 362 userID, err := tenant.TenantID(req.Context()) 363 if err != nil { 364 return "", "", "", user.ErrNoOrgID 365 } 366 367 vars := mux.Vars(req) 368 369 namespace, err := parseNamespace(vars) 370 if err != nil { 371 if err != ErrNoNamespace || requireNamespace { 372 return "", "", "", err 373 } 374 } 375 376 group, err := parseGroupName(vars) 377 if err != nil { 378 if err != ErrNoGroupName || requireGroup { 379 return "", "", "", err 380 } 381 } 382 383 return userID, namespace, group, nil 384 } 385 386 func (a *API) ListRules(w http.ResponseWriter, req *http.Request) { 387 logger := util_log.WithContext(req.Context(), a.logger) 388 389 userID, namespace, _, err := parseRequest(req, false, false) 390 if err != nil { 391 respondError(logger, w, err.Error()) 392 return 393 } 394 395 level.Debug(logger).Log("msg", "retrieving rule groups with namespace", "userID", userID, "namespace", namespace) 396 rgs, err := a.store.ListRuleGroupsForUserAndNamespace(req.Context(), userID, namespace) 397 if err != nil { 398 http.Error(w, err.Error(), http.StatusBadRequest) 399 return 400 } 401 402 if len(rgs) == 0 { 403 level.Info(logger).Log("msg", "no rule groups found", "userID", userID) 404 http.Error(w, ErrNoRuleGroups.Error(), http.StatusNotFound) 405 return 406 } 407 408 err = a.store.LoadRuleGroups(req.Context(), map[string]rulespb.RuleGroupList{userID: rgs}) 409 if err != nil { 410 http.Error(w, err.Error(), http.StatusBadRequest) 411 return 412 } 413 414 level.Debug(logger).Log("msg", "retrieved rule groups from rule store", "userID", userID, "num_namespaces", len(rgs)) 415 416 formatted := rgs.Formatted() 417 marshalAndSend(formatted, w, logger) 418 } 419 420 func (a *API) GetRuleGroup(w http.ResponseWriter, req *http.Request) { 421 logger := util_log.WithContext(req.Context(), a.logger) 422 userID, namespace, groupName, err := parseRequest(req, true, true) 423 if err != nil { 424 respondError(logger, w, err.Error()) 425 return 426 } 427 428 rg, err := a.store.GetRuleGroup(req.Context(), userID, namespace, groupName) 429 if err != nil { 430 if errors.Is(err, rulestore.ErrGroupNotFound) { 431 http.Error(w, err.Error(), http.StatusNotFound) 432 return 433 } 434 http.Error(w, err.Error(), http.StatusBadRequest) 435 return 436 } 437 438 formatted := rulespb.FromProto(rg) 439 marshalAndSend(formatted, w, logger) 440 } 441 442 func (a *API) CreateRuleGroup(w http.ResponseWriter, req *http.Request) { 443 logger := util_log.WithContext(req.Context(), a.logger) 444 userID, namespace, _, err := parseRequest(req, true, false) 445 if err != nil { 446 respondError(logger, w, err.Error()) 447 return 448 } 449 450 payload, err := ioutil.ReadAll(req.Body) 451 if err != nil { 452 level.Error(logger).Log("msg", "unable to read rule group payload", "err", err.Error()) 453 http.Error(w, err.Error(), http.StatusBadRequest) 454 return 455 } 456 457 level.Debug(logger).Log("msg", "attempting to unmarshal rulegroup", "userID", userID, "group", string(payload)) 458 459 rg := rulefmt.RuleGroup{} 460 err = yaml.Unmarshal(payload, &rg) 461 if err != nil { 462 level.Error(logger).Log("msg", "unable to unmarshal rule group payload", "err", err.Error()) 463 http.Error(w, ErrBadRuleGroup.Error(), http.StatusBadRequest) 464 return 465 } 466 467 errs := a.ruler.manager.ValidateRuleGroup(rg) 468 if len(errs) > 0 { 469 e := []string{} 470 for _, err := range errs { 471 level.Error(logger).Log("msg", "unable to validate rule group payload", "err", err.Error()) 472 e = append(e, err.Error()) 473 } 474 475 http.Error(w, strings.Join(e, ", "), http.StatusBadRequest) 476 return 477 } 478 479 if err := a.ruler.AssertMaxRulesPerRuleGroup(userID, len(rg.Rules)); err != nil { 480 level.Error(logger).Log("msg", "limit validation failure", "err", err.Error(), "user", userID) 481 http.Error(w, err.Error(), http.StatusBadRequest) 482 return 483 } 484 485 rgs, err := a.store.ListRuleGroupsForUserAndNamespace(req.Context(), userID, "") 486 if err != nil { 487 level.Error(logger).Log("msg", "unable to fetch current rule groups for validation", "err", err.Error(), "user", userID) 488 http.Error(w, err.Error(), http.StatusInternalServerError) 489 return 490 } 491 492 if err := a.ruler.AssertMaxRuleGroups(userID, len(rgs)+1); err != nil { 493 level.Error(logger).Log("msg", "limit validation failure", "err", err.Error(), "user", userID) 494 http.Error(w, err.Error(), http.StatusBadRequest) 495 return 496 } 497 498 rgProto := rulespb.ToProto(userID, namespace, rg) 499 500 level.Debug(logger).Log("msg", "attempting to store rulegroup", "userID", userID, "group", rgProto.String()) 501 err = a.store.SetRuleGroup(req.Context(), userID, namespace, rgProto) 502 if err != nil { 503 level.Error(logger).Log("msg", "unable to store rule group", "err", err.Error()) 504 http.Error(w, err.Error(), http.StatusInternalServerError) 505 return 506 } 507 508 respondAccepted(w, logger) 509 } 510 511 func (a *API) DeleteNamespace(w http.ResponseWriter, req *http.Request) { 512 logger := util_log.WithContext(req.Context(), a.logger) 513 514 userID, namespace, _, err := parseRequest(req, true, false) 515 if err != nil { 516 respondError(logger, w, err.Error()) 517 return 518 } 519 520 err = a.store.DeleteNamespace(req.Context(), userID, namespace) 521 if err != nil { 522 if err == rulestore.ErrGroupNamespaceNotFound { 523 http.Error(w, err.Error(), http.StatusNotFound) 524 return 525 } 526 respondError(logger, w, err.Error()) 527 return 528 } 529 530 respondAccepted(w, logger) 531 } 532 533 func (a *API) DeleteRuleGroup(w http.ResponseWriter, req *http.Request) { 534 logger := util_log.WithContext(req.Context(), a.logger) 535 536 userID, namespace, groupName, err := parseRequest(req, true, true) 537 if err != nil { 538 respondError(logger, w, err.Error()) 539 return 540 } 541 542 err = a.store.DeleteRuleGroup(req.Context(), userID, namespace, groupName) 543 if err != nil { 544 if err == rulestore.ErrGroupNotFound { 545 http.Error(w, err.Error(), http.StatusNotFound) 546 return 547 } 548 respondError(logger, w, err.Error()) 549 return 550 } 551 552 respondAccepted(w, logger) 553 }