github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/base/api.go (about) 1 package base 2 3 import ( 4 "encoding/json" 5 "io/ioutil" 6 "net/http" 7 "net/url" 8 "sort" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/gorilla/mux" 16 "github.com/pkg/errors" 17 v1 "github.com/prometheus/client_golang/api/prometheus/v1" 18 "github.com/prometheus/prometheus/model/labels" 19 "github.com/prometheus/prometheus/model/rulefmt" 20 "github.com/weaveworks/common/user" 21 "gopkg.in/yaml.v3" 22 23 "github.com/grafana/dskit/tenant" 24 25 "github.com/grafana/loki/pkg/logproto" 26 "github.com/grafana/loki/pkg/ruler/rulespb" 27 "github.com/grafana/loki/pkg/ruler/rulestore" 28 util_log "github.com/grafana/loki/pkg/util/log" 29 ) 30 31 // In order to reimplement the prometheus rules API, a large amount of code was copied over 32 // This is required because the prometheus api implementation does not allow us to return errors 33 // on rule lookups, which might fail in Cortex's case. 34 35 type response struct { 36 Status string `json:"status"` 37 Data interface{} `json:"data"` 38 ErrorType v1.ErrorType `json:"errorType"` 39 Error string `json:"error"` 40 } 41 42 // AlertDiscovery has info for all active alerts. 43 type AlertDiscovery struct { 44 Alerts []*Alert `json:"alerts"` 45 } 46 47 // Alert has info for an alert. 48 type Alert struct { 49 Labels labels.Labels `json:"labels"` 50 Annotations labels.Labels `json:"annotations"` 51 State string `json:"state"` 52 ActiveAt *time.Time `json:"activeAt"` 53 Value string `json:"value"` 54 } 55 56 // RuleDiscovery has info for all rules 57 type RuleDiscovery struct { 58 RuleGroups []*RuleGroup `json:"groups"` 59 } 60 61 // RuleGroup has info for rules which are part of a group 62 type RuleGroup struct { 63 Name string `json:"name"` 64 File string `json:"file"` 65 // In order to preserve rule ordering, while exposing type (alerting or recording) 66 // specific properties, both alerting and recording rules are exposed in the 67 // same array. 68 Rules []rule `json:"rules"` 69 Interval float64 `json:"interval"` 70 LastEvaluation time.Time `json:"lastEvaluation"` 71 EvaluationTime float64 `json:"evaluationTime"` 72 } 73 74 type rule interface{} 75 76 type alertingRule struct { 77 // State can be "pending", "firing", "inactive". 78 State string `json:"state"` 79 Name string `json:"name"` 80 Query string `json:"query"` 81 Duration float64 `json:"duration"` 82 Labels labels.Labels `json:"labels"` 83 Annotations labels.Labels `json:"annotations"` 84 Alerts []*Alert `json:"alerts"` 85 Health string `json:"health"` 86 LastError string `json:"lastError"` 87 Type v1.RuleType `json:"type"` 88 LastEvaluation time.Time `json:"lastEvaluation"` 89 EvaluationTime float64 `json:"evaluationTime"` 90 } 91 92 type recordingRule struct { 93 Name string `json:"name"` 94 Query string `json:"query"` 95 Labels labels.Labels `json:"labels"` 96 Health string `json:"health"` 97 LastError string `json:"lastError"` 98 Type v1.RuleType `json:"type"` 99 LastEvaluation time.Time `json:"lastEvaluation"` 100 EvaluationTime float64 `json:"evaluationTime"` 101 } 102 103 func respondError(logger log.Logger, w http.ResponseWriter, msg string) { 104 b, err := json.Marshal(&response{ 105 Status: "error", 106 ErrorType: v1.ErrServer, 107 Error: msg, 108 Data: nil, 109 }) 110 111 if err != nil { 112 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 113 http.Error(w, err.Error(), http.StatusInternalServerError) 114 return 115 } 116 117 w.WriteHeader(http.StatusInternalServerError) 118 if n, err := w.Write(b); err != nil { 119 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 120 } 121 } 122 123 // API is used to handle HTTP requests for the ruler service 124 type API struct { 125 ruler *Ruler 126 store rulestore.RuleStore 127 128 logger log.Logger 129 } 130 131 // NewAPI returns a new API struct with the provided ruler and rule store 132 func NewAPI(r *Ruler, s rulestore.RuleStore, logger log.Logger) *API { 133 return &API{ 134 ruler: r, 135 store: s, 136 logger: logger, 137 } 138 } 139 140 func (a *API) PrometheusRules(w http.ResponseWriter, req *http.Request) { 141 logger := util_log.WithContext(req.Context(), a.logger) 142 userID, err := tenant.TenantID(req.Context()) 143 if err != nil || userID == "" { 144 level.Error(logger).Log("msg", "error extracting org id from context", "err", err) 145 respondError(logger, w, "no valid org id found") 146 return 147 } 148 149 w.Header().Set("Content-Type", "application/json") 150 rgs, err := a.ruler.GetRules(req.Context()) 151 152 if err != nil { 153 respondError(logger, w, err.Error()) 154 return 155 } 156 157 groups := make([]*RuleGroup, 0, len(rgs)) 158 159 for _, g := range rgs { 160 grp := RuleGroup{ 161 Name: g.Group.Name, 162 File: g.Group.Namespace, 163 Rules: make([]rule, len(g.ActiveRules)), 164 Interval: g.Group.Interval.Seconds(), 165 LastEvaluation: g.GetEvaluationTimestamp(), 166 EvaluationTime: g.GetEvaluationDuration().Seconds(), 167 } 168 169 for i, rl := range g.ActiveRules { 170 if g.ActiveRules[i].Rule.Alert != "" { 171 alerts := make([]*Alert, 0, len(rl.Alerts)) 172 for _, a := range rl.Alerts { 173 alerts = append(alerts, &Alert{ 174 Labels: logproto.FromLabelAdaptersToLabels(a.Labels), 175 Annotations: logproto.FromLabelAdaptersToLabels(a.Annotations), 176 State: a.GetState(), 177 ActiveAt: &a.ActiveAt, 178 Value: strconv.FormatFloat(a.Value, 'e', -1, 64), 179 }) 180 } 181 grp.Rules[i] = alertingRule{ 182 State: rl.GetState(), 183 Name: rl.Rule.GetAlert(), 184 Query: rl.Rule.GetExpr(), 185 Duration: rl.Rule.For.Seconds(), 186 Labels: logproto.FromLabelAdaptersToLabels(rl.Rule.Labels), 187 Annotations: logproto.FromLabelAdaptersToLabels(rl.Rule.Annotations), 188 Alerts: alerts, 189 Health: rl.GetHealth(), 190 LastError: rl.GetLastError(), 191 LastEvaluation: rl.GetEvaluationTimestamp(), 192 EvaluationTime: rl.GetEvaluationDuration().Seconds(), 193 Type: v1.RuleTypeAlerting, 194 } 195 } else { 196 grp.Rules[i] = recordingRule{ 197 Name: rl.Rule.GetRecord(), 198 Query: rl.Rule.GetExpr(), 199 Labels: logproto.FromLabelAdaptersToLabels(rl.Rule.Labels), 200 Health: rl.GetHealth(), 201 LastError: rl.GetLastError(), 202 LastEvaluation: rl.GetEvaluationTimestamp(), 203 EvaluationTime: rl.GetEvaluationDuration().Seconds(), 204 Type: v1.RuleTypeRecording, 205 } 206 } 207 } 208 groups = append(groups, &grp) 209 } 210 211 // keep data.groups are in order 212 sort.Slice(groups, func(i, j int) bool { 213 return groups[i].File < groups[j].File 214 }) 215 216 b, err := json.Marshal(&response{ 217 Status: "success", 218 Data: &RuleDiscovery{RuleGroups: groups}, 219 }) 220 if err != nil { 221 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 222 respondError(logger, w, "unable to marshal the requested data") 223 return 224 } 225 w.Header().Set("Content-Type", "application/json") 226 w.WriteHeader(http.StatusOK) 227 if n, err := w.Write(b); err != nil { 228 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 229 } 230 } 231 232 func (a *API) PrometheusAlerts(w http.ResponseWriter, req *http.Request) { 233 logger := util_log.WithContext(req.Context(), a.logger) 234 userID, err := tenant.TenantID(req.Context()) 235 if err != nil || userID == "" { 236 level.Error(logger).Log("msg", "error extracting org id from context", "err", err) 237 respondError(logger, w, "no valid org id found") 238 return 239 } 240 241 w.Header().Set("Content-Type", "application/json") 242 rgs, err := a.ruler.GetRules(req.Context()) 243 244 if err != nil { 245 respondError(logger, w, err.Error()) 246 return 247 } 248 249 alerts := []*Alert{} 250 251 for _, g := range rgs { 252 for _, rl := range g.ActiveRules { 253 if rl.Rule.Alert != "" { 254 for _, a := range rl.Alerts { 255 alerts = append(alerts, &Alert{ 256 Labels: logproto.FromLabelAdaptersToLabels(a.Labels), 257 Annotations: logproto.FromLabelAdaptersToLabels(a.Annotations), 258 State: a.GetState(), 259 ActiveAt: &a.ActiveAt, 260 Value: strconv.FormatFloat(a.Value, 'e', -1, 64), 261 }) 262 } 263 } 264 } 265 } 266 267 b, err := json.Marshal(&response{ 268 Status: "success", 269 Data: &AlertDiscovery{Alerts: alerts}, 270 }) 271 if err != nil { 272 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 273 respondError(logger, w, "unable to marshal the requested data") 274 return 275 } 276 w.Header().Set("Content-Type", "application/json") 277 w.WriteHeader(http.StatusOK) 278 if n, err := w.Write(b); err != nil { 279 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 280 } 281 } 282 283 var ( 284 // ErrNoNamespace signals that no namespace was specified in the request 285 ErrNoNamespace = errors.New("a namespace must be provided in the request") 286 // ErrNoGroupName signals a group name url parameter was not found 287 ErrNoGroupName = errors.New("a matching group name must be provided in the request") 288 // ErrNoRuleGroups signals the rule group requested does not exist 289 ErrNoRuleGroups = errors.New("no rule groups found") 290 // ErrBadRuleGroup is returned when the provided rule group can not be unmarshalled 291 ErrBadRuleGroup = errors.New("unable to decoded rule group") 292 ) 293 294 func marshalAndSend(output interface{}, w http.ResponseWriter, logger log.Logger) { 295 d, err := yaml.Marshal(&output) 296 if err != nil { 297 level.Error(logger).Log("msg", "error marshalling yaml rule groups", "err", err) 298 http.Error(w, err.Error(), http.StatusInternalServerError) 299 return 300 } 301 302 w.Header().Set("Content-Type", "application/yaml") 303 if _, err := w.Write(d); err != nil { 304 level.Error(logger).Log("msg", "error writing yaml response", "err", err) 305 return 306 } 307 } 308 309 func respondAccepted(w http.ResponseWriter, logger log.Logger) { 310 b, err := json.Marshal(&response{ 311 Status: "success", 312 }) 313 if err != nil { 314 level.Error(logger).Log("msg", "error marshaling json response", "err", err) 315 respondError(logger, w, "unable to marshal the requested data") 316 return 317 } 318 w.Header().Set("Content-Type", "application/json") 319 320 // Return a status accepted because the rule has been stored and queued for polling, but is not currently active 321 w.WriteHeader(http.StatusAccepted) 322 if n, err := w.Write(b); err != nil { 323 level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) 324 } 325 } 326 327 // parseNamespace parses the namespace from the provided set of params, in this 328 // api these params are derived from the url path 329 func parseNamespace(params map[string]string) (string, error) { 330 namespace, exists := params["namespace"] 331 if !exists { 332 return "", ErrNoNamespace 333 } 334 335 namespace, err := url.PathUnescape(namespace) 336 if err != nil { 337 return "", err 338 } 339 340 return namespace, nil 341 } 342 343 // parseGroupName parses the group name from the provided set of params, in this 344 // api these params are derived from the url path 345 func parseGroupName(params map[string]string) (string, error) { 346 groupName, exists := params["groupName"] 347 if !exists { 348 return "", ErrNoGroupName 349 } 350 351 groupName, err := url.PathUnescape(groupName) 352 if err != nil { 353 return "", err 354 } 355 356 return groupName, nil 357 } 358 359 // parseRequest parses the incoming request to parse out the userID, rules namespace, and rule group name 360 // and returns them in that order. It also allows users to require a namespace or group name and return 361 // an error if it they can not be parsed. 362 func parseRequest(req *http.Request, requireNamespace, requireGroup bool) (string, string, string, error) { 363 userID, err := tenant.TenantID(req.Context()) 364 if err != nil { 365 return "", "", "", user.ErrNoOrgID 366 } 367 368 vars := mux.Vars(req) 369 370 namespace, err := parseNamespace(vars) 371 if err != nil { 372 if err != ErrNoNamespace || requireNamespace { 373 return "", "", "", err 374 } 375 } 376 377 group, err := parseGroupName(vars) 378 if err != nil { 379 if err != ErrNoGroupName || requireGroup { 380 return "", "", "", err 381 } 382 } 383 384 return userID, namespace, group, nil 385 } 386 387 func (a *API) ListRules(w http.ResponseWriter, req *http.Request) { 388 logger := util_log.WithContext(req.Context(), a.logger) 389 390 userID, namespace, _, err := parseRequest(req, false, false) 391 if err != nil { 392 respondError(logger, w, err.Error()) 393 return 394 } 395 396 level.Debug(logger).Log("msg", "retrieving rule groups with namespace", "userID", userID, "namespace", namespace) 397 rgs, err := a.store.ListRuleGroupsForUserAndNamespace(req.Context(), userID, namespace) 398 if err != nil { 399 http.Error(w, err.Error(), http.StatusBadRequest) 400 return 401 } 402 403 if len(rgs) == 0 { 404 level.Info(logger).Log("msg", "no rule groups found", "userID", userID) 405 http.Error(w, ErrNoRuleGroups.Error(), http.StatusNotFound) 406 return 407 } 408 409 err = a.store.LoadRuleGroups(req.Context(), map[string]rulespb.RuleGroupList{userID: rgs}) 410 if err != nil { 411 http.Error(w, err.Error(), http.StatusBadRequest) 412 return 413 } 414 415 level.Debug(logger).Log("msg", "retrieved rule groups from rule store", "userID", userID, "num_namespaces", len(rgs)) 416 417 formatted := rgs.Formatted() 418 marshalAndSend(formatted, w, logger) 419 } 420 421 func (a *API) GetRuleGroup(w http.ResponseWriter, req *http.Request) { 422 logger := util_log.WithContext(req.Context(), a.logger) 423 userID, namespace, groupName, err := parseRequest(req, true, true) 424 if err != nil { 425 respondError(logger, w, err.Error()) 426 return 427 } 428 429 rg, err := a.store.GetRuleGroup(req.Context(), userID, namespace, groupName) 430 if err != nil { 431 if errors.Is(err, rulestore.ErrGroupNotFound) { 432 http.Error(w, err.Error(), http.StatusNotFound) 433 return 434 } 435 http.Error(w, err.Error(), http.StatusBadRequest) 436 return 437 } 438 439 formatted := rulespb.FromProto(rg) 440 marshalAndSend(formatted, w, logger) 441 } 442 443 func (a *API) CreateRuleGroup(w http.ResponseWriter, req *http.Request) { 444 logger := util_log.WithContext(req.Context(), a.logger) 445 userID, namespace, _, err := parseRequest(req, true, false) 446 if err != nil { 447 respondError(logger, w, err.Error()) 448 return 449 } 450 451 payload, err := ioutil.ReadAll(req.Body) 452 if err != nil { 453 level.Error(logger).Log("msg", "unable to read rule group payload", "err", err.Error()) 454 http.Error(w, err.Error(), http.StatusBadRequest) 455 return 456 } 457 458 level.Debug(logger).Log("msg", "attempting to unmarshal rulegroup", "userID", userID, "group", string(payload)) 459 460 rg := rulefmt.RuleGroup{} 461 err = yaml.Unmarshal(payload, &rg) 462 if err != nil { 463 level.Error(logger).Log("msg", "unable to unmarshal rule group payload", "err", err.Error()) 464 http.Error(w, ErrBadRuleGroup.Error(), http.StatusBadRequest) 465 return 466 } 467 468 errs := a.ruler.manager.ValidateRuleGroup(rg) 469 if len(errs) > 0 { 470 e := []string{} 471 for _, err := range errs { 472 level.Error(logger).Log("msg", "unable to validate rule group payload", "err", err.Error()) 473 e = append(e, err.Error()) 474 } 475 476 http.Error(w, strings.Join(e, ", "), http.StatusBadRequest) 477 return 478 } 479 480 if err := a.ruler.AssertMaxRulesPerRuleGroup(userID, len(rg.Rules)); err != nil { 481 level.Error(logger).Log("msg", "limit validation failure", "err", err.Error(), "user", userID) 482 http.Error(w, err.Error(), http.StatusBadRequest) 483 return 484 } 485 486 rgs, err := a.store.ListRuleGroupsForUserAndNamespace(req.Context(), userID, "") 487 if err != nil { 488 level.Error(logger).Log("msg", "unable to fetch current rule groups for validation", "err", err.Error(), "user", userID) 489 http.Error(w, err.Error(), http.StatusInternalServerError) 490 return 491 } 492 493 if err := a.ruler.AssertMaxRuleGroups(userID, len(rgs)+1); err != nil { 494 level.Error(logger).Log("msg", "limit validation failure", "err", err.Error(), "user", userID) 495 http.Error(w, err.Error(), http.StatusBadRequest) 496 return 497 } 498 499 rgProto := rulespb.ToProto(userID, namespace, rg) 500 501 level.Debug(logger).Log("msg", "attempting to store rulegroup", "userID", userID, "group", rgProto.String()) 502 err = a.store.SetRuleGroup(req.Context(), userID, namespace, rgProto) 503 if err != nil { 504 level.Error(logger).Log("msg", "unable to store rule group", "err", err.Error()) 505 http.Error(w, err.Error(), http.StatusInternalServerError) 506 return 507 } 508 509 respondAccepted(w, logger) 510 } 511 512 func (a *API) DeleteNamespace(w http.ResponseWriter, req *http.Request) { 513 logger := util_log.WithContext(req.Context(), a.logger) 514 515 userID, namespace, _, err := parseRequest(req, true, false) 516 if err != nil { 517 respondError(logger, w, err.Error()) 518 return 519 } 520 521 err = a.store.DeleteNamespace(req.Context(), userID, namespace) 522 if err != nil { 523 if err == rulestore.ErrGroupNamespaceNotFound { 524 http.Error(w, err.Error(), http.StatusNotFound) 525 return 526 } 527 respondError(logger, w, err.Error()) 528 return 529 } 530 531 respondAccepted(w, logger) 532 } 533 534 func (a *API) DeleteRuleGroup(w http.ResponseWriter, req *http.Request) { 535 logger := util_log.WithContext(req.Context(), a.logger) 536 537 userID, namespace, groupName, err := parseRequest(req, true, true) 538 if err != nil { 539 respondError(logger, w, err.Error()) 540 return 541 } 542 543 err = a.store.DeleteRuleGroup(req.Context(), userID, namespace, groupName) 544 if err != nil { 545 if err == rulestore.ErrGroupNotFound { 546 http.Error(w, err.Error(), http.StatusNotFound) 547 return 548 } 549 respondError(logger, w, err.Error()) 550 return 551 } 552 553 respondAccepted(w, logger) 554 }