github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/base/api.go (about)

     1  package base
     2  
     3  import (
     4  	"encoding/json"
     5  	"io/ioutil"
     6  	"net/http"
     7  	"net/url"
     8  	"sort"
     9  	"strconv"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/gorilla/mux"
    16  	"github.com/pkg/errors"
    17  	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
    18  	"github.com/prometheus/prometheus/model/labels"
    19  	"github.com/prometheus/prometheus/model/rulefmt"
    20  	"github.com/weaveworks/common/user"
    21  	"gopkg.in/yaml.v3"
    22  
    23  	"github.com/grafana/dskit/tenant"
    24  
    25  	"github.com/grafana/loki/pkg/logproto"
    26  	"github.com/grafana/loki/pkg/ruler/rulespb"
    27  	"github.com/grafana/loki/pkg/ruler/rulestore"
    28  	util_log "github.com/grafana/loki/pkg/util/log"
    29  )
    30  
    31  // In order to reimplement the prometheus rules API, a large amount of code was copied over
    32  // This is required because the prometheus api implementation does not allow us to return errors
    33  // on rule lookups, which might fail in Cortex's case.
    34  
// response is the JSON envelope written by the Prometheus-compatible handlers
// in this file. Handlers set Status to "success" together with Data, while
// respondError sets Status to "error" together with ErrorType and Error.
// None of the fields use omitempty, so all four keys are always present in
// the encoded output.
type response struct {
	Status    string       `json:"status"`
	Data      interface{}  `json:"data"`
	ErrorType v1.ErrorType `json:"errorType"`
	Error     string       `json:"error"`
}
    41  
// AlertDiscovery is the data payload of the PrometheusAlerts handler: a flat
// list with one entry per alert reported by the ruler's alerting rules.
type AlertDiscovery struct {
	Alerts []*Alert `json:"alerts"`
}
    46  
// Alert is the JSON representation of a single alert, as emitted by the
// PrometheusRules and PrometheusAlerts handlers. Those handlers fill Value
// with the alert's float value formatted by strconv.FormatFloat using the
// 'e' format.
type Alert struct {
	Labels      labels.Labels `json:"labels"`
	Annotations labels.Labels `json:"annotations"`
	State       string        `json:"state"`
	ActiveAt    *time.Time    `json:"activeAt"`
	Value       string        `json:"value"`
}
    55  
// RuleDiscovery is the data payload of the PrometheusRules handler; it holds
// every rule group returned by the ruler, encoded under the "groups" key.
type RuleDiscovery struct {
	RuleGroups []*RuleGroup `json:"groups"`
}
    60  
// RuleGroup is the JSON representation of one rule group in the rules
// listing. PrometheusRules fills File with the group's namespace and reports
// Interval and EvaluationTime as durations in seconds.
type RuleGroup struct {
	Name string `json:"name"`
	File string `json:"file"`
	// In order to preserve rule ordering, while exposing type (alerting or recording)
	// specific properties, both alerting and recording rules are exposed in the
	// same array.
	Rules          []rule    `json:"rules"`
	Interval       float64   `json:"interval"`
	LastEvaluation time.Time `json:"lastEvaluation"`
	EvaluationTime float64   `json:"evaluationTime"`
}
    73  
// rule is the element type of RuleGroup.Rules. PrometheusRules stores either
// an alertingRule or a recordingRule in each slot.
type rule interface{}
    75  
// alertingRule is the JSON shape of an alerting rule inside a RuleGroup.
// PrometheusRules sets Type to v1.RuleTypeAlerting and reports Duration (the
// rule's "for" period) and EvaluationTime in seconds.
type alertingRule struct {
	// State can be "pending", "firing", "inactive".
	State          string        `json:"state"`
	Name           string        `json:"name"`
	Query          string        `json:"query"`
	Duration       float64       `json:"duration"`
	Labels         labels.Labels `json:"labels"`
	Annotations    labels.Labels `json:"annotations"`
	Alerts         []*Alert      `json:"alerts"`
	Health         string        `json:"health"`
	LastError      string        `json:"lastError"`
	Type           v1.RuleType   `json:"type"`
	LastEvaluation time.Time     `json:"lastEvaluation"`
	EvaluationTime float64       `json:"evaluationTime"`
}
    91  
// recordingRule is the JSON shape of a recording rule inside a RuleGroup.
// PrometheusRules sets Type to v1.RuleTypeRecording and reports
// EvaluationTime in seconds.
type recordingRule struct {
	Name           string        `json:"name"`
	Query          string        `json:"query"`
	Labels         labels.Labels `json:"labels"`
	Health         string        `json:"health"`
	LastError      string        `json:"lastError"`
	Type           v1.RuleType   `json:"type"`
	LastEvaluation time.Time     `json:"lastEvaluation"`
	EvaluationTime float64       `json:"evaluationTime"`
}
   102  
   103  func respondError(logger log.Logger, w http.ResponseWriter, msg string) {
   104  	b, err := json.Marshal(&response{
   105  		Status:    "error",
   106  		ErrorType: v1.ErrServer,
   107  		Error:     msg,
   108  		Data:      nil,
   109  	})
   110  
   111  	if err != nil {
   112  		level.Error(logger).Log("msg", "error marshaling json response", "err", err)
   113  		http.Error(w, err.Error(), http.StatusInternalServerError)
   114  		return
   115  	}
   116  
   117  	w.WriteHeader(http.StatusInternalServerError)
   118  	if n, err := w.Write(b); err != nil {
   119  		level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err)
   120  	}
   121  }
   122  
// API is used to handle HTTP requests for the ruler service.
//
// The handlers in this file use ruler to read the current rule state and to
// validate and limit incoming rule groups, store to list, read, write and
// delete rule groups, and logger as the base logger that is enriched with the
// request context on every call.
type API struct {
	ruler *Ruler
	store rulestore.RuleStore

	logger log.Logger
}
   130  
   131  // NewAPI returns a new API struct with the provided ruler and rule store
   132  func NewAPI(r *Ruler, s rulestore.RuleStore, logger log.Logger) *API {
   133  	return &API{
   134  		ruler:  r,
   135  		store:  s,
   136  		logger: logger,
   137  	}
   138  }
   139  
   140  func (a *API) PrometheusRules(w http.ResponseWriter, req *http.Request) {
   141  	logger := util_log.WithContext(req.Context(), a.logger)
   142  	userID, err := tenant.TenantID(req.Context())
   143  	if err != nil || userID == "" {
   144  		level.Error(logger).Log("msg", "error extracting org id from context", "err", err)
   145  		respondError(logger, w, "no valid org id found")
   146  		return
   147  	}
   148  
   149  	w.Header().Set("Content-Type", "application/json")
   150  	rgs, err := a.ruler.GetRules(req.Context())
   151  
   152  	if err != nil {
   153  		respondError(logger, w, err.Error())
   154  		return
   155  	}
   156  
   157  	groups := make([]*RuleGroup, 0, len(rgs))
   158  
   159  	for _, g := range rgs {
   160  		grp := RuleGroup{
   161  			Name:           g.Group.Name,
   162  			File:           g.Group.Namespace,
   163  			Rules:          make([]rule, len(g.ActiveRules)),
   164  			Interval:       g.Group.Interval.Seconds(),
   165  			LastEvaluation: g.GetEvaluationTimestamp(),
   166  			EvaluationTime: g.GetEvaluationDuration().Seconds(),
   167  		}
   168  
   169  		for i, rl := range g.ActiveRules {
   170  			if g.ActiveRules[i].Rule.Alert != "" {
   171  				alerts := make([]*Alert, 0, len(rl.Alerts))
   172  				for _, a := range rl.Alerts {
   173  					alerts = append(alerts, &Alert{
   174  						Labels:      logproto.FromLabelAdaptersToLabels(a.Labels),
   175  						Annotations: logproto.FromLabelAdaptersToLabels(a.Annotations),
   176  						State:       a.GetState(),
   177  						ActiveAt:    &a.ActiveAt,
   178  						Value:       strconv.FormatFloat(a.Value, 'e', -1, 64),
   179  					})
   180  				}
   181  				grp.Rules[i] = alertingRule{
   182  					State:          rl.GetState(),
   183  					Name:           rl.Rule.GetAlert(),
   184  					Query:          rl.Rule.GetExpr(),
   185  					Duration:       rl.Rule.For.Seconds(),
   186  					Labels:         logproto.FromLabelAdaptersToLabels(rl.Rule.Labels),
   187  					Annotations:    logproto.FromLabelAdaptersToLabels(rl.Rule.Annotations),
   188  					Alerts:         alerts,
   189  					Health:         rl.GetHealth(),
   190  					LastError:      rl.GetLastError(),
   191  					LastEvaluation: rl.GetEvaluationTimestamp(),
   192  					EvaluationTime: rl.GetEvaluationDuration().Seconds(),
   193  					Type:           v1.RuleTypeAlerting,
   194  				}
   195  			} else {
   196  				grp.Rules[i] = recordingRule{
   197  					Name:           rl.Rule.GetRecord(),
   198  					Query:          rl.Rule.GetExpr(),
   199  					Labels:         logproto.FromLabelAdaptersToLabels(rl.Rule.Labels),
   200  					Health:         rl.GetHealth(),
   201  					LastError:      rl.GetLastError(),
   202  					LastEvaluation: rl.GetEvaluationTimestamp(),
   203  					EvaluationTime: rl.GetEvaluationDuration().Seconds(),
   204  					Type:           v1.RuleTypeRecording,
   205  				}
   206  			}
   207  		}
   208  		groups = append(groups, &grp)
   209  	}
   210  
   211  	// keep data.groups are in order
   212  	sort.Slice(groups, func(i, j int) bool {
   213  		return groups[i].File < groups[j].File
   214  	})
   215  
   216  	b, err := json.Marshal(&response{
   217  		Status: "success",
   218  		Data:   &RuleDiscovery{RuleGroups: groups},
   219  	})
   220  	if err != nil {
   221  		level.Error(logger).Log("msg", "error marshaling json response", "err", err)
   222  		respondError(logger, w, "unable to marshal the requested data")
   223  		return
   224  	}
   225  	w.Header().Set("Content-Type", "application/json")
   226  	w.WriteHeader(http.StatusOK)
   227  	if n, err := w.Write(b); err != nil {
   228  		level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err)
   229  	}
   230  }
   231  
   232  func (a *API) PrometheusAlerts(w http.ResponseWriter, req *http.Request) {
   233  	logger := util_log.WithContext(req.Context(), a.logger)
   234  	userID, err := tenant.TenantID(req.Context())
   235  	if err != nil || userID == "" {
   236  		level.Error(logger).Log("msg", "error extracting org id from context", "err", err)
   237  		respondError(logger, w, "no valid org id found")
   238  		return
   239  	}
   240  
   241  	w.Header().Set("Content-Type", "application/json")
   242  	rgs, err := a.ruler.GetRules(req.Context())
   243  
   244  	if err != nil {
   245  		respondError(logger, w, err.Error())
   246  		return
   247  	}
   248  
   249  	alerts := []*Alert{}
   250  
   251  	for _, g := range rgs {
   252  		for _, rl := range g.ActiveRules {
   253  			if rl.Rule.Alert != "" {
   254  				for _, a := range rl.Alerts {
   255  					alerts = append(alerts, &Alert{
   256  						Labels:      logproto.FromLabelAdaptersToLabels(a.Labels),
   257  						Annotations: logproto.FromLabelAdaptersToLabels(a.Annotations),
   258  						State:       a.GetState(),
   259  						ActiveAt:    &a.ActiveAt,
   260  						Value:       strconv.FormatFloat(a.Value, 'e', -1, 64),
   261  					})
   262  				}
   263  			}
   264  		}
   265  	}
   266  
   267  	b, err := json.Marshal(&response{
   268  		Status: "success",
   269  		Data:   &AlertDiscovery{Alerts: alerts},
   270  	})
   271  	if err != nil {
   272  		level.Error(logger).Log("msg", "error marshaling json response", "err", err)
   273  		respondError(logger, w, "unable to marshal the requested data")
   274  		return
   275  	}
   276  	w.Header().Set("Content-Type", "application/json")
   277  	w.WriteHeader(http.StatusOK)
   278  	if n, err := w.Write(b); err != nil {
   279  		level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err)
   280  	}
   281  }
   282  
   283  var (
   284  	// ErrNoNamespace signals that no namespace was specified in the request
   285  	ErrNoNamespace = errors.New("a namespace must be provided in the request")
   286  	// ErrNoGroupName signals a group name url parameter was not found
   287  	ErrNoGroupName = errors.New("a matching group name must be provided in the request")
   288  	// ErrNoRuleGroups signals the rule group requested does not exist
   289  	ErrNoRuleGroups = errors.New("no rule groups found")
   290  	// ErrBadRuleGroup is returned when the provided rule group can not be unmarshalled
   291  	ErrBadRuleGroup = errors.New("unable to decoded rule group")
   292  )
   293  
   294  func marshalAndSend(output interface{}, w http.ResponseWriter, logger log.Logger) {
   295  	d, err := yaml.Marshal(&output)
   296  	if err != nil {
   297  		level.Error(logger).Log("msg", "error marshalling yaml rule groups", "err", err)
   298  		http.Error(w, err.Error(), http.StatusInternalServerError)
   299  		return
   300  	}
   301  
   302  	w.Header().Set("Content-Type", "application/yaml")
   303  	if _, err := w.Write(d); err != nil {
   304  		level.Error(logger).Log("msg", "error writing yaml response", "err", err)
   305  		return
   306  	}
   307  }
   308  
   309  func respondAccepted(w http.ResponseWriter, logger log.Logger) {
   310  	b, err := json.Marshal(&response{
   311  		Status: "success",
   312  	})
   313  	if err != nil {
   314  		level.Error(logger).Log("msg", "error marshaling json response", "err", err)
   315  		respondError(logger, w, "unable to marshal the requested data")
   316  		return
   317  	}
   318  	w.Header().Set("Content-Type", "application/json")
   319  
   320  	// Return a status accepted because the rule has been stored and queued for polling, but is not currently active
   321  	w.WriteHeader(http.StatusAccepted)
   322  	if n, err := w.Write(b); err != nil {
   323  		level.Error(logger).Log("msg", "error writing response", "bytesWritten", n, "err", err)
   324  	}
   325  }
   326  
   327  // parseNamespace parses the namespace from the provided set of params, in this
   328  // api these params are derived from the url path
   329  func parseNamespace(params map[string]string) (string, error) {
   330  	namespace, exists := params["namespace"]
   331  	if !exists {
   332  		return "", ErrNoNamespace
   333  	}
   334  
   335  	namespace, err := url.PathUnescape(namespace)
   336  	if err != nil {
   337  		return "", err
   338  	}
   339  
   340  	return namespace, nil
   341  }
   342  
   343  // parseGroupName parses the group name from the provided set of params, in this
   344  // api these params are derived from the url path
   345  func parseGroupName(params map[string]string) (string, error) {
   346  	groupName, exists := params["groupName"]
   347  	if !exists {
   348  		return "", ErrNoGroupName
   349  	}
   350  
   351  	groupName, err := url.PathUnescape(groupName)
   352  	if err != nil {
   353  		return "", err
   354  	}
   355  
   356  	return groupName, nil
   357  }
   358  
   359  // parseRequest parses the incoming request to parse out the userID, rules namespace, and rule group name
   360  // and returns them in that order. It also allows users to require a namespace or group name and return
   361  // an error if it they can not be parsed.
   362  func parseRequest(req *http.Request, requireNamespace, requireGroup bool) (string, string, string, error) {
   363  	userID, err := tenant.TenantID(req.Context())
   364  	if err != nil {
   365  		return "", "", "", user.ErrNoOrgID
   366  	}
   367  
   368  	vars := mux.Vars(req)
   369  
   370  	namespace, err := parseNamespace(vars)
   371  	if err != nil {
   372  		if err != ErrNoNamespace || requireNamespace {
   373  			return "", "", "", err
   374  		}
   375  	}
   376  
   377  	group, err := parseGroupName(vars)
   378  	if err != nil {
   379  		if err != ErrNoGroupName || requireGroup {
   380  			return "", "", "", err
   381  		}
   382  	}
   383  
   384  	return userID, namespace, group, nil
   385  }
   386  
   387  func (a *API) ListRules(w http.ResponseWriter, req *http.Request) {
   388  	logger := util_log.WithContext(req.Context(), a.logger)
   389  
   390  	userID, namespace, _, err := parseRequest(req, false, false)
   391  	if err != nil {
   392  		respondError(logger, w, err.Error())
   393  		return
   394  	}
   395  
   396  	level.Debug(logger).Log("msg", "retrieving rule groups with namespace", "userID", userID, "namespace", namespace)
   397  	rgs, err := a.store.ListRuleGroupsForUserAndNamespace(req.Context(), userID, namespace)
   398  	if err != nil {
   399  		http.Error(w, err.Error(), http.StatusBadRequest)
   400  		return
   401  	}
   402  
   403  	if len(rgs) == 0 {
   404  		level.Info(logger).Log("msg", "no rule groups found", "userID", userID)
   405  		http.Error(w, ErrNoRuleGroups.Error(), http.StatusNotFound)
   406  		return
   407  	}
   408  
   409  	err = a.store.LoadRuleGroups(req.Context(), map[string]rulespb.RuleGroupList{userID: rgs})
   410  	if err != nil {
   411  		http.Error(w, err.Error(), http.StatusBadRequest)
   412  		return
   413  	}
   414  
   415  	level.Debug(logger).Log("msg", "retrieved rule groups from rule store", "userID", userID, "num_namespaces", len(rgs))
   416  
   417  	formatted := rgs.Formatted()
   418  	marshalAndSend(formatted, w, logger)
   419  }
   420  
   421  func (a *API) GetRuleGroup(w http.ResponseWriter, req *http.Request) {
   422  	logger := util_log.WithContext(req.Context(), a.logger)
   423  	userID, namespace, groupName, err := parseRequest(req, true, true)
   424  	if err != nil {
   425  		respondError(logger, w, err.Error())
   426  		return
   427  	}
   428  
   429  	rg, err := a.store.GetRuleGroup(req.Context(), userID, namespace, groupName)
   430  	if err != nil {
   431  		if errors.Is(err, rulestore.ErrGroupNotFound) {
   432  			http.Error(w, err.Error(), http.StatusNotFound)
   433  			return
   434  		}
   435  		http.Error(w, err.Error(), http.StatusBadRequest)
   436  		return
   437  	}
   438  
   439  	formatted := rulespb.FromProto(rg)
   440  	marshalAndSend(formatted, w, logger)
   441  }
   442  
   443  func (a *API) CreateRuleGroup(w http.ResponseWriter, req *http.Request) {
   444  	logger := util_log.WithContext(req.Context(), a.logger)
   445  	userID, namespace, _, err := parseRequest(req, true, false)
   446  	if err != nil {
   447  		respondError(logger, w, err.Error())
   448  		return
   449  	}
   450  
   451  	payload, err := ioutil.ReadAll(req.Body)
   452  	if err != nil {
   453  		level.Error(logger).Log("msg", "unable to read rule group payload", "err", err.Error())
   454  		http.Error(w, err.Error(), http.StatusBadRequest)
   455  		return
   456  	}
   457  
   458  	level.Debug(logger).Log("msg", "attempting to unmarshal rulegroup", "userID", userID, "group", string(payload))
   459  
   460  	rg := rulefmt.RuleGroup{}
   461  	err = yaml.Unmarshal(payload, &rg)
   462  	if err != nil {
   463  		level.Error(logger).Log("msg", "unable to unmarshal rule group payload", "err", err.Error())
   464  		http.Error(w, ErrBadRuleGroup.Error(), http.StatusBadRequest)
   465  		return
   466  	}
   467  
   468  	errs := a.ruler.manager.ValidateRuleGroup(rg)
   469  	if len(errs) > 0 {
   470  		e := []string{}
   471  		for _, err := range errs {
   472  			level.Error(logger).Log("msg", "unable to validate rule group payload", "err", err.Error())
   473  			e = append(e, err.Error())
   474  		}
   475  
   476  		http.Error(w, strings.Join(e, ", "), http.StatusBadRequest)
   477  		return
   478  	}
   479  
   480  	if err := a.ruler.AssertMaxRulesPerRuleGroup(userID, len(rg.Rules)); err != nil {
   481  		level.Error(logger).Log("msg", "limit validation failure", "err", err.Error(), "user", userID)
   482  		http.Error(w, err.Error(), http.StatusBadRequest)
   483  		return
   484  	}
   485  
   486  	rgs, err := a.store.ListRuleGroupsForUserAndNamespace(req.Context(), userID, "")
   487  	if err != nil {
   488  		level.Error(logger).Log("msg", "unable to fetch current rule groups for validation", "err", err.Error(), "user", userID)
   489  		http.Error(w, err.Error(), http.StatusInternalServerError)
   490  		return
   491  	}
   492  
   493  	if err := a.ruler.AssertMaxRuleGroups(userID, len(rgs)+1); err != nil {
   494  		level.Error(logger).Log("msg", "limit validation failure", "err", err.Error(), "user", userID)
   495  		http.Error(w, err.Error(), http.StatusBadRequest)
   496  		return
   497  	}
   498  
   499  	rgProto := rulespb.ToProto(userID, namespace, rg)
   500  
   501  	level.Debug(logger).Log("msg", "attempting to store rulegroup", "userID", userID, "group", rgProto.String())
   502  	err = a.store.SetRuleGroup(req.Context(), userID, namespace, rgProto)
   503  	if err != nil {
   504  		level.Error(logger).Log("msg", "unable to store rule group", "err", err.Error())
   505  		http.Error(w, err.Error(), http.StatusInternalServerError)
   506  		return
   507  	}
   508  
   509  	respondAccepted(w, logger)
   510  }
   511  
   512  func (a *API) DeleteNamespace(w http.ResponseWriter, req *http.Request) {
   513  	logger := util_log.WithContext(req.Context(), a.logger)
   514  
   515  	userID, namespace, _, err := parseRequest(req, true, false)
   516  	if err != nil {
   517  		respondError(logger, w, err.Error())
   518  		return
   519  	}
   520  
   521  	err = a.store.DeleteNamespace(req.Context(), userID, namespace)
   522  	if err != nil {
   523  		if err == rulestore.ErrGroupNamespaceNotFound {
   524  			http.Error(w, err.Error(), http.StatusNotFound)
   525  			return
   526  		}
   527  		respondError(logger, w, err.Error())
   528  		return
   529  	}
   530  
   531  	respondAccepted(w, logger)
   532  }
   533  
   534  func (a *API) DeleteRuleGroup(w http.ResponseWriter, req *http.Request) {
   535  	logger := util_log.WithContext(req.Context(), a.logger)
   536  
   537  	userID, namespace, groupName, err := parseRequest(req, true, true)
   538  	if err != nil {
   539  		respondError(logger, w, err.Error())
   540  		return
   541  	}
   542  
   543  	err = a.store.DeleteRuleGroup(req.Context(), userID, namespace, groupName)
   544  	if err != nil {
   545  		if err == rulestore.ErrGroupNotFound {
   546  			http.Error(w, err.Error(), http.StatusNotFound)
   547  			return
   548  		}
   549  		respondError(logger, w, err.Error())
   550  		return
   551  	}
   552  
   553  	respondAccepted(w, logger)
   554  }