github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/api.go (about)

     1  package alertmanager
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"net/http"
     9  	"os"
    10  	"path/filepath"
    11  	"reflect"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/grafana/dskit/concurrency"
    16  	"github.com/pkg/errors"
    17  	"github.com/prometheus/alertmanager/config"
    18  	"github.com/prometheus/alertmanager/template"
    19  	commoncfg "github.com/prometheus/common/config"
    20  	"gopkg.in/yaml.v2"
    21  
    22  	"github.com/cortexproject/cortex/pkg/alertmanager/alertspb"
    23  	"github.com/cortexproject/cortex/pkg/tenant"
    24  	"github.com/cortexproject/cortex/pkg/util"
    25  	util_log "github.com/cortexproject/cortex/pkg/util/log"
    26  )
    27  
// Messages used by the Alertmanager configuration API handlers and validation
// code in this file, both in HTTP error responses and in log lines. The entries
// containing formatting verbs are used as format strings.
const (
	errMarshallingYAML       = "error marshalling YAML Alertmanager config"
	errValidatingConfig      = "error validating Alertmanager config"
	errReadingConfiguration  = "unable to read the Alertmanager config"
	errStoringConfiguration  = "unable to store the Alertmanager config"
	errDeletingConfiguration = "unable to delete the Alertmanager config"
	errNoOrgID               = "unable to determine the OrgID"
	errListAllUser           = "unable to list the Alertmanager users"
	errConfigurationTooBig   = "Alertmanager configuration is too big, limit: %d bytes"
	errTooManyTemplates      = "too many templates in the configuration: %d (limit: %d)"
	errTemplateTooBig        = "template %s is too big: %d bytes (limit: %d bytes)"

	// fetchConcurrency is the concurrency value handed to concurrency.ForEachUser
	// when ListAllConfigs fetches the per-user configurations from the store.
	fetchConcurrency = 16
)
    42  
// Sentinel errors returned by the validate*Config functions in this file when
// a user-supplied Alertmanager configuration sets an option (file references
// and proxy URLs) that is rejected.
var (
	errPasswordFileNotAllowed        = errors.New("setting password_file, bearer_token_file and credentials_file is not allowed")
	errOAuth2SecretFileNotAllowed    = errors.New("setting OAuth2 client_secret_file is not allowed")
	errProxyURLNotAllowed            = errors.New("setting proxy_url is not allowed")
	errTLSFileNotAllowed             = errors.New("setting TLS ca_file, cert_file and key_file is not allowed")
	errSlackAPIURLFileNotAllowed     = errors.New("setting Slack api_url_file and global slack_api_url_file is not allowed")
	errVictorOpsAPIKeyFileNotAllowed = errors.New("setting VictorOps api_key_file is not allowed")
)
    51  
// UserConfig is the YAML document used to communicate a user's Alertmanager
// configuration over the API: the handlers in this file marshal it into
// responses and unmarshal it from request bodies.
//
// TemplateFiles holds the user's templates (presumably keyed by template file
// name, with the template body as value; verify against alertspb.ToProto and
// alertspb.ParseTemplates). AlertmanagerConfig holds the raw Alertmanager
// configuration as a string.
type UserConfig struct {
	TemplateFiles      map[string]string `yaml:"template_files"`
	AlertmanagerConfig string            `yaml:"alertmanager_config"`
}
    57  
    58  func (am *MultitenantAlertmanager) GetUserConfig(w http.ResponseWriter, r *http.Request) {
    59  	logger := util_log.WithContext(r.Context(), am.logger)
    60  
    61  	userID, err := tenant.TenantID(r.Context())
    62  	if err != nil {
    63  		level.Error(logger).Log("msg", errNoOrgID, "err", err.Error())
    64  		http.Error(w, fmt.Sprintf("%s: %s", errNoOrgID, err.Error()), http.StatusUnauthorized)
    65  		return
    66  	}
    67  
    68  	cfg, err := am.store.GetAlertConfig(r.Context(), userID)
    69  	if err != nil {
    70  		if err == alertspb.ErrNotFound {
    71  			http.Error(w, err.Error(), http.StatusNotFound)
    72  		} else {
    73  			http.Error(w, err.Error(), http.StatusInternalServerError)
    74  		}
    75  		return
    76  	}
    77  
    78  	d, err := yaml.Marshal(&UserConfig{
    79  		TemplateFiles:      alertspb.ParseTemplates(cfg),
    80  		AlertmanagerConfig: cfg.RawConfig,
    81  	})
    82  
    83  	if err != nil {
    84  		level.Error(logger).Log("msg", errMarshallingYAML, "err", err, "user", userID)
    85  		http.Error(w, fmt.Sprintf("%s: %s", errMarshallingYAML, err.Error()), http.StatusInternalServerError)
    86  		return
    87  	}
    88  
    89  	w.Header().Set("Content-Type", "application/yaml")
    90  	if _, err := w.Write(d); err != nil {
    91  		http.Error(w, err.Error(), http.StatusInternalServerError)
    92  		return
    93  	}
    94  }
    95  
    96  func (am *MultitenantAlertmanager) SetUserConfig(w http.ResponseWriter, r *http.Request) {
    97  	logger := util_log.WithContext(r.Context(), am.logger)
    98  	userID, err := tenant.TenantID(r.Context())
    99  	if err != nil {
   100  		level.Error(logger).Log("msg", errNoOrgID, "err", err.Error())
   101  		http.Error(w, fmt.Sprintf("%s: %s", errNoOrgID, err.Error()), http.StatusUnauthorized)
   102  		return
   103  	}
   104  
   105  	var input io.Reader
   106  	maxConfigSize := am.limits.AlertmanagerMaxConfigSize(userID)
   107  	if maxConfigSize > 0 {
   108  		// LimitReader will return EOF after reading specified number of bytes. To check if
   109  		// we have read too many bytes, allow one extra byte.
   110  		input = io.LimitReader(r.Body, int64(maxConfigSize)+1)
   111  	} else {
   112  		input = r.Body
   113  	}
   114  
   115  	payload, err := ioutil.ReadAll(input)
   116  	if err != nil {
   117  		level.Error(logger).Log("msg", errReadingConfiguration, "err", err.Error())
   118  		http.Error(w, fmt.Sprintf("%s: %s", errReadingConfiguration, err.Error()), http.StatusBadRequest)
   119  		return
   120  	}
   121  
   122  	if maxConfigSize > 0 && len(payload) > maxConfigSize {
   123  		msg := fmt.Sprintf(errConfigurationTooBig, maxConfigSize)
   124  		level.Warn(logger).Log("msg", msg)
   125  		http.Error(w, msg, http.StatusBadRequest)
   126  		return
   127  	}
   128  
   129  	cfg := &UserConfig{}
   130  	err = yaml.Unmarshal(payload, cfg)
   131  	if err != nil {
   132  		level.Error(logger).Log("msg", errMarshallingYAML, "err", err.Error())
   133  		http.Error(w, fmt.Sprintf("%s: %s", errMarshallingYAML, err.Error()), http.StatusBadRequest)
   134  		return
   135  	}
   136  
   137  	cfgDesc := alertspb.ToProto(cfg.AlertmanagerConfig, cfg.TemplateFiles, userID)
   138  	if err := validateUserConfig(logger, cfgDesc, am.limits, userID); err != nil {
   139  		level.Warn(logger).Log("msg", errValidatingConfig, "err", err.Error())
   140  		http.Error(w, fmt.Sprintf("%s: %s", errValidatingConfig, err.Error()), http.StatusBadRequest)
   141  		return
   142  	}
   143  
   144  	err = am.store.SetAlertConfig(r.Context(), cfgDesc)
   145  	if err != nil {
   146  		level.Error(logger).Log("msg", errStoringConfiguration, "err", err.Error())
   147  		http.Error(w, fmt.Sprintf("%s: %s", errStoringConfiguration, err.Error()), http.StatusInternalServerError)
   148  		return
   149  	}
   150  
   151  	w.WriteHeader(http.StatusCreated)
   152  }
   153  
   154  // DeleteUserConfig is exposed via user-visible API (if enabled, uses DELETE method), but also as an internal endpoint using POST method.
   155  // Note that if no config exists for a user, StatusOK is returned.
   156  func (am *MultitenantAlertmanager) DeleteUserConfig(w http.ResponseWriter, r *http.Request) {
   157  	logger := util_log.WithContext(r.Context(), am.logger)
   158  	userID, err := tenant.TenantID(r.Context())
   159  	if err != nil {
   160  		level.Error(logger).Log("msg", errNoOrgID, "err", err.Error())
   161  		http.Error(w, fmt.Sprintf("%s: %s", errNoOrgID, err.Error()), http.StatusUnauthorized)
   162  		return
   163  	}
   164  
   165  	err = am.store.DeleteAlertConfig(r.Context(), userID)
   166  	if err != nil {
   167  		level.Error(logger).Log("msg", errDeletingConfiguration, "err", err.Error())
   168  		http.Error(w, fmt.Sprintf("%s: %s", errDeletingConfiguration, err.Error()), http.StatusInternalServerError)
   169  		return
   170  	}
   171  
   172  	w.WriteHeader(http.StatusOK)
   173  }
   174  
   175  // Partially copied from: https://github.com/prometheus/alertmanager/blob/8e861c646bf67599a1704fc843c6a94d519ce312/cli/check_config.go#L65-L96
   176  func validateUserConfig(logger log.Logger, cfg alertspb.AlertConfigDesc, limits Limits, user string) error {
   177  	// We don't have a valid use case for empty configurations. If a tenant does not have a
   178  	// configuration set and issue a request to the Alertmanager, we'll a) upload an empty
   179  	// config and b) immediately start an Alertmanager instance for them if a fallback
   180  	// configuration is provisioned.
   181  	if cfg.RawConfig == "" {
   182  		return fmt.Errorf("configuration provided is empty, if you'd like to remove your configuration please use the delete configuration endpoint")
   183  	}
   184  
   185  	amCfg, err := config.Load(cfg.RawConfig)
   186  	if err != nil {
   187  		return err
   188  	}
   189  
   190  	// Validate the config recursively scanning it.
   191  	if err := validateAlertmanagerConfig(amCfg); err != nil {
   192  		return err
   193  	}
   194  
   195  	// Validate templates referenced in the alertmanager config.
   196  	for _, name := range amCfg.Templates {
   197  		if err := validateTemplateFilename(name); err != nil {
   198  			return err
   199  		}
   200  	}
   201  
   202  	// Check template limits.
   203  	if l := limits.AlertmanagerMaxTemplatesCount(user); l > 0 && len(cfg.Templates) > l {
   204  		return fmt.Errorf(errTooManyTemplates, len(cfg.Templates), l)
   205  	}
   206  
   207  	if maxSize := limits.AlertmanagerMaxTemplateSize(user); maxSize > 0 {
   208  		for _, tmpl := range cfg.Templates {
   209  			if size := len(tmpl.GetBody()); size > maxSize {
   210  				return fmt.Errorf(errTemplateTooBig, tmpl.GetFilename(), size, maxSize)
   211  			}
   212  		}
   213  	}
   214  
   215  	// Validate template files.
   216  	for _, tmpl := range cfg.Templates {
   217  		if err := validateTemplateFilename(tmpl.Filename); err != nil {
   218  			return err
   219  		}
   220  	}
   221  
   222  	// Create templates on disk in a temporary directory.
   223  	// Note: This means the validation will succeed if we can write to tmp but
   224  	// not to configured data dir, and on the flipside, it'll fail if we can't write
   225  	// to tmpDir. Ignoring both cases for now as they're ultra rare but will revisit if
   226  	// we see this in the wild.
   227  	userTempDir, err := ioutil.TempDir("", "validate-config-"+cfg.User)
   228  	if err != nil {
   229  		return err
   230  	}
   231  	defer os.RemoveAll(userTempDir)
   232  
   233  	for _, tmpl := range cfg.Templates {
   234  		templateFilepath, err := safeTemplateFilepath(userTempDir, tmpl.Filename)
   235  		if err != nil {
   236  			level.Error(logger).Log("msg", "unable to create template file path", "err", err, "user", cfg.User)
   237  			return err
   238  		}
   239  
   240  		if _, err = storeTemplateFile(templateFilepath, tmpl.Body); err != nil {
   241  			level.Error(logger).Log("msg", "unable to store template file", "err", err, "user", cfg.User)
   242  			return fmt.Errorf("unable to store template file '%s'", tmpl.Filename)
   243  		}
   244  	}
   245  
   246  	templateFiles := make([]string, len(amCfg.Templates))
   247  	for i, t := range amCfg.Templates {
   248  		templateFiles[i] = filepath.Join(userTempDir, t)
   249  	}
   250  
   251  	_, err = template.FromGlobs(templateFiles...)
   252  	if err != nil {
   253  		return err
   254  	}
   255  
   256  	// Note: Not validating the MultitenantAlertmanager.transformConfig function as that
   257  	// that function shouldn't break configuration. Only way it can fail is if the base
   258  	// autoWebhookURL itself is broken. In that case, I would argue, we should accept the config
   259  	// not reject it.
   260  
   261  	return nil
   262  }
   263  
   264  func (am *MultitenantAlertmanager) ListAllConfigs(w http.ResponseWriter, r *http.Request) {
   265  	logger := util_log.WithContext(r.Context(), am.logger)
   266  	userIDs, err := am.store.ListAllUsers(r.Context())
   267  	if err != nil {
   268  		level.Error(logger).Log("msg", "failed to list users of alertmanager", "err", err)
   269  		http.Error(w, fmt.Sprintf("%s: %s", errListAllUser, err.Error()), http.StatusInternalServerError)
   270  		return
   271  	}
   272  
   273  	done := make(chan struct{})
   274  	iter := make(chan interface{})
   275  
   276  	go func() {
   277  		util.StreamWriteYAMLResponse(w, iter, logger)
   278  		close(done)
   279  	}()
   280  
   281  	err = concurrency.ForEachUser(r.Context(), userIDs, fetchConcurrency, func(ctx context.Context, userID string) error {
   282  		cfg, err := am.store.GetAlertConfig(ctx, userID)
   283  		if errors.Is(err, alertspb.ErrNotFound) {
   284  			return nil
   285  		} else if err != nil {
   286  			return errors.Wrapf(err, "failed to fetch alertmanager config for user %s", userID)
   287  		}
   288  		data := map[string]*UserConfig{
   289  			userID: {
   290  				TemplateFiles:      alertspb.ParseTemplates(cfg),
   291  				AlertmanagerConfig: cfg.RawConfig,
   292  			},
   293  		}
   294  
   295  		select {
   296  		case iter <- data:
   297  		case <-done: // stop early, if sending response has already finished
   298  		}
   299  
   300  		return nil
   301  	})
   302  	if err != nil {
   303  		level.Error(logger).Log("msg", "failed to list all alertmanager configs", "err", err)
   304  	}
   305  	close(iter)
   306  	<-done
   307  }
   308  
   309  // validateAlertmanagerConfig recursively scans the input config looking for data types for which
   310  // we have a specific validation and, whenever encountered, it runs their validation. Returns the
   311  // first error or nil if validation succeeds.
   312  func validateAlertmanagerConfig(cfg interface{}) error {
   313  	v := reflect.ValueOf(cfg)
   314  	t := v.Type()
   315  
   316  	// Skip invalid, the zero value or a nil pointer (checked by zero value).
   317  	if !v.IsValid() || v.IsZero() {
   318  		return nil
   319  	}
   320  
   321  	// If the input config is a pointer then we need to get its value.
   322  	// At this point the pointer value can't be nil.
   323  	if v.Kind() == reflect.Ptr {
   324  		v = v.Elem()
   325  		t = v.Type()
   326  	}
   327  
   328  	// Check if the input config is a data type for which we have a specific validation.
   329  	// At this point the value can't be a pointer anymore.
   330  	switch t {
   331  	case reflect.TypeOf(config.GlobalConfig{}):
   332  		if err := validateGlobalConfig(v.Interface().(config.GlobalConfig)); err != nil {
   333  			return err
   334  		}
   335  
   336  	case reflect.TypeOf(commoncfg.HTTPClientConfig{}):
   337  		if err := validateReceiverHTTPConfig(v.Interface().(commoncfg.HTTPClientConfig)); err != nil {
   338  			return err
   339  		}
   340  
   341  	case reflect.TypeOf(commoncfg.TLSConfig{}):
   342  		if err := validateReceiverTLSConfig(v.Interface().(commoncfg.TLSConfig)); err != nil {
   343  			return err
   344  		}
   345  
   346  	case reflect.TypeOf(config.SlackConfig{}):
   347  		if err := validateSlackConfig(v.Interface().(config.SlackConfig)); err != nil {
   348  			return err
   349  		}
   350  
   351  	case reflect.TypeOf(config.VictorOpsConfig{}):
   352  		if err := validateVictorOpsConfig(v.Interface().(config.VictorOpsConfig)); err != nil {
   353  			return err
   354  		}
   355  	}
   356  
   357  	// If the input config is a struct, recursively iterate on all fields.
   358  	if t.Kind() == reflect.Struct {
   359  		for i := 0; i < t.NumField(); i++ {
   360  			field := t.Field(i)
   361  			fieldValue := v.FieldByIndex(field.Index)
   362  
   363  			// Skip any field value which can't be converted to interface (eg. primitive types).
   364  			if fieldValue.CanInterface() {
   365  				if err := validateAlertmanagerConfig(fieldValue.Interface()); err != nil {
   366  					return err
   367  				}
   368  			}
   369  		}
   370  	}
   371  
   372  	if t.Kind() == reflect.Slice || t.Kind() == reflect.Array {
   373  		for i := 0; i < v.Len(); i++ {
   374  			fieldValue := v.Index(i)
   375  
   376  			// Skip any field value which can't be converted to interface (eg. primitive types).
   377  			if fieldValue.CanInterface() {
   378  				if err := validateAlertmanagerConfig(fieldValue.Interface()); err != nil {
   379  					return err
   380  				}
   381  			}
   382  		}
   383  	}
   384  
   385  	if t.Kind() == reflect.Map {
   386  		for _, key := range v.MapKeys() {
   387  			fieldValue := v.MapIndex(key)
   388  
   389  			// Skip any field value which can't be converted to interface (eg. primitive types).
   390  			if fieldValue.CanInterface() {
   391  				if err := validateAlertmanagerConfig(fieldValue.Interface()); err != nil {
   392  					return err
   393  				}
   394  			}
   395  		}
   396  	}
   397  
   398  	return nil
   399  }
   400  
   401  // validateReceiverHTTPConfig validates the HTTP config and returns an error if it contains
   402  // settings not allowed by Cortex.
   403  func validateReceiverHTTPConfig(cfg commoncfg.HTTPClientConfig) error {
   404  	if cfg.BasicAuth != nil && cfg.BasicAuth.PasswordFile != "" {
   405  		return errPasswordFileNotAllowed
   406  	}
   407  	if cfg.Authorization != nil && cfg.Authorization.CredentialsFile != "" {
   408  		return errPasswordFileNotAllowed
   409  	}
   410  	if cfg.BearerTokenFile != "" {
   411  		return errPasswordFileNotAllowed
   412  	}
   413  	if cfg.ProxyURL.URL != nil {
   414  		return errProxyURLNotAllowed
   415  	}
   416  	if cfg.OAuth2 != nil && cfg.OAuth2.ClientSecretFile != "" {
   417  		return errOAuth2SecretFileNotAllowed
   418  	}
   419  	return validateReceiverTLSConfig(cfg.TLSConfig)
   420  }
   421  
   422  // validateReceiverTLSConfig validates the TLS config and returns an error if it contains
   423  // settings not allowed by Cortex.
   424  func validateReceiverTLSConfig(cfg commoncfg.TLSConfig) error {
   425  	if cfg.CAFile != "" || cfg.CertFile != "" || cfg.KeyFile != "" {
   426  		return errTLSFileNotAllowed
   427  	}
   428  	return nil
   429  }
   430  
   431  // validateGlobalConfig validates the Global config and returns an error if it contains
   432  // settings now allowed by Cortex.
   433  func validateGlobalConfig(cfg config.GlobalConfig) error {
   434  	if cfg.SlackAPIURLFile != "" {
   435  		return errSlackAPIURLFileNotAllowed
   436  	}
   437  	return nil
   438  }
   439  
   440  // validateSlackConfig validates the Slack config and returns an error if it contains
   441  // settings now allowed by Cortex.
   442  func validateSlackConfig(cfg config.SlackConfig) error {
   443  	if cfg.APIURLFile != "" {
   444  		return errSlackAPIURLFileNotAllowed
   445  	}
   446  	return nil
   447  }
   448  
   449  // validateVictorOpsConfig validates the VictorOps config and returns an error if it contains
   450  // settings now allowed by Cortex.
   451  func validateVictorOpsConfig(cfg config.VictorOpsConfig) error {
   452  	if cfg.APIKeyFile != "" {
   453  		return errVictorOpsAPIKeyFileNotAllowed
   454  	}
   455  	return nil
   456  }