github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/api.go (about) 1 package alertmanager 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "net/http" 9 "os" 10 "path/filepath" 11 "reflect" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/grafana/dskit/concurrency" 16 "github.com/pkg/errors" 17 "github.com/prometheus/alertmanager/config" 18 "github.com/prometheus/alertmanager/template" 19 commoncfg "github.com/prometheus/common/config" 20 "gopkg.in/yaml.v2" 21 22 "github.com/cortexproject/cortex/pkg/alertmanager/alertspb" 23 "github.com/cortexproject/cortex/pkg/tenant" 24 "github.com/cortexproject/cortex/pkg/util" 25 util_log "github.com/cortexproject/cortex/pkg/util/log" 26 ) 27 28 const ( 29 errMarshallingYAML = "error marshalling YAML Alertmanager config" 30 errValidatingConfig = "error validating Alertmanager config" 31 errReadingConfiguration = "unable to read the Alertmanager config" 32 errStoringConfiguration = "unable to store the Alertmanager config" 33 errDeletingConfiguration = "unable to delete the Alertmanager config" 34 errNoOrgID = "unable to determine the OrgID" 35 errListAllUser = "unable to list the Alertmanager users" 36 errConfigurationTooBig = "Alertmanager configuration is too big, limit: %d bytes" 37 errTooManyTemplates = "too many templates in the configuration: %d (limit: %d)" 38 errTemplateTooBig = "template %s is too big: %d bytes (limit: %d bytes)" 39 40 fetchConcurrency = 16 41 ) 42 43 var ( 44 errPasswordFileNotAllowed = errors.New("setting password_file, bearer_token_file and credentials_file is not allowed") 45 errOAuth2SecretFileNotAllowed = errors.New("setting OAuth2 client_secret_file is not allowed") 46 errProxyURLNotAllowed = errors.New("setting proxy_url is not allowed") 47 errTLSFileNotAllowed = errors.New("setting TLS ca_file, cert_file and key_file is not allowed") 48 errSlackAPIURLFileNotAllowed = errors.New("setting Slack api_url_file and global slack_api_url_file is not allowed") 49 errVictorOpsAPIKeyFileNotAllowed = errors.New("setting VictorOps api_key_file is not allowed") 50 ) 51 52 // UserConfig is used to communicate a users alertmanager configs 53 type UserConfig struct { 54 TemplateFiles map[string]string `yaml:"template_files"` 55 AlertmanagerConfig string `yaml:"alertmanager_config"` 56 } 57 58 func (am *MultitenantAlertmanager) GetUserConfig(w http.ResponseWriter, r *http.Request) { 59 logger := util_log.WithContext(r.Context(), am.logger) 60 61 userID, err := tenant.TenantID(r.Context()) 62 if err != nil { 63 level.Error(logger).Log("msg", errNoOrgID, "err", err.Error()) 64 http.Error(w, fmt.Sprintf("%s: %s", errNoOrgID, err.Error()), http.StatusUnauthorized) 65 return 66 } 67 68 cfg, err := am.store.GetAlertConfig(r.Context(), userID) 69 if err != nil { 70 if err == alertspb.ErrNotFound { 71 http.Error(w, err.Error(), http.StatusNotFound) 72 } else { 73 http.Error(w, err.Error(), http.StatusInternalServerError) 74 } 75 return 76 } 77 78 d, err := yaml.Marshal(&UserConfig{ 79 TemplateFiles: alertspb.ParseTemplates(cfg), 80 AlertmanagerConfig: cfg.RawConfig, 81 }) 82 83 if err != nil { 84 level.Error(logger).Log("msg", errMarshallingYAML, "err", err, "user", userID) 85 http.Error(w, fmt.Sprintf("%s: %s", errMarshallingYAML, err.Error()), http.StatusInternalServerError) 86 return 87 } 88 89 w.Header().Set("Content-Type", "application/yaml") 90 if _, err := w.Write(d); err != nil { 91 http.Error(w, err.Error(), http.StatusInternalServerError) 92 return 93 } 94 } 95 96 func (am *MultitenantAlertmanager) SetUserConfig(w http.ResponseWriter, r *http.Request) { 97 logger := util_log.WithContext(r.Context(), am.logger) 98 userID, err := tenant.TenantID(r.Context()) 99 if err != nil { 100 level.Error(logger).Log("msg", errNoOrgID, "err", err.Error()) 101 http.Error(w, fmt.Sprintf("%s: %s", errNoOrgID, err.Error()), http.StatusUnauthorized) 102 return 103 } 104 105 var input io.Reader 106 maxConfigSize := am.limits.AlertmanagerMaxConfigSize(userID) 107 if maxConfigSize > 0 { 108 // LimitReader will return EOF after reading specified number of bytes. To check if 109 // we have read too many bytes, allow one extra byte. 110 input = io.LimitReader(r.Body, int64(maxConfigSize)+1) 111 } else { 112 input = r.Body 113 } 114 115 payload, err := ioutil.ReadAll(input) 116 if err != nil { 117 level.Error(logger).Log("msg", errReadingConfiguration, "err", err.Error()) 118 http.Error(w, fmt.Sprintf("%s: %s", errReadingConfiguration, err.Error()), http.StatusBadRequest) 119 return 120 } 121 122 if maxConfigSize > 0 && len(payload) > maxConfigSize { 123 msg := fmt.Sprintf(errConfigurationTooBig, maxConfigSize) 124 level.Warn(logger).Log("msg", msg) 125 http.Error(w, msg, http.StatusBadRequest) 126 return 127 } 128 129 cfg := &UserConfig{} 130 err = yaml.Unmarshal(payload, cfg) 131 if err != nil { 132 level.Error(logger).Log("msg", errMarshallingYAML, "err", err.Error()) 133 http.Error(w, fmt.Sprintf("%s: %s", errMarshallingYAML, err.Error()), http.StatusBadRequest) 134 return 135 } 136 137 cfgDesc := alertspb.ToProto(cfg.AlertmanagerConfig, cfg.TemplateFiles, userID) 138 if err := validateUserConfig(logger, cfgDesc, am.limits, userID); err != nil { 139 level.Warn(logger).Log("msg", errValidatingConfig, "err", err.Error()) 140 http.Error(w, fmt.Sprintf("%s: %s", errValidatingConfig, err.Error()), http.StatusBadRequest) 141 return 142 } 143 144 err = am.store.SetAlertConfig(r.Context(), cfgDesc) 145 if err != nil { 146 level.Error(logger).Log("msg", errStoringConfiguration, "err", err.Error()) 147 http.Error(w, fmt.Sprintf("%s: %s", errStoringConfiguration, err.Error()), http.StatusInternalServerError) 148 return 149 } 150 151 w.WriteHeader(http.StatusCreated) 152 } 153 154 // DeleteUserConfig is exposed via user-visible API (if enabled, uses DELETE method), but also as an internal endpoint using POST method. 155 // Note that if no config exists for a user, StatusOK is returned. 156 func (am *MultitenantAlertmanager) DeleteUserConfig(w http.ResponseWriter, r *http.Request) { 157 logger := util_log.WithContext(r.Context(), am.logger) 158 userID, err := tenant.TenantID(r.Context()) 159 if err != nil { 160 level.Error(logger).Log("msg", errNoOrgID, "err", err.Error()) 161 http.Error(w, fmt.Sprintf("%s: %s", errNoOrgID, err.Error()), http.StatusUnauthorized) 162 return 163 } 164 165 err = am.store.DeleteAlertConfig(r.Context(), userID) 166 if err != nil { 167 level.Error(logger).Log("msg", errDeletingConfiguration, "err", err.Error()) 168 http.Error(w, fmt.Sprintf("%s: %s", errDeletingConfiguration, err.Error()), http.StatusInternalServerError) 169 return 170 } 171 172 w.WriteHeader(http.StatusOK) 173 } 174 175 // Partially copied from: https://github.com/prometheus/alertmanager/blob/8e861c646bf67599a1704fc843c6a94d519ce312/cli/check_config.go#L65-L96 176 func validateUserConfig(logger log.Logger, cfg alertspb.AlertConfigDesc, limits Limits, user string) error { 177 // We don't have a valid use case for empty configurations. If a tenant does not have a 178 // configuration set and issue a request to the Alertmanager, we'll a) upload an empty 179 // config and b) immediately start an Alertmanager instance for them if a fallback 180 // configuration is provisioned. 181 if cfg.RawConfig == "" { 182 return fmt.Errorf("configuration provided is empty, if you'd like to remove your configuration please use the delete configuration endpoint") 183 } 184 185 amCfg, err := config.Load(cfg.RawConfig) 186 if err != nil { 187 return err 188 } 189 190 // Validate the config recursively scanning it. 191 if err := validateAlertmanagerConfig(amCfg); err != nil { 192 return err 193 } 194 195 // Validate templates referenced in the alertmanager config. 196 for _, name := range amCfg.Templates { 197 if err := validateTemplateFilename(name); err != nil { 198 return err 199 } 200 } 201 202 // Check template limits. 203 if l := limits.AlertmanagerMaxTemplatesCount(user); l > 0 && len(cfg.Templates) > l { 204 return fmt.Errorf(errTooManyTemplates, len(cfg.Templates), l) 205 } 206 207 if maxSize := limits.AlertmanagerMaxTemplateSize(user); maxSize > 0 { 208 for _, tmpl := range cfg.Templates { 209 if size := len(tmpl.GetBody()); size > maxSize { 210 return fmt.Errorf(errTemplateTooBig, tmpl.GetFilename(), size, maxSize) 211 } 212 } 213 } 214 215 // Validate template files. 216 for _, tmpl := range cfg.Templates { 217 if err := validateTemplateFilename(tmpl.Filename); err != nil { 218 return err 219 } 220 } 221 222 // Create templates on disk in a temporary directory. 223 // Note: This means the validation will succeed if we can write to tmp but 224 // not to configured data dir, and on the flipside, it'll fail if we can't write 225 // to tmpDir. Ignoring both cases for now as they're ultra rare but will revisit if 226 // we see this in the wild. 227 userTempDir, err := ioutil.TempDir("", "validate-config-"+cfg.User) 228 if err != nil { 229 return err 230 } 231 defer os.RemoveAll(userTempDir) 232 233 for _, tmpl := range cfg.Templates { 234 templateFilepath, err := safeTemplateFilepath(userTempDir, tmpl.Filename) 235 if err != nil { 236 level.Error(logger).Log("msg", "unable to create template file path", "err", err, "user", cfg.User) 237 return err 238 } 239 240 if _, err = storeTemplateFile(templateFilepath, tmpl.Body); err != nil { 241 level.Error(logger).Log("msg", "unable to store template file", "err", err, "user", cfg.User) 242 return fmt.Errorf("unable to store template file '%s'", tmpl.Filename) 243 } 244 } 245 246 templateFiles := make([]string, len(amCfg.Templates)) 247 for i, t := range amCfg.Templates { 248 templateFiles[i] = filepath.Join(userTempDir, t) 249 } 250 251 _, err = template.FromGlobs(templateFiles...) 252 if err != nil { 253 return err 254 } 255 256 // Note: Not validating the MultitenantAlertmanager.transformConfig function as that 257 // that function shouldn't break configuration. Only way it can fail is if the base 258 // autoWebhookURL itself is broken. In that case, I would argue, we should accept the config 259 // not reject it. 260 261 return nil 262 } 263 264 func (am *MultitenantAlertmanager) ListAllConfigs(w http.ResponseWriter, r *http.Request) { 265 logger := util_log.WithContext(r.Context(), am.logger) 266 userIDs, err := am.store.ListAllUsers(r.Context()) 267 if err != nil { 268 level.Error(logger).Log("msg", "failed to list users of alertmanager", "err", err) 269 http.Error(w, fmt.Sprintf("%s: %s", errListAllUser, err.Error()), http.StatusInternalServerError) 270 return 271 } 272 273 done := make(chan struct{}) 274 iter := make(chan interface{}) 275 276 go func() { 277 util.StreamWriteYAMLResponse(w, iter, logger) 278 close(done) 279 }() 280 281 err = concurrency.ForEachUser(r.Context(), userIDs, fetchConcurrency, func(ctx context.Context, userID string) error { 282 cfg, err := am.store.GetAlertConfig(ctx, userID) 283 if errors.Is(err, alertspb.ErrNotFound) { 284 return nil 285 } else if err != nil { 286 return errors.Wrapf(err, "failed to fetch alertmanager config for user %s", userID) 287 } 288 data := map[string]*UserConfig{ 289 userID: { 290 TemplateFiles: alertspb.ParseTemplates(cfg), 291 AlertmanagerConfig: cfg.RawConfig, 292 }, 293 } 294 295 select { 296 case iter <- data: 297 case <-done: // stop early, if sending response has already finished 298 } 299 300 return nil 301 }) 302 if err != nil { 303 level.Error(logger).Log("msg", "failed to list all alertmanager configs", "err", err) 304 } 305 close(iter) 306 <-done 307 } 308 309 // validateAlertmanagerConfig recursively scans the input config looking for data types for which 310 // we have a specific validation and, whenever encountered, it runs their validation. Returns the 311 // first error or nil if validation succeeds. 312 func validateAlertmanagerConfig(cfg interface{}) error { 313 v := reflect.ValueOf(cfg) 314 t := v.Type() 315 316 // Skip invalid, the zero value or a nil pointer (checked by zero value). 317 if !v.IsValid() || v.IsZero() { 318 return nil 319 } 320 321 // If the input config is a pointer then we need to get its value. 322 // At this point the pointer value can't be nil. 323 if v.Kind() == reflect.Ptr { 324 v = v.Elem() 325 t = v.Type() 326 } 327 328 // Check if the input config is a data type for which we have a specific validation. 329 // At this point the value can't be a pointer anymore. 330 switch t { 331 case reflect.TypeOf(config.GlobalConfig{}): 332 if err := validateGlobalConfig(v.Interface().(config.GlobalConfig)); err != nil { 333 return err 334 } 335 336 case reflect.TypeOf(commoncfg.HTTPClientConfig{}): 337 if err := validateReceiverHTTPConfig(v.Interface().(commoncfg.HTTPClientConfig)); err != nil { 338 return err 339 } 340 341 case reflect.TypeOf(commoncfg.TLSConfig{}): 342 if err := validateReceiverTLSConfig(v.Interface().(commoncfg.TLSConfig)); err != nil { 343 return err 344 } 345 346 case reflect.TypeOf(config.SlackConfig{}): 347 if err := validateSlackConfig(v.Interface().(config.SlackConfig)); err != nil { 348 return err 349 } 350 351 case reflect.TypeOf(config.VictorOpsConfig{}): 352 if err := validateVictorOpsConfig(v.Interface().(config.VictorOpsConfig)); err != nil { 353 return err 354 } 355 } 356 357 // If the input config is a struct, recursively iterate on all fields. 358 if t.Kind() == reflect.Struct { 359 for i := 0; i < t.NumField(); i++ { 360 field := t.Field(i) 361 fieldValue := v.FieldByIndex(field.Index) 362 363 // Skip any field value which can't be converted to interface (eg. primitive types). 364 if fieldValue.CanInterface() { 365 if err := validateAlertmanagerConfig(fieldValue.Interface()); err != nil { 366 return err 367 } 368 } 369 } 370 } 371 372 if t.Kind() == reflect.Slice || t.Kind() == reflect.Array { 373 for i := 0; i < v.Len(); i++ { 374 fieldValue := v.Index(i) 375 376 // Skip any field value which can't be converted to interface (eg. primitive types). 377 if fieldValue.CanInterface() { 378 if err := validateAlertmanagerConfig(fieldValue.Interface()); err != nil { 379 return err 380 } 381 } 382 } 383 } 384 385 if t.Kind() == reflect.Map { 386 for _, key := range v.MapKeys() { 387 fieldValue := v.MapIndex(key) 388 389 // Skip any field value which can't be converted to interface (eg. primitive types). 390 if fieldValue.CanInterface() { 391 if err := validateAlertmanagerConfig(fieldValue.Interface()); err != nil { 392 return err 393 } 394 } 395 } 396 } 397 398 return nil 399 } 400 401 // validateReceiverHTTPConfig validates the HTTP config and returns an error if it contains 402 // settings not allowed by Cortex. 403 func validateReceiverHTTPConfig(cfg commoncfg.HTTPClientConfig) error { 404 if cfg.BasicAuth != nil && cfg.BasicAuth.PasswordFile != "" { 405 return errPasswordFileNotAllowed 406 } 407 if cfg.Authorization != nil && cfg.Authorization.CredentialsFile != "" { 408 return errPasswordFileNotAllowed 409 } 410 if cfg.BearerTokenFile != "" { 411 return errPasswordFileNotAllowed 412 } 413 if cfg.ProxyURL.URL != nil { 414 return errProxyURLNotAllowed 415 } 416 if cfg.OAuth2 != nil && cfg.OAuth2.ClientSecretFile != "" { 417 return errOAuth2SecretFileNotAllowed 418 } 419 return validateReceiverTLSConfig(cfg.TLSConfig) 420 } 421 422 // validateReceiverTLSConfig validates the TLS config and returns an error if it contains 423 // settings not allowed by Cortex. 424 func validateReceiverTLSConfig(cfg commoncfg.TLSConfig) error { 425 if cfg.CAFile != "" || cfg.CertFile != "" || cfg.KeyFile != "" { 426 return errTLSFileNotAllowed 427 } 428 return nil 429 } 430 431 // validateGlobalConfig validates the Global config and returns an error if it contains 432 // settings now allowed by Cortex. 433 func validateGlobalConfig(cfg config.GlobalConfig) error { 434 if cfg.SlackAPIURLFile != "" { 435 return errSlackAPIURLFileNotAllowed 436 } 437 return nil 438 } 439 440 // validateSlackConfig validates the Slack config and returns an error if it contains 441 // settings now allowed by Cortex. 442 func validateSlackConfig(cfg config.SlackConfig) error { 443 if cfg.APIURLFile != "" { 444 return errSlackAPIURLFileNotAllowed 445 } 446 return nil 447 } 448 449 // validateVictorOpsConfig validates the VictorOps config and returns an error if it contains 450 // settings now allowed by Cortex. 451 func validateVictorOpsConfig(cfg config.VictorOpsConfig) error { 452 if cfg.APIKeyFile != "" { 453 return errVictorOpsAPIKeyFileNotAllowed 454 } 455 return nil 456 }