github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/multitenant_test.go

     1  package alertmanager
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"math/rand"
    11  	"net/http"
    12  	"net/http/httptest"
    13  	"net/http/pprof"
    14  	"os"
    15  	"path/filepath"
    16  	"regexp"
    17  	"strings"
    18  	"sync"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/go-kit/log"
    23  	"github.com/grafana/dskit/concurrency"
    24  	"github.com/grafana/dskit/flagext"
    25  	"github.com/grafana/dskit/kv/consul"
    26  	"github.com/grafana/dskit/ring"
    27  	"github.com/grafana/dskit/services"
    28  	"github.com/prometheus/alertmanager/cluster/clusterpb"
    29  	"github.com/prometheus/alertmanager/notify"
    30  	"github.com/prometheus/alertmanager/pkg/labels"
    31  	"github.com/prometheus/alertmanager/types"
    32  	"github.com/prometheus/client_golang/prometheus"
    33  	"github.com/prometheus/client_golang/prometheus/testutil"
    34  	"github.com/prometheus/common/model"
    35  	"github.com/stretchr/testify/assert"
    36  	"github.com/stretchr/testify/require"
    37  	"github.com/thanos-io/thanos/pkg/objstore"
    38  	"github.com/weaveworks/common/httpgrpc"
    39  	"github.com/weaveworks/common/user"
    40  	"go.uber.org/atomic"
    41  	"golang.org/x/time/rate"
    42  	"google.golang.org/grpc"
    43  
    44  	"github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb"
    45  	"github.com/cortexproject/cortex/pkg/alertmanager/alertspb"
    46  	"github.com/cortexproject/cortex/pkg/alertmanager/alertstore"
    47  	"github.com/cortexproject/cortex/pkg/alertmanager/alertstore/bucketclient"
    48  	"github.com/cortexproject/cortex/pkg/storage/bucket"
    49  	"github.com/cortexproject/cortex/pkg/util"
    50  	"github.com/cortexproject/cortex/pkg/util/test"
    51  	"github.com/cortexproject/cortex/pkg/util/validation"
    52  )
    53  
    54  var (
    55  	simpleConfigOne = `route:
    56    receiver: dummy
    57  
    58  receivers:
    59    - name: dummy`
    60  
     61  	simpleConfigTwo = `route:
     62    receiver: dummy2
     63  
     64  receivers:
     65    - name: dummy2`
    66  )
    67  
    68  func mockAlertmanagerConfig(t *testing.T) *MultitenantAlertmanagerConfig {
    69  	t.Helper()
    70  
    71  	externalURL := flagext.URLValue{}
    72  	err := externalURL.Set("http://localhost/api/prom")
    73  	require.NoError(t, err)
    74  
    75  	tempDir, err := ioutil.TempDir(os.TempDir(), "alertmanager")
    76  	require.NoError(t, err)
    77  
    78  	t.Cleanup(func() {
    79  		err := os.RemoveAll(tempDir)
    80  		require.NoError(t, err)
    81  	})
    82  
    83  	cfg := &MultitenantAlertmanagerConfig{}
    84  	flagext.DefaultValues(cfg)
    85  
    86  	cfg.ExternalURL = externalURL
    87  	cfg.DataDir = tempDir
    88  	cfg.ShardingRing.InstanceID = "test"
    89  	cfg.ShardingRing.InstanceAddr = "127.0.0.1"
    90  	cfg.PollInterval = time.Minute
    91  
    92  	return cfg
    93  }
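
// mockAlertmanagerConfigWithTempDir is an illustrative variant of the helper above, not used
// by the tests in this file: a minimal sketch showing how the same setup could rely on
// t.TempDir(), which creates the data directory and registers its cleanup automatically,
// instead of ioutil.TempDir plus an explicit t.Cleanup.
func mockAlertmanagerConfigWithTempDir(t *testing.T) *MultitenantAlertmanagerConfig {
	t.Helper()

	externalURL := flagext.URLValue{}
	require.NoError(t, externalURL.Set("http://localhost/api/prom"))

	cfg := &MultitenantAlertmanagerConfig{}
	flagext.DefaultValues(cfg)

	cfg.ExternalURL = externalURL
	cfg.DataDir = t.TempDir() // Removed automatically when the test (and its subtests) complete.
	cfg.ShardingRing.InstanceID = "test"
	cfg.ShardingRing.InstanceAddr = "127.0.0.1"
	cfg.PollInterval = time.Minute

	return cfg
}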
    94  
    95  func TestMultitenantAlertmanagerConfig_Validate(t *testing.T) {
    96  	tests := map[string]struct {
    97  		setup    func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config)
    98  		expected error
    99  	}{
   100  		"should pass with default config": {
   101  			setup:    func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {},
   102  			expected: nil,
   103  		},
    104  		"should fail if persist interval is 0": {
   105  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   106  				cfg.Persister.Interval = 0
   107  			},
   108  			expected: errInvalidPersistInterval,
   109  		},
    110  		"should fail if persist interval is negative": {
   111  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   112  				cfg.Persister.Interval = -1
   113  			},
   114  			expected: errInvalidPersistInterval,
   115  		},
   116  		"should fail if external URL ends with /": {
   117  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   118  				require.NoError(t, cfg.ExternalURL.Set("http://localhost/prefix/"))
   119  			},
   120  			expected: errInvalidExternalURL,
   121  		},
   122  		"should succeed if external URL does not end with /": {
   123  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   124  				require.NoError(t, cfg.ExternalURL.Set("http://localhost/prefix"))
   125  			},
   126  			expected: nil,
   127  		},
   128  		"should succeed if sharding enabled and new storage configuration given with bucket client": {
   129  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   130  				cfg.ShardingEnabled = true
   131  				storageCfg.Backend = "s3"
   132  			},
   133  			expected: nil,
   134  		},
   135  		"should fail if sharding enabled and new storage store configuration given with local type": {
   136  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   137  				cfg.ShardingEnabled = true
   138  				storageCfg.Backend = "local"
   139  			},
   140  			expected: errShardingUnsupportedStorage,
   141  		},
   142  		"should fail if sharding enabled and new storage store configuration given with configdb type": {
   143  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   144  				cfg.ShardingEnabled = true
   145  				storageCfg.Backend = "configdb"
   146  			},
   147  			expected: errShardingUnsupportedStorage,
   148  		},
   149  		"should fail if sharding enabled and legacy store configuration given": {
   150  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   151  				cfg.ShardingEnabled = true
   152  				cfg.Store.Type = "s3"
   153  			},
   154  			expected: errShardingLegacyStorage,
   155  		},
   156  		"should fail if zone aware is enabled but zone is not set": {
   157  			setup: func(t *testing.T, cfg *MultitenantAlertmanagerConfig, storageCfg *alertstore.Config) {
   158  				cfg.ShardingEnabled = true
   159  				cfg.ShardingRing.ZoneAwarenessEnabled = true
   160  			},
   161  			expected: errZoneAwarenessEnabledWithoutZoneInfo,
   162  		},
   163  	}
   164  
   165  	for testName, testData := range tests {
   166  		t.Run(testName, func(t *testing.T) {
   167  			cfg := &MultitenantAlertmanagerConfig{}
   168  			storageCfg := alertstore.Config{}
   169  			flagext.DefaultValues(cfg)
   170  			flagext.DefaultValues(&storageCfg)
   171  			testData.setup(t, cfg, &storageCfg)
   172  			assert.Equal(t, testData.expected, cfg.Validate(storageCfg))
   173  		})
   174  	}
   175  }
   176  
   177  func TestMultitenantAlertmanager_loadAndSyncConfigs(t *testing.T) {
   178  	ctx := context.Background()
   179  
   180  	// Run this test using a real storage client.
   181  	store := prepareInMemoryAlertStore()
   182  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   183  		User:      "user1",
   184  		RawConfig: simpleConfigOne,
   185  		Templates: []*alertspb.TemplateDesc{},
   186  	}))
   187  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   188  		User:      "user2",
   189  		RawConfig: simpleConfigOne,
   190  		Templates: []*alertspb.TemplateDesc{},
   191  	}))
   192  
   193  	reg := prometheus.NewPedanticRegistry()
   194  	cfg := mockAlertmanagerConfig(t)
   195  	am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, nil, log.NewNopLogger(), reg)
   196  	require.NoError(t, err)
   197  
   198  	// Ensure the configs are synced correctly
   199  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   200  	require.NoError(t, err)
   201  	require.Len(t, am.alertmanagers, 2)
   202  
   203  	currentConfig, cfgExists := am.cfgs["user1"]
   204  	require.True(t, cfgExists)
   205  	require.Equal(t, simpleConfigOne, currentConfig.RawConfig)
   206  
   207  	assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
   208  		# HELP cortex_alertmanager_config_last_reload_successful Boolean set to 1 whenever the last configuration reload attempt was successful.
   209  		# TYPE cortex_alertmanager_config_last_reload_successful gauge
   210  		cortex_alertmanager_config_last_reload_successful{user="user1"} 1
   211  		cortex_alertmanager_config_last_reload_successful{user="user2"} 1
   212  	`), "cortex_alertmanager_config_last_reload_successful"))
   213  
   214  	// Ensure when a 3rd config is added, it is synced correctly
   215  	user3Cfg := alertspb.AlertConfigDesc{
   216  		User: "user3",
   217  		RawConfig: simpleConfigOne + `
   218  templates:
   219  - 'first.tpl'
   220  - 'second.tpl'
   221  `,
   222  		Templates: []*alertspb.TemplateDesc{
   223  			{
   224  				Filename: "first.tpl",
   225  				Body:     `{{ define "t1" }}Template 1 ... {{end}}`,
   226  			},
   227  			{
   228  				Filename: "second.tpl",
   229  				Body:     `{{ define "t2" }}Template 2{{ end}}`,
   230  			},
   231  		},
   232  	}
   233  	require.NoError(t, store.SetAlertConfig(ctx, user3Cfg))
   234  
   235  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   236  	require.NoError(t, err)
   237  	require.Len(t, am.alertmanagers, 3)
   238  
   239  	dirs := am.getPerUserDirectories()
   240  	user3Dir := dirs["user3"]
   241  	require.NotZero(t, user3Dir)
   242  	require.True(t, dirExists(t, user3Dir))
   243  	require.True(t, dirExists(t, filepath.Join(user3Dir, templatesDir)))
   244  	require.True(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "first.tpl")))
   245  	require.True(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "second.tpl")))
   246  
   247  	assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
   248  		# HELP cortex_alertmanager_config_last_reload_successful Boolean set to 1 whenever the last configuration reload attempt was successful.
   249  		# TYPE cortex_alertmanager_config_last_reload_successful gauge
   250  		cortex_alertmanager_config_last_reload_successful{user="user1"} 1
   251  		cortex_alertmanager_config_last_reload_successful{user="user2"} 1
   252  		cortex_alertmanager_config_last_reload_successful{user="user3"} 1
   253  	`), "cortex_alertmanager_config_last_reload_successful"))
   254  
   255  	// Ensure the config is updated
   256  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   257  		User:      "user1",
   258  		RawConfig: simpleConfigTwo,
   259  		Templates: []*alertspb.TemplateDesc{},
   260  	}))
   261  
   262  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   263  	require.NoError(t, err)
   264  
   265  	currentConfig, cfgExists = am.cfgs["user1"]
   266  	require.True(t, cfgExists)
   267  	require.Equal(t, simpleConfigTwo, currentConfig.RawConfig)
   268  
   269  	// Test Delete User, ensure config is removed and the resources are freed.
   270  	require.NoError(t, store.DeleteAlertConfig(ctx, "user3"))
   271  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   272  	require.NoError(t, err)
   273  	currentConfig, cfgExists = am.cfgs["user3"]
   274  	require.False(t, cfgExists)
   275  	require.Equal(t, "", currentConfig.RawConfig)
   276  
   277  	_, cfgExists = am.alertmanagers["user3"]
   278  	require.False(t, cfgExists)
   279  	dirs = am.getPerUserDirectories()
   280  	require.NotZero(t, dirs["user1"])
   281  	require.NotZero(t, dirs["user2"])
   282  	require.Zero(t, dirs["user3"]) // User3 is deleted, so we should have no more files for it.
   283  	require.False(t, fileExists(t, user3Dir))
   284  
   285  	assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
   286  		# HELP cortex_alertmanager_config_last_reload_successful Boolean set to 1 whenever the last configuration reload attempt was successful.
   287  		# TYPE cortex_alertmanager_config_last_reload_successful gauge
   288  		cortex_alertmanager_config_last_reload_successful{user="user1"} 1
   289  		cortex_alertmanager_config_last_reload_successful{user="user2"} 1
   290  	`), "cortex_alertmanager_config_last_reload_successful"))
   291  
   292  	// Ensure when a 3rd config is re-added, it is synced correctly
   293  	require.NoError(t, store.SetAlertConfig(ctx, user3Cfg))
   294  
   295  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   296  	require.NoError(t, err)
   297  
   298  	currentConfig, cfgExists = am.cfgs["user3"]
   299  	require.True(t, cfgExists)
   300  	require.Equal(t, user3Cfg.RawConfig, currentConfig.RawConfig)
   301  
   302  	_, cfgExists = am.alertmanagers["user3"]
   303  	require.True(t, cfgExists)
   304  	dirs = am.getPerUserDirectories()
   305  	require.NotZero(t, dirs["user1"])
   306  	require.NotZero(t, dirs["user2"])
   307  	require.Equal(t, user3Dir, dirs["user3"]) // Dir should exist, even though state files are not generated yet.
   308  
   309  	// Hierarchy that existed before should exist again.
   310  	require.True(t, dirExists(t, user3Dir))
   311  	require.True(t, dirExists(t, filepath.Join(user3Dir, templatesDir)))
   312  	require.True(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "first.tpl")))
   313  	require.True(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "second.tpl")))
   314  
   315  	assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
   316  		# HELP cortex_alertmanager_config_last_reload_successful Boolean set to 1 whenever the last configuration reload attempt was successful.
   317  		# TYPE cortex_alertmanager_config_last_reload_successful gauge
   318  		cortex_alertmanager_config_last_reload_successful{user="user1"} 1
   319  		cortex_alertmanager_config_last_reload_successful{user="user2"} 1
   320  		cortex_alertmanager_config_last_reload_successful{user="user3"} 1
   321  	`), "cortex_alertmanager_config_last_reload_successful"))
   322  
   323  	// Removed template files should be cleaned up
   324  	user3Cfg.Templates = []*alertspb.TemplateDesc{
   325  		{
   326  			Filename: "first.tpl",
   327  			Body:     `{{ define "t1" }}Template 1 ... {{end}}`,
   328  		},
   329  	}
   330  
   331  	require.NoError(t, store.SetAlertConfig(ctx, user3Cfg))
   332  
   333  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   334  	require.NoError(t, err)
   335  
   336  	require.True(t, dirExists(t, user3Dir))
   337  	require.True(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "first.tpl")))
   338  	require.False(t, fileExists(t, filepath.Join(user3Dir, templatesDir, "second.tpl")))
   339  }
   340  
   341  func TestMultitenantAlertmanager_FirewallShouldBlockHTTPBasedReceiversWhenEnabled(t *testing.T) {
   342  	tests := map[string]struct {
   343  		getAlertmanagerConfig func(backendURL string) string
   344  	}{
   345  		"webhook": {
   346  			getAlertmanagerConfig: func(backendURL string) string {
   347  				return fmt.Sprintf(`
   348  route:
   349    receiver: webhook
   350    group_wait: 0s
   351    group_interval: 1s
   352  
   353  receivers:
   354    - name: webhook
   355      webhook_configs:
   356        - url: %s
   357  `, backendURL)
   358  			},
   359  		},
   360  		"pagerduty": {
   361  			getAlertmanagerConfig: func(backendURL string) string {
   362  				return fmt.Sprintf(`
   363  route:
   364    receiver: pagerduty
   365    group_wait: 0s
   366    group_interval: 1s
   367  
   368  receivers:
   369    - name: pagerduty
   370      pagerduty_configs:
   371        - url: %s
   372          routing_key: secret
   373  `, backendURL)
   374  			},
   375  		},
   376  		"slack": {
   377  			getAlertmanagerConfig: func(backendURL string) string {
   378  				return fmt.Sprintf(`
   379  route:
   380    receiver: slack
   381    group_wait: 0s
   382    group_interval: 1s
   383  
   384  receivers:
   385    - name: slack
   386      slack_configs:
   387        - api_url: %s
   388          channel: test
   389  `, backendURL)
   390  			},
   391  		},
   392  		"opsgenie": {
   393  			getAlertmanagerConfig: func(backendURL string) string {
   394  				return fmt.Sprintf(`
   395  route:
   396    receiver: opsgenie
   397    group_wait: 0s
   398    group_interval: 1s
   399  
   400  receivers:
   401    - name: opsgenie
   402      opsgenie_configs:
   403        - api_url: %s
   404          api_key: secret
   405  `, backendURL)
   406  			},
   407  		},
   408  		"wechat": {
   409  			getAlertmanagerConfig: func(backendURL string) string {
   410  				return fmt.Sprintf(`
   411  route:
   412    receiver: wechat
   413    group_wait: 0s
   414    group_interval: 1s
   415  
   416  receivers:
   417    - name: wechat
   418      wechat_configs:
   419        - api_url: %s
   420          api_secret: secret
   421          corp_id: babycorp
   422  `, backendURL)
   423  			},
   424  		},
   425  	}
   426  
   427  	for receiverName, testData := range tests {
   428  		for _, firewallEnabled := range []bool{true, false} {
   429  			receiverName := receiverName
   430  			testData := testData
   431  			firewallEnabled := firewallEnabled
   432  
   433  			t.Run(fmt.Sprintf("receiver=%s firewall enabled=%v", receiverName, firewallEnabled), func(t *testing.T) {
   434  				t.Parallel()
   435  
   436  				ctx := context.Background()
   437  				userID := "user-1"
   438  				serverInvoked := atomic.NewBool(false)
   439  
   440  				// Create a local HTTP server to test whether the request is received.
   441  				server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
   442  					serverInvoked.Store(true)
   443  					writer.WriteHeader(http.StatusOK)
   444  				}))
   445  				defer server.Close()
   446  
   447  				// Create the alertmanager config.
   448  				alertmanagerCfg := testData.getAlertmanagerConfig(fmt.Sprintf("http://%s", server.Listener.Addr().String()))
   449  
   450  				// Store the alertmanager config in the bucket.
   451  				store := prepareInMemoryAlertStore()
   452  				require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   453  					User:      userID,
   454  					RawConfig: alertmanagerCfg,
   455  				}))
   456  
   457  				// Prepare the alertmanager config.
   458  				cfg := mockAlertmanagerConfig(t)
   459  
   460  				// Prepare the limits config.
   461  				var limits validation.Limits
   462  				flagext.DefaultValues(&limits)
   463  				limits.AlertmanagerReceiversBlockPrivateAddresses = firewallEnabled
   464  
   465  				overrides, err := validation.NewOverrides(limits, nil)
   466  				require.NoError(t, err)
   467  
   468  				// Start the alertmanager.
   469  				reg := prometheus.NewPedanticRegistry()
   470  				logs := &concurrency.SyncBuffer{}
   471  				logger := log.NewLogfmtLogger(logs)
   472  				am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, overrides, logger, reg)
   473  				require.NoError(t, err)
   474  				require.NoError(t, services.StartAndAwaitRunning(ctx, am))
   475  				t.Cleanup(func() {
   476  					require.NoError(t, services.StopAndAwaitTerminated(ctx, am))
   477  				})
   478  
   479  				// Ensure the configs are synced correctly.
   480  				assert.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
   481  		# HELP cortex_alertmanager_config_last_reload_successful Boolean set to 1 whenever the last configuration reload attempt was successful.
   482  		# TYPE cortex_alertmanager_config_last_reload_successful gauge
   483  		cortex_alertmanager_config_last_reload_successful{user="user-1"} 1
   484  	`), "cortex_alertmanager_config_last_reload_successful"))
   485  
   486  				// Create an alert to push.
   487  				alerts := types.Alerts(&types.Alert{
   488  					Alert: model.Alert{
   489  						Labels:   map[model.LabelName]model.LabelValue{model.AlertNameLabel: "test"},
   490  						StartsAt: time.Now().Add(-time.Minute),
   491  						EndsAt:   time.Now().Add(time.Minute),
   492  					},
   493  					UpdatedAt: time.Now(),
   494  					Timeout:   false,
   495  				})
   496  
   497  				alertsPayload, err := json.Marshal(alerts)
   498  				require.NoError(t, err)
   499  
   500  				// Push an alert.
   501  				req := httptest.NewRequest(http.MethodPost, cfg.ExternalURL.String()+"/api/v1/alerts", bytes.NewReader(alertsPayload))
   502  				req.Header.Set("content-type", "application/json")
   503  				reqCtx := user.InjectOrgID(req.Context(), userID)
   504  				{
   505  					w := httptest.NewRecorder()
   506  					am.ServeHTTP(w, req.WithContext(reqCtx))
   507  
   508  					resp := w.Result()
   509  					_, err := ioutil.ReadAll(resp.Body)
   510  					require.NoError(t, err)
   511  					assert.Equal(t, http.StatusOK, w.Code)
   512  				}
   513  
   514  				// Ensure the server endpoint has not been called if firewall is enabled. Since the alert is delivered
    515  				// asynchronously, we poll for a short period before asserting (see the waitForCondition sketch after this test).
   516  				deadline := time.Now().Add(3 * time.Second)
   517  				for {
   518  					if time.Now().After(deadline) || serverInvoked.Load() {
   519  						break
   520  					}
   521  					time.Sleep(100 * time.Millisecond)
   522  				}
   523  
   524  				assert.Equal(t, !firewallEnabled, serverInvoked.Load())
   525  
   526  				// Print all alertmanager logs to have more information if this test fails in CI.
   527  				t.Logf("Alertmanager logs:\n%s", logs.String())
   528  			})
   529  		}
   530  	}
   531  }
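
// waitForCondition is an illustrative sketch, not used by the tests above: it captures the
// bounded polling loop from the firewall test in a helper that, unlike require.Eventually,
// does not fail the test on timeout but simply reports whether the condition became true,
// which is what that test needs since it asserts on both possible outcomes.
func waitForCondition(timeout, tick time.Duration, cond func() bool) bool {
	deadline := time.Now().Add(timeout)
	for !cond() {
		if time.Now().After(deadline) {
			return false
		}
		time.Sleep(tick)
	}
	return true
}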
   532  
   533  func TestMultitenantAlertmanager_migrateStateFilesToPerTenantDirectories(t *testing.T) {
   534  	ctx := context.Background()
   535  
   536  	const (
   537  		user1 = "user1"
   538  		user2 = "user2"
   539  	)
   540  
   541  	store := prepareInMemoryAlertStore()
   542  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   543  		User:      user2,
   544  		RawConfig: simpleConfigOne,
   545  		Templates: []*alertspb.TemplateDesc{},
   546  	}))
   547  
   548  	reg := prometheus.NewPedanticRegistry()
   549  	cfg := mockAlertmanagerConfig(t)
   550  	am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, nil, log.NewNopLogger(), reg)
   551  	require.NoError(t, err)
   552  
   553  	createFile(t, filepath.Join(cfg.DataDir, "nflog:"+user1))
   554  	createFile(t, filepath.Join(cfg.DataDir, "silences:"+user1))
   555  	createFile(t, filepath.Join(cfg.DataDir, "nflog:"+user2))
   556  	createFile(t, filepath.Join(cfg.DataDir, "templates", user2, "template.tpl"))
   557  
   558  	require.NoError(t, am.migrateStateFilesToPerTenantDirectories())
   559  	require.True(t, fileExists(t, filepath.Join(cfg.DataDir, user1, notificationLogSnapshot)))
   560  	require.True(t, fileExists(t, filepath.Join(cfg.DataDir, user1, silencesSnapshot)))
   561  	require.True(t, fileExists(t, filepath.Join(cfg.DataDir, user2, notificationLogSnapshot)))
   562  	require.True(t, dirExists(t, filepath.Join(cfg.DataDir, user2, templatesDir)))
   563  	require.True(t, fileExists(t, filepath.Join(cfg.DataDir, user2, templatesDir, "template.tpl")))
   564  }
   565  
   566  func fileExists(t *testing.T, path string) bool {
   567  	return checkExists(t, path, false)
   568  }
   569  
   570  func dirExists(t *testing.T, path string) bool {
   571  	return checkExists(t, path, true)
   572  }
   573  
   574  func checkExists(t *testing.T, path string, dir bool) bool {
   575  	fi, err := os.Stat(path)
   576  	if err != nil {
   577  		if os.IsNotExist(err) {
   578  			return false
   579  		}
   580  		require.NoError(t, err)
   581  	}
   582  
   583  	require.Equal(t, dir, fi.IsDir())
   584  	return true
   585  }
   586  
   587  func TestMultitenantAlertmanager_deleteUnusedLocalUserState(t *testing.T) {
   588  	ctx := context.Background()
   589  
   590  	const (
   591  		user1 = "user1"
   592  		user2 = "user2"
   593  	)
   594  
   595  	store := prepareInMemoryAlertStore()
   596  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   597  		User:      user2,
   598  		RawConfig: simpleConfigOne,
   599  		Templates: []*alertspb.TemplateDesc{},
   600  	}))
   601  
   602  	reg := prometheus.NewPedanticRegistry()
   603  	cfg := mockAlertmanagerConfig(t)
   604  	am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, nil, log.NewNopLogger(), reg)
   605  	require.NoError(t, err)
   606  
   607  	createFile(t, filepath.Join(cfg.DataDir, user1, notificationLogSnapshot))
   608  	createFile(t, filepath.Join(cfg.DataDir, user1, silencesSnapshot))
   609  	createFile(t, filepath.Join(cfg.DataDir, user2, notificationLogSnapshot))
   610  	createFile(t, filepath.Join(cfg.DataDir, user2, templatesDir, "template.tpl"))
   611  
   612  	dirs := am.getPerUserDirectories()
   613  	require.Equal(t, 2, len(dirs))
   614  	require.NotZero(t, dirs[user1])
   615  	require.NotZero(t, dirs[user2])
   616  
   617  	// Ensure the configs are synced correctly
   618  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   619  	require.NoError(t, err)
   620  
   621  	// loadAndSyncConfigs also cleans up obsolete files. Let's verify that.
   622  	dirs = am.getPerUserDirectories()
   623  
   624  	require.Zero(t, dirs[user1])    // has no configuration, files were deleted
   625  	require.NotZero(t, dirs[user2]) // has config, files survived
   626  }
   627  
   628  func TestMultitenantAlertmanager_zoneAwareSharding(t *testing.T) {
   629  	ctx := context.Background()
   630  	alertStore := prepareInMemoryAlertStore()
   631  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
   632  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
   633  
   634  	const (
   635  		user1 = "user1"
   636  		user2 = "user2"
   637  		user3 = "user3"
   638  	)
   639  
   640  	createInstance := func(i int, zone string, registries *util.UserRegistries) *MultitenantAlertmanager {
   641  		reg := prometheus.NewPedanticRegistry()
   642  		cfg := mockAlertmanagerConfig(t)
   643  		instanceID := fmt.Sprintf("instance-%d", i)
   644  		registries.AddUserRegistry(instanceID, reg)
   645  
   646  		cfg.ShardingRing.ReplicationFactor = 2
   647  		cfg.ShardingRing.InstanceID = instanceID
   648  		cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.1-%d", i)
   649  		cfg.ShardingEnabled = true
   650  		cfg.ShardingRing.ZoneAwarenessEnabled = true
   651  		cfg.ShardingRing.InstanceZone = zone
   652  
   653  		am, err := createMultitenantAlertmanager(cfg, nil, nil, alertStore, ringStore, nil, log.NewLogfmtLogger(os.Stdout), reg)
   654  		require.NoError(t, err)
   655  		t.Cleanup(func() {
   656  			require.NoError(t, services.StopAndAwaitTerminated(ctx, am))
   657  		})
   658  		require.NoError(t, services.StartAndAwaitRunning(ctx, am))
   659  
   660  		return am
   661  	}
   662  
   663  	registriesZoneA := util.NewUserRegistries()
   664  	registriesZoneB := util.NewUserRegistries()
   665  
   666  	am1ZoneA := createInstance(1, "zoneA", registriesZoneA)
   667  	am2ZoneA := createInstance(2, "zoneA", registriesZoneA)
   668  	am1ZoneB := createInstance(3, "zoneB", registriesZoneB)
   669  
   670  	{
   671  		require.NoError(t, alertStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   672  			User:      user1,
   673  			RawConfig: simpleConfigOne,
   674  			Templates: []*alertspb.TemplateDesc{},
   675  		}))
   676  		require.NoError(t, alertStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   677  			User:      user2,
   678  			RawConfig: simpleConfigOne,
   679  			Templates: []*alertspb.TemplateDesc{},
   680  		}))
   681  		require.NoError(t, alertStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   682  			User:      user3,
   683  			RawConfig: simpleConfigOne,
   684  			Templates: []*alertspb.TemplateDesc{},
   685  		}))
   686  
   687  		err := am1ZoneA.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   688  		require.NoError(t, err)
   689  		err = am2ZoneA.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   690  		require.NoError(t, err)
   691  		err = am1ZoneB.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   692  		require.NoError(t, err)
   693  	}
   694  
   695  	metricsZoneA := registriesZoneA.BuildMetricFamiliesPerUser()
   696  	metricsZoneB := registriesZoneB.BuildMetricFamiliesPerUser()
   697  
   698  	assert.Equal(t, float64(3), metricsZoneA.GetSumOfGauges("cortex_alertmanager_tenants_owned"))
   699  	assert.Equal(t, float64(3), metricsZoneB.GetSumOfGauges("cortex_alertmanager_tenants_owned"))
   700  }
   701  
   702  func TestMultitenantAlertmanager_deleteUnusedRemoteUserState(t *testing.T) {
   703  	ctx := context.Background()
   704  
   705  	const (
   706  		user1 = "user1"
   707  		user2 = "user2"
   708  	)
   709  
   710  	alertStore := prepareInMemoryAlertStore()
   711  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
   712  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
   713  
   714  	createInstance := func(i int) *MultitenantAlertmanager {
   715  		reg := prometheus.NewPedanticRegistry()
   716  		cfg := mockAlertmanagerConfig(t)
   717  
   718  		cfg.ShardingRing.ReplicationFactor = 1
   719  		cfg.ShardingRing.InstanceID = fmt.Sprintf("instance-%d", i)
   720  		cfg.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.1-%d", i)
   721  		cfg.ShardingEnabled = true
   722  
    723  		// Shorten the state write interval so that state gets written sooner, making the test faster.
   724  		cfg.Persister.Interval = 500 * time.Millisecond
   725  
   726  		am, err := createMultitenantAlertmanager(cfg, nil, nil, alertStore, ringStore, nil, log.NewLogfmtLogger(os.Stdout), reg)
   727  		require.NoError(t, err)
   728  		t.Cleanup(func() {
   729  			require.NoError(t, services.StopAndAwaitTerminated(ctx, am))
   730  		})
   731  		require.NoError(t, services.StartAndAwaitRunning(ctx, am))
   732  
   733  		return am
   734  	}
   735  
   736  	// Create two instances. With replication factor of 1, this means that only one
   737  	// of the instances will own the user. This tests that an instance does not delete
   738  	// state for users that are configured, but are owned by other instances.
   739  	am1 := createInstance(1)
   740  	am2 := createInstance(2)
   741  
   742  	// Configure the users and wait for the state persister to write some state for both.
   743  	{
   744  		require.NoError(t, alertStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   745  			User:      user1,
   746  			RawConfig: simpleConfigOne,
   747  			Templates: []*alertspb.TemplateDesc{},
   748  		}))
   749  		require.NoError(t, alertStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   750  			User:      user2,
   751  			RawConfig: simpleConfigOne,
   752  			Templates: []*alertspb.TemplateDesc{},
   753  		}))
   754  
   755  		err := am1.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   756  		require.NoError(t, err)
   757  		err = am2.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   758  		require.NoError(t, err)
   759  
   760  		require.Eventually(t, func() bool {
   761  			_, err1 := alertStore.GetFullState(context.Background(), user1)
   762  			_, err2 := alertStore.GetFullState(context.Background(), user2)
   763  			return err1 == nil && err2 == nil
   764  		}, 5*time.Second, 100*time.Millisecond, "timed out waiting for state to be persisted")
   765  	}
   766  
   767  	// Perform another sync to trigger cleanup; this should have no effect.
   768  	{
   769  		err := am1.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   770  		require.NoError(t, err)
   771  		err = am2.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   772  		require.NoError(t, err)
   773  
   774  		_, err = alertStore.GetFullState(context.Background(), user1)
   775  		require.NoError(t, err)
   776  		_, err = alertStore.GetFullState(context.Background(), user2)
   777  		require.NoError(t, err)
   778  	}
   779  
   780  	// Delete one configuration and trigger cleanup; state for only that user should be deleted.
   781  	{
   782  		require.NoError(t, alertStore.DeleteAlertConfig(ctx, user1))
   783  
   784  		err := am1.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   785  		require.NoError(t, err)
   786  		err = am2.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   787  		require.NoError(t, err)
   788  
   789  		_, err = alertStore.GetFullState(context.Background(), user1)
   790  		require.Equal(t, alertspb.ErrNotFound, err)
   791  		_, err = alertStore.GetFullState(context.Background(), user2)
   792  		require.NoError(t, err)
   793  	}
   794  }
   795  
   796  func createFile(t *testing.T, path string) string {
   797  	dir := filepath.Dir(path)
   798  	require.NoError(t, os.MkdirAll(dir, 0777))
   799  	f, err := os.Create(path)
   800  	require.NoError(t, err)
   801  	require.NoError(t, f.Close())
   802  	return path
   803  }
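
// createFileWithContent is an illustrative companion to createFile above, not used by the
// tests in this file: a sketch for cases where a test also needs the file to carry a body
// (for example a template), rather than merely existing on disk.
func createFileWithContent(t *testing.T, path, content string) string {
	dir := filepath.Dir(path)
	require.NoError(t, os.MkdirAll(dir, 0777))
	// ioutil.WriteFile creates (or truncates) the file and writes the given content in one call.
	require.NoError(t, ioutil.WriteFile(path, []byte(content), 0644))
	return path
}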
   804  
   805  func TestMultitenantAlertmanager_NoExternalURL(t *testing.T) {
   806  	amConfig := mockAlertmanagerConfig(t)
   807  	amConfig.ExternalURL = flagext.URLValue{} // no external URL
   808  
   809  	// Create the Multitenant Alertmanager.
   810  	reg := prometheus.NewPedanticRegistry()
   811  	_, err := NewMultitenantAlertmanager(amConfig, nil, nil, log.NewNopLogger(), reg)
   812  
   813  	require.EqualError(t, err, "unable to create Alertmanager because the external URL has not been configured")
   814  }
   815  
   816  func TestMultitenantAlertmanager_ServeHTTP(t *testing.T) {
   817  	// Run this test using a real storage client.
   818  	store := prepareInMemoryAlertStore()
   819  
   820  	amConfig := mockAlertmanagerConfig(t)
   821  
   822  	externalURL := flagext.URLValue{}
   823  	err := externalURL.Set("http://localhost:8080/alertmanager")
   824  	require.NoError(t, err)
   825  
   826  	amConfig.ExternalURL = externalURL
   827  
   828  	// Create the Multitenant Alertmanager.
   829  	reg := prometheus.NewPedanticRegistry()
   830  	am, err := createMultitenantAlertmanager(amConfig, nil, nil, store, nil, nil, log.NewNopLogger(), reg)
   831  	require.NoError(t, err)
   832  
   833  	require.NoError(t, services.StartAndAwaitRunning(context.Background(), am))
   834  	defer services.StopAndAwaitTerminated(context.Background(), am) //nolint:errcheck
   835  
   836  	// Request when no user configuration is present.
   837  	req := httptest.NewRequest("GET", externalURL.String(), nil)
   838  	ctx := user.InjectOrgID(req.Context(), "user1")
   839  
   840  	{
   841  		w := httptest.NewRecorder()
   842  		am.ServeHTTP(w, req.WithContext(ctx))
   843  
   844  		resp := w.Result()
   845  		body, _ := ioutil.ReadAll(resp.Body)
   846  		require.Equal(t, 404, w.Code)
   847  		require.Equal(t, "the Alertmanager is not configured\n", string(body))
   848  	}
   849  
   850  	// Create a configuration for the user in storage.
   851  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
   852  		User:      "user1",
   853  		RawConfig: simpleConfigTwo,
   854  		Templates: []*alertspb.TemplateDesc{},
   855  	}))
   856  
   857  	// Make the alertmanager pick it up.
   858  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   859  	require.NoError(t, err)
   860  
   861  	// Request when AM is active.
   862  	{
   863  		w := httptest.NewRecorder()
   864  		am.ServeHTTP(w, req.WithContext(ctx))
   865  
   866  		require.Equal(t, 301, w.Code) // redirect to UI
   867  	}
   868  
   869  	// Verify that GET /metrics returns 404 even when AM is active.
   870  	{
   871  		metricURL := externalURL.String() + "/metrics"
   872  		require.Equal(t, "http://localhost:8080/alertmanager/metrics", metricURL)
   873  		verify404(ctx, t, am, "GET", metricURL)
   874  	}
   875  
   876  	// Verify that POST /-/reload returns 404 even when AM is active.
   877  	{
   878  		metricURL := externalURL.String() + "/-/reload"
   879  		require.Equal(t, "http://localhost:8080/alertmanager/-/reload", metricURL)
   880  		verify404(ctx, t, am, "POST", metricURL)
   881  	}
   882  
   883  	// Verify that GET /debug/index returns 404 even when AM is active.
   884  	{
    885  		// Register pprof Index (under a non-standard path, but the AM exposes this path via the default ServeMux!)
   886  		http.HandleFunc("/alertmanager/debug/index", pprof.Index)
   887  
   888  		metricURL := externalURL.String() + "/debug/index"
   889  		require.Equal(t, "http://localhost:8080/alertmanager/debug/index", metricURL)
   890  		verify404(ctx, t, am, "GET", metricURL)
   891  	}
   892  
   893  	// Remove the tenant's Alertmanager
   894  	require.NoError(t, store.DeleteAlertConfig(ctx, "user1"))
   895  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
   896  	require.NoError(t, err)
   897  
   898  	{
   899  		// Request when the alertmanager is gone
   900  		w := httptest.NewRecorder()
   901  		am.ServeHTTP(w, req.WithContext(ctx))
   902  
   903  		resp := w.Result()
   904  		body, _ := ioutil.ReadAll(resp.Body)
   905  		require.Equal(t, 404, w.Code)
   906  		require.Equal(t, "the Alertmanager is not configured\n", string(body))
   907  	}
   908  }
   909  
   910  func verify404(ctx context.Context, t *testing.T, am *MultitenantAlertmanager, method string, url string) {
   911  	metricsReq := httptest.NewRequest(method, url, strings.NewReader("Hello")) // Body for POST Request.
   912  	w := httptest.NewRecorder()
   913  	am.ServeHTTP(w, metricsReq.WithContext(ctx))
   914  
   915  	require.Equal(t, 404, w.Code)
   916  }
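
// verifyStatus is an illustrative generalization of verify404 above, not used by the tests in
// this file: the same request/recorder pattern, but asserting on an arbitrary expected status
// code instead of hard-coding 404.
func verifyStatus(ctx context.Context, t *testing.T, am *MultitenantAlertmanager, method string, url string, expectedCode int) {
	req := httptest.NewRequest(method, url, strings.NewReader("Hello")) // Body for POST requests.
	w := httptest.NewRecorder()
	am.ServeHTTP(w, req.WithContext(ctx))

	require.Equal(t, expectedCode, w.Code)
}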
   917  
   918  func TestMultitenantAlertmanager_ServeHTTPWithFallbackConfig(t *testing.T) {
   919  	ctx := context.Background()
   920  	amConfig := mockAlertmanagerConfig(t)
   921  
   922  	// Run this test using a real storage client.
   923  	store := prepareInMemoryAlertStore()
   924  
   925  	externalURL := flagext.URLValue{}
   926  	err := externalURL.Set("http://localhost:8080/alertmanager")
   927  	require.NoError(t, err)
   928  
   929  	fallbackCfg := `
   930  global:
   931    smtp_smarthost: 'localhost:25'
   932    smtp_from: 'youraddress@example.org'
   933  route:
   934    receiver: example-email
   935  receivers:
   936    - name: example-email
   937      email_configs:
   938      - to: 'youraddress@example.org'
   939  `
   940  	amConfig.ExternalURL = externalURL
   941  
   942  	// Create the Multitenant Alertmanager.
   943  	am, err := createMultitenantAlertmanager(amConfig, nil, nil, store, nil, nil, log.NewNopLogger(), nil)
   944  	require.NoError(t, err)
   945  	am.fallbackConfig = fallbackCfg
   946  
   947  	require.NoError(t, services.StartAndAwaitRunning(ctx, am))
   948  	defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
   949  
   950  	// Request when no user configuration is present.
   951  	req := httptest.NewRequest("GET", externalURL.String()+"/api/v1/status", nil)
   952  	w := httptest.NewRecorder()
   953  
   954  	am.ServeHTTP(w, req.WithContext(user.InjectOrgID(req.Context(), "user1")))
   955  
   956  	resp := w.Result()
   957  
   958  	// It succeeds and the Alertmanager is started.
   959  	require.Equal(t, http.StatusOK, resp.StatusCode)
   960  	require.Len(t, am.alertmanagers, 1)
   961  	_, exists := am.alertmanagers["user1"]
   962  	require.True(t, exists)
   963  
   964  	// Even after a poll...
   965  	err = am.loadAndSyncConfigs(ctx, reasonPeriodic)
   966  	require.NoError(t, err)
   967  
    968  	// It does not remove the Alertmanager.
   969  	require.Len(t, am.alertmanagers, 1)
   970  	_, exists = am.alertmanagers["user1"]
   971  	require.True(t, exists)
   972  
   973  	// Remove the Alertmanager configuration.
   974  	require.NoError(t, store.DeleteAlertConfig(ctx, "user1"))
   975  	err = am.loadAndSyncConfigs(ctx, reasonPeriodic)
   976  	require.NoError(t, err)
   977  
    978  	// Even after removing it, we start it again with the fallback configuration.
   979  	w = httptest.NewRecorder()
   980  	am.ServeHTTP(w, req.WithContext(user.InjectOrgID(req.Context(), "user1")))
   981  
   982  	resp = w.Result()
   983  	require.Equal(t, http.StatusOK, resp.StatusCode)
   984  }
   985  
   986  func TestMultitenantAlertmanager_InitialSyncWithSharding(t *testing.T) {
   987  	tc := []struct {
   988  		name          string
   989  		existing      bool
   990  		initialState  ring.InstanceState
   991  		initialTokens ring.Tokens
   992  	}{
   993  		{
   994  			name:     "with no instance in the ring",
   995  			existing: false,
   996  		},
   997  		{
   998  			name:          "with an instance already in the ring with PENDING state and no tokens",
   999  			existing:      true,
  1000  			initialState:  ring.PENDING,
  1001  			initialTokens: ring.Tokens{},
  1002  		},
  1003  		{
  1004  			name:          "with an instance already in the ring with JOINING state and some tokens",
  1005  			existing:      true,
  1006  			initialState:  ring.JOINING,
  1007  			initialTokens: ring.Tokens{1, 2, 3, 4, 5, 6, 7, 8, 9},
  1008  		},
  1009  		{
  1010  			name:          "with an instance already in the ring with ACTIVE state and all tokens",
  1011  			existing:      true,
  1012  			initialState:  ring.ACTIVE,
  1013  			initialTokens: ring.GenerateTokens(128, nil),
  1014  		},
  1015  		{
  1016  			name:          "with an instance already in the ring with LEAVING state and all tokens",
  1017  			existing:      true,
  1018  			initialState:  ring.LEAVING,
  1019  			initialTokens: ring.Tokens{100000},
  1020  		},
  1021  	}
  1022  
  1023  	for _, tt := range tc {
  1024  		t.Run(tt.name, func(t *testing.T) {
  1025  			ctx := context.Background()
  1026  			amConfig := mockAlertmanagerConfig(t)
  1027  			amConfig.ShardingEnabled = true
  1028  			ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1029  			t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1030  
  1031  			// Use an alert store with a mocked backend.
  1032  			bkt := &bucket.ClientMock{}
  1033  			alertStore := bucketclient.NewBucketAlertStore(bkt, nil, log.NewNopLogger())
  1034  
  1035  			// Setup the initial instance state in the ring.
  1036  			if tt.existing {
  1037  				require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) {
  1038  					ringDesc := ring.GetOrCreateRingDesc(in)
  1039  					ringDesc.AddIngester(amConfig.ShardingRing.InstanceID, amConfig.ShardingRing.InstanceAddr, "", tt.initialTokens, tt.initialState, time.Now())
  1040  					return ringDesc, true, nil
  1041  				}))
  1042  			}
  1043  
  1044  			am, err := createMultitenantAlertmanager(amConfig, nil, nil, alertStore, ringStore, nil, log.NewNopLogger(), nil)
  1045  			require.NoError(t, err)
  1046  			defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1047  
  1048  			// Before being registered in the ring.
  1049  			require.False(t, am.ringLifecycler.IsRegistered())
  1050  			require.Equal(t, ring.PENDING.String(), am.ringLifecycler.GetState().String())
  1051  			require.Equal(t, 0, len(am.ringLifecycler.GetTokens()))
  1052  			require.Equal(t, ring.Tokens{}, am.ringLifecycler.GetTokens())
  1053  
   1054  			// During the initial sync, we expect two things: that the instance is already
   1055  			// registered with the ring (meaning it has tokens) and that its state is JOINING.
  1056  			bkt.MockIterWithCallback("alerts/", nil, nil, func() {
  1057  				require.True(t, am.ringLifecycler.IsRegistered())
  1058  				require.Equal(t, ring.JOINING.String(), am.ringLifecycler.GetState().String())
  1059  			})
  1060  			bkt.MockIter("alertmanager/", nil, nil)
  1061  
  1062  			// Once successfully started, the instance should be ACTIVE in the ring.
  1063  			require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1064  
  1065  			// After being registered in the ring.
  1066  			require.True(t, am.ringLifecycler.IsRegistered())
  1067  			require.Equal(t, ring.ACTIVE.String(), am.ringLifecycler.GetState().String())
  1068  			require.Equal(t, 128, len(am.ringLifecycler.GetTokens()))
  1069  			require.Subset(t, am.ringLifecycler.GetTokens(), tt.initialTokens)
  1070  		})
  1071  	}
  1072  }
  1073  
  1074  func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
  1075  	tc := []struct {
  1076  		name              string
  1077  		tenantShardSize   int
  1078  		replicationFactor int
  1079  		instances         int
  1080  		configs           int
  1081  		expectedTenants   int
  1082  		withSharding      bool
  1083  	}{
  1084  		{
  1085  			name:            "sharding disabled, 1 instance",
  1086  			instances:       1,
  1087  			configs:         10,
  1088  			expectedTenants: 10,
  1089  		},
  1090  		{
  1091  			name:            "sharding disabled, 2 instances",
  1092  			instances:       2,
  1093  			configs:         10,
  1094  			expectedTenants: 10 * 2, // each instance loads _all_ tenants.
  1095  		},
  1096  		{
  1097  			name:              "sharding enabled, 1 instance, RF = 1",
  1098  			withSharding:      true,
  1099  			instances:         1,
  1100  			replicationFactor: 1,
  1101  			configs:           10,
  1102  			expectedTenants:   10, // same as no sharding and 1 instance
  1103  		},
  1104  		{
  1105  			name:              "sharding enabled, 2 instances, RF = 1",
  1106  			withSharding:      true,
  1107  			instances:         2,
  1108  			replicationFactor: 1,
  1109  			configs:           10,
  1110  			expectedTenants:   10, // configs * replication factor
  1111  		},
  1112  		{
  1113  			name:              "sharding enabled, 3 instances, RF = 2",
  1114  			withSharding:      true,
  1115  			instances:         3,
  1116  			replicationFactor: 2,
  1117  			configs:           10,
  1118  			expectedTenants:   20, // configs * replication factor
  1119  		},
  1120  		{
  1121  			name:              "sharding enabled, 5 instances, RF = 3",
  1122  			withSharding:      true,
  1123  			instances:         5,
  1124  			replicationFactor: 3,
  1125  			configs:           10,
  1126  			expectedTenants:   30, // configs * replication factor
  1127  		},
  1128  	}
  1129  
  1130  	for _, tt := range tc {
  1131  		t.Run(tt.name, func(t *testing.T) {
  1132  			ctx := context.Background()
  1133  			ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1134  			t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1135  
  1136  			alertStore := prepareInMemoryAlertStore()
  1137  
  1138  			var instances []*MultitenantAlertmanager
  1139  			var instanceIDs []string
  1140  			registries := util.NewUserRegistries()
  1141  
  1142  			// First, add the number of configs to the store.
  1143  			for i := 1; i <= tt.configs; i++ {
  1144  				u := fmt.Sprintf("u-%d", i)
  1145  				require.NoError(t, alertStore.SetAlertConfig(context.Background(), alertspb.AlertConfigDesc{
  1146  					User:      u,
  1147  					RawConfig: simpleConfigOne,
  1148  					Templates: []*alertspb.TemplateDesc{},
  1149  				}))
  1150  			}
  1151  
  1152  			// Then, create the alertmanager instances, start them and add their registries to the slice.
  1153  			for i := 1; i <= tt.instances; i++ {
   1154  				instanceID := fmt.Sprintf("alertmanager-%d", i)
   1155  				instanceIDs = append(instanceIDs, instanceID)
  1156  
  1157  				amConfig := mockAlertmanagerConfig(t)
  1158  				amConfig.ShardingRing.ReplicationFactor = tt.replicationFactor
  1159  				amConfig.ShardingRing.InstanceID = instanceID
  1160  				amConfig.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i)
   1161  				// Do not check for ring topology changes or poll on an interval in this test (we sync the alertmanagers explicitly).
  1162  				amConfig.PollInterval = time.Hour
  1163  				amConfig.ShardingRing.RingCheckPeriod = time.Hour
  1164  
  1165  				if tt.withSharding {
  1166  					amConfig.ShardingEnabled = true
  1167  				}
  1168  
  1169  				reg := prometheus.NewPedanticRegistry()
  1170  				am, err := createMultitenantAlertmanager(amConfig, nil, nil, alertStore, ringStore, nil, log.NewNopLogger(), reg)
  1171  				require.NoError(t, err)
  1172  				defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1173  
  1174  				require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1175  
  1176  				instances = append(instances, am)
  1178  				registries.AddUserRegistry(instanceID, reg)
  1179  			}
  1180  
   1181  			// If we're testing sharding, we need to make sure the ring has settled.
  1182  			if tt.withSharding {
  1183  				ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
  1184  				defer cancel()
  1185  
  1186  				// The alertmanager is ready to be tested once all instances are ACTIVE and the ring settles.
  1187  				for _, am := range instances {
  1188  					for _, id := range instanceIDs {
  1189  						require.NoError(t, ring.WaitInstanceState(ctx, am.ring, id, ring.ACTIVE))
  1190  					}
  1191  				}
  1192  			}
  1193  
  1194  			// Now that the ring has settled, sync configs with the instances.
  1195  			var numConfigs, numInstances int
  1196  			for _, am := range instances {
  1197  				err := am.loadAndSyncConfigs(ctx, reasonRingChange)
  1198  				require.NoError(t, err)
  1199  				numConfigs += len(am.cfgs)
  1200  				numInstances += len(am.alertmanagers)
  1201  			}
  1202  
  1203  			metrics := registries.BuildMetricFamiliesPerUser()
  1204  			assert.Equal(t, tt.expectedTenants, numConfigs)
  1205  			assert.Equal(t, tt.expectedTenants, numInstances)
  1206  			assert.Equal(t, float64(tt.expectedTenants), metrics.GetSumOfGauges("cortex_alertmanager_tenants_owned"))
  1207  			assert.Equal(t, float64(tt.configs*tt.instances), metrics.GetSumOfGauges("cortex_alertmanager_tenants_discovered"))
  1208  		})
  1209  	}
  1210  }
  1211  
  1212  func TestMultitenantAlertmanager_SyncOnRingTopologyChanges(t *testing.T) {
  1213  	registeredAt := time.Now()
  1214  
  1215  	tc := []struct {
  1216  		name       string
  1217  		setupRing  func(desc *ring.Desc)
  1218  		updateRing func(desc *ring.Desc)
  1219  		expected   bool
  1220  	}{
  1221  		{
  1222  			name: "when an instance is added to the ring",
  1223  			setupRing: func(desc *ring.Desc) {
  1224  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1225  			},
  1226  			updateRing: func(desc *ring.Desc) {
  1227  				desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt)
  1228  			},
  1229  			expected: true,
  1230  		},
  1231  		{
  1232  			name: "when an instance is removed from the ring",
  1233  			setupRing: func(desc *ring.Desc) {
  1234  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1235  				desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt)
  1236  			},
  1237  			updateRing: func(desc *ring.Desc) {
  1238  				desc.RemoveIngester("alertmanager-1")
  1239  			},
  1240  			expected: true,
  1241  		},
  1242  		{
  1243  			name: "should sync when an instance changes state",
  1244  			setupRing: func(desc *ring.Desc) {
  1245  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1246  				desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.JOINING, registeredAt)
  1247  			},
  1248  			updateRing: func(desc *ring.Desc) {
  1249  				instance := desc.Ingesters["alertmanager-2"]
  1250  				instance.State = ring.ACTIVE
  1251  				desc.Ingesters["alertmanager-2"] = instance
  1252  			},
  1253  			expected: true,
  1254  		},
  1255  		{
   1256  			name: "should sync when a healthy instance becomes unhealthy",
  1257  			setupRing: func(desc *ring.Desc) {
  1258  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1259  				desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt)
  1260  			},
  1261  			updateRing: func(desc *ring.Desc) {
  1262  				instance := desc.Ingesters["alertmanager-1"]
  1263  				instance.Timestamp = time.Now().Add(-time.Hour).Unix()
  1264  				desc.Ingesters["alertmanager-1"] = instance
  1265  			},
  1266  			expected: true,
  1267  		},
  1268  		{
  1269  			name: "should sync when an unhealthy instance becomes healthy",
  1270  			setupRing: func(desc *ring.Desc) {
  1271  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1272  
  1273  				instance := desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt)
  1274  				instance.Timestamp = time.Now().Add(-time.Hour).Unix()
  1275  				desc.Ingesters["alertmanager-2"] = instance
  1276  			},
  1277  			updateRing: func(desc *ring.Desc) {
  1278  				instance := desc.Ingesters["alertmanager-2"]
  1279  				instance.Timestamp = time.Now().Unix()
  1280  				desc.Ingesters["alertmanager-2"] = instance
  1281  			},
  1282  			expected: true,
  1283  		},
  1284  		{
  1285  			name: "should NOT sync when an instance updates the heartbeat",
  1286  			setupRing: func(desc *ring.Desc) {
  1287  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1288  				desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt)
  1289  			},
  1290  			updateRing: func(desc *ring.Desc) {
  1291  				instance := desc.Ingesters["alertmanager-1"]
  1292  				instance.Timestamp = time.Now().Add(time.Second).Unix()
  1293  				desc.Ingesters["alertmanager-1"] = instance
  1294  			},
  1295  			expected: false,
  1296  		},
  1297  		{
  1298  			name: "should NOT sync when an instance is auto-forgotten in the ring but was already unhealthy in the previous state",
  1299  			setupRing: func(desc *ring.Desc) {
  1300  				desc.AddIngester("alertmanager-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
  1301  				desc.AddIngester("alertmanager-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.ACTIVE, registeredAt)
  1302  
  1303  				instance := desc.Ingesters["alertmanager-2"]
  1304  				instance.Timestamp = time.Now().Add(-time.Hour).Unix()
  1305  				desc.Ingesters["alertmanager-2"] = instance
  1306  			},
  1307  			updateRing: func(desc *ring.Desc) {
  1308  				desc.RemoveIngester("alertmanager-2")
  1309  			},
  1310  			expected: false,
  1311  		},
  1312  	}
  1313  
  1314  	for _, tt := range tc {
  1315  		t.Run(tt.name, func(t *testing.T) {
  1316  			ctx := context.Background()
  1317  			amConfig := mockAlertmanagerConfig(t)
  1318  			amConfig.ShardingEnabled = true
  1319  			amConfig.ShardingRing.RingCheckPeriod = 100 * time.Millisecond
  1320  			amConfig.PollInterval = time.Hour // Don't trigger the periodic check.
  1321  
  1322  			ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1323  			t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1324  
  1325  			alertStore := prepareInMemoryAlertStore()
  1326  
  1327  			reg := prometheus.NewPedanticRegistry()
  1328  			am, err := createMultitenantAlertmanager(amConfig, nil, nil, alertStore, ringStore, nil, log.NewNopLogger(), reg)
  1329  			require.NoError(t, err)
  1330  
  1331  			require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) {
  1332  				ringDesc := ring.GetOrCreateRingDesc(in)
  1333  				tt.setupRing(ringDesc)
  1334  				return ringDesc, true, nil
  1335  			}))
  1336  
  1337  			require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1338  			defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1339  
  1340  			// Make sure the initial sync happened.
  1341  			regs := util.NewUserRegistries()
  1342  			regs.AddUserRegistry("test", reg)
  1343  			metrics := regs.BuildMetricFamiliesPerUser()
  1344  			assert.Equal(t, float64(1), metrics.GetSumOfCounters("cortex_alertmanager_sync_configs_total"))
  1345  
  1346  			// Change the ring topology.
  1347  			require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) {
  1348  				ringDesc := ring.GetOrCreateRingDesc(in)
  1349  				tt.updateRing(ringDesc)
  1350  				return ringDesc, true, nil
  1351  			}))
  1352  
  1353  			// Assert whether an additional sync is expected.
  1354  			expectedSyncs := 1
  1355  			if tt.expected {
  1356  				expectedSyncs++
  1357  			}
  1358  			test.Poll(t, 3*time.Second, float64(expectedSyncs), func() interface{} {
  1359  				metrics := regs.BuildMetricFamiliesPerUser()
  1360  				return metrics.GetSumOfCounters("cortex_alertmanager_sync_configs_total")
  1361  			})
  1362  		})
  1363  	}
  1364  }
  1365  
  1366  func TestMultitenantAlertmanager_RingLifecyclerShouldAutoForgetUnhealthyInstances(t *testing.T) {
  1367  	const unhealthyInstanceID = "alertmanager-bad-1"
  1368  	const heartbeatTimeout = time.Minute
  1369  	ctx := context.Background()
  1370  	amConfig := mockAlertmanagerConfig(t)
  1371  	amConfig.ShardingEnabled = true
  1372  	amConfig.ShardingRing.HeartbeatPeriod = 100 * time.Millisecond
  1373  	amConfig.ShardingRing.HeartbeatTimeout = heartbeatTimeout
  1374  
  1375  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1376  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1377  
  1378  	alertStore := prepareInMemoryAlertStore()
  1379  
  1380  	am, err := createMultitenantAlertmanager(amConfig, nil, nil, alertStore, ringStore, nil, log.NewNopLogger(), nil)
  1381  	require.NoError(t, err)
  1382  	require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1383  	defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1384  
  1385  	require.NoError(t, ringStore.CAS(ctx, RingKey, func(in interface{}) (interface{}, bool, error) {
  1386  		ringDesc := ring.GetOrCreateRingDesc(in)
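        		// Register an instance whose last heartbeat is older than ringAutoForgetUnhealthyPeriods * heartbeatTimeout,
        		// so the auto-forget logic should treat it as unhealthy and remove it from the ring.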
  1387  		instance := ringDesc.AddIngester(unhealthyInstanceID, "127.0.0.1", "", ring.GenerateTokens(RingNumTokens, nil), ring.ACTIVE, time.Now())
  1388  		instance.Timestamp = time.Now().Add(-(ringAutoForgetUnhealthyPeriods + 1) * heartbeatTimeout).Unix()
  1389  		ringDesc.Ingesters[unhealthyInstanceID] = instance
  1390  
  1391  		return ringDesc, true, nil
  1392  	}))
  1393  
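        	// Wait for the auto-forget logic to remove the unhealthy instance from the ring.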
  1394  	test.Poll(t, time.Second, false, func() interface{} {
  1395  		d, err := ringStore.Get(ctx, RingKey)
  1396  		if err != nil {
  1397  			return err
  1398  		}
  1399  
  1400  		_, ok := ring.GetOrCreateRingDesc(d).Ingesters[unhealthyInstanceID]
  1401  		return ok
  1402  	})
  1403  }
  1404  
  1405  func TestMultitenantAlertmanager_InitialSyncFailureWithSharding(t *testing.T) {
  1406  	ctx := context.Background()
  1407  	amConfig := mockAlertmanagerConfig(t)
  1408  	amConfig.ShardingEnabled = true
  1409  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1410  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1411  
  1412  	// Mock the store to fail listing configs.
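        	// In the bucket alert store, the "alerts/" prefix is expected to hold the per-tenant configurations and
        	// "alertmanager/" the full state snapshots, so only the config listing is made to fail here.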
  1413  	bkt := &bucket.ClientMock{}
  1414  	bkt.MockIter("alerts/", nil, errors.New("failed to list alerts"))
  1415  	bkt.MockIter("alertmanager/", nil, nil)
  1416  	store := bucketclient.NewBucketAlertStore(bkt, nil, log.NewNopLogger())
  1417  
  1418  	am, err := createMultitenantAlertmanager(amConfig, nil, nil, store, ringStore, nil, log.NewNopLogger(), nil)
  1419  	require.NoError(t, err)
  1420  	defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1421  
  1422  	require.NoError(t, am.StartAsync(ctx))
  1423  	err = am.AwaitRunning(ctx)
  1424  	require.Error(t, err)
  1425  	require.Equal(t, services.Failed, am.State())
  1426  	require.False(t, am.ringLifecycler.IsRegistered())
  1427  	require.NotNil(t, am.ring)
  1428  }
  1429  
  1430  func TestAlertmanager_ReplicasPosition(t *testing.T) {
  1431  	ctx := context.Background()
  1432  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1433  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1434  
  1435  	mockStore := prepareInMemoryAlertStore()
  1436  	require.NoError(t, mockStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
  1437  		User:      "user-1",
  1438  		RawConfig: simpleConfigOne,
  1439  		Templates: []*alertspb.TemplateDesc{},
  1440  	}))
  1441  
  1442  	var instances []*MultitenantAlertmanager
  1443  	var instanceIDs []string
  1444  	registries := util.NewUserRegistries()
  1445  
  1446  	// First, create the alertmanager instances. We use a replication factor of 3 and create 3 instances so that the tenant is replicated to each instance.
  1447  	for i := 1; i <= 3; i++ {
  1449  		instanceID := fmt.Sprintf("alertmanager-%d", i)
  1450  
  1451  		amConfig := mockAlertmanagerConfig(t)
  1452  		amConfig.ShardingRing.ReplicationFactor = 3
  1453  		amConfig.ShardingRing.InstanceID = instanceID
  1454  		amConfig.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i)
  1455  
  1456  		// Do not check for ring topology changes or poll on an interval in this test (we sync the alertmanagers explicitly).
  1457  		amConfig.PollInterval = time.Hour
  1458  		amConfig.ShardingRing.RingCheckPeriod = time.Hour
  1459  		amConfig.ShardingEnabled = true
  1460  
  1461  		reg := prometheus.NewPedanticRegistry()
  1462  		am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
  1463  		require.NoError(t, err)
  1464  		defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1465  
  1466  		require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1467  
  1468  		instances = append(instances, am)
  1469  		instanceIDs = append(instanceIDs, instanceID)
  1470  		registries.AddUserRegistry(instanceID, reg)
  1471  	}
  1472  
  1473  	// We need to make sure the ring is settled. The alertmanager is ready to be tested once all instances are ACTIVE and the ring settles.
  1474  	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
  1475  	defer cancel()
  1476  
  1477  	for _, am := range instances {
  1478  		for _, id := range instanceIDs {
  1479  			require.NoError(t, ring.WaitInstanceState(ctx, am.ring, id, ring.ACTIVE))
  1480  		}
  1481  	}
  1482  
  1483  	// Now that the ring has settled, sync configs with the instances.
  1484  	for _, am := range instances {
  1485  		err := am.loadAndSyncConfigs(ctx, reasonRingChange)
  1486  		require.NoError(t, err)
  1487  	}
  1488  
  1489  	// Now that the ring has settled, we expect each AM instance to have a different position.
  1490  	// Let's walk through them and collect the positions.
  1491  	var positions []int
  1492  	for _, instance := range instances {
  1493  		instance.alertmanagersMtx.Lock()
  1494  		am, ok := instance.alertmanagers["user-1"]
  1495  		require.True(t, ok)
  1496  		positions = append(positions, am.state.Position())
  1497  		instance.alertmanagersMtx.Unlock()
  1498  	}
  1499  
  1500  	require.ElementsMatch(t, []int{0, 1, 2}, positions)
  1501  }
  1502  
  1503  func TestAlertmanager_StateReplicationWithSharding(t *testing.T) {
  1504  	tc := []struct {
  1505  		name              string
  1506  		replicationFactor int
  1507  		instances         int
  1508  		withSharding      bool
  1509  	}{
  1510  		{
  1511  			name:              "sharding disabled (hence no replication factor), 1 instance",
  1512  			withSharding:      false,
  1513  			instances:         1,
  1514  			replicationFactor: 0,
  1515  		},
  1516  		{
  1517  			name:              "sharding enabled, RF = 2, 2 instances",
  1518  			withSharding:      true,
  1519  			instances:         2,
  1520  			replicationFactor: 2,
  1521  		},
  1522  		{
  1523  			name:              "sharding enabled, RF = 3, 10 instances",
  1524  			withSharding:      true,
  1525  			instances:         10,
  1526  			replicationFactor: 3,
  1527  		},
  1528  	}
  1529  
  1530  	for _, tt := range tc {
  1531  		t.Run(tt.name, func(t *testing.T) {
  1532  			ctx := context.Background()
  1533  			ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1534  			t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1535  
  1536  			mockStore := prepareInMemoryAlertStore()
  1537  			clientPool := newPassthroughAlertmanagerClientPool()
  1538  			externalURL := flagext.URLValue{}
  1539  			err := externalURL.Set("http://localhost:8080/alertmanager")
  1540  			require.NoError(t, err)
  1541  
  1542  			var instances []*MultitenantAlertmanager
  1543  			var instanceIDs []string
  1544  			registries := util.NewUserRegistries()
  1545  
  1546  			// First, add the number of configs to the store.
  1547  			for i := 1; i <= 12; i++ {
  1548  				u := fmt.Sprintf("u-%d", i)
  1549  				require.NoError(t, mockStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
  1550  					User:      u,
  1551  					RawConfig: simpleConfigOne,
  1552  					Templates: []*alertspb.TemplateDesc{},
  1553  				}))
  1554  			}
  1555  
  1556  			// Then, create the alertmanager instances, start them and add their registries to the slice.
  1557  			for i := 1; i <= tt.instances; i++ {
  1559  				instanceID := fmt.Sprintf("alertmanager-%d", i)
  1560  
  1561  				amConfig := mockAlertmanagerConfig(t)
  1562  				amConfig.ExternalURL = externalURL
  1563  				amConfig.ShardingRing.ReplicationFactor = tt.replicationFactor
  1564  				amConfig.ShardingRing.InstanceID = instanceID
  1565  				amConfig.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i)
  1566  
  1567  				// Do not check for ring topology changes or poll on an interval in this test (we sync the alertmanagers explicitly).
  1568  				amConfig.PollInterval = time.Hour
  1569  				amConfig.ShardingRing.RingCheckPeriod = time.Hour
  1570  
  1571  				if tt.withSharding {
  1572  					amConfig.ShardingEnabled = true
  1573  				}
  1574  
  1575  				reg := prometheus.NewPedanticRegistry()
  1576  				am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
  1577  				require.NoError(t, err)
  1578  				defer services.StopAndAwaitTerminated(ctx, am) //nolint:errcheck
  1579  
  1580  				if tt.withSharding {
  1581  					clientPool.setServer(amConfig.ShardingRing.InstanceAddr+":0", am)
  1582  					am.alertmanagerClientsPool = clientPool
  1583  				}
  1584  
  1585  				require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1586  
  1587  				instances = append(instances, am)
  1588  				instanceIDs = append(instanceIDs, instanceID)
  1589  				registries.AddUserRegistry(instanceID, reg)
  1590  			}
  1591  
  1592  			// If we're testing with sharding, we need to make sure the ring is settled.
  1593  			if tt.withSharding {
  1594  				ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
  1595  				defer cancel()
  1596  
  1597  				// The alertmanager is ready to be tested once all instances are ACTIVE and the ring settles.
  1598  				for _, am := range instances {
  1599  					for _, id := range instanceIDs {
  1600  						require.NoError(t, ring.WaitInstanceState(ctx, am.ring, id, ring.ACTIVE))
  1601  					}
  1602  				}
  1603  			}
  1604  
  1605  			// Now that the ring has settled, sync configs with the instances.
  1606  			var numConfigs, numInstances int
  1607  			for _, am := range instances {
  1608  				err := am.loadAndSyncConfigs(ctx, reasonRingChange)
  1609  				require.NoError(t, err)
  1610  				numConfigs += len(am.cfgs)
  1611  				numInstances += len(am.alertmanagers)
  1612  			}
  1613  
  1614  			// With sharding enabled, we propagate messages over gRPC instead of using gossip over TCP.
  1615  			// 1. First, get a random multitenant instance
  1616  			//    We must pick an instance which actually has a user configured.
  1617  			var multitenantAM *MultitenantAlertmanager
  1618  			for {
  1619  				multitenantAM = instances[rand.Intn(len(instances))]
  1620  
  1621  				multitenantAM.alertmanagersMtx.Lock()
  1622  				amount := len(multitenantAM.alertmanagers)
  1623  				multitenantAM.alertmanagersMtx.Unlock()
  1624  				if amount > 0 {
  1625  					break
  1626  				}
  1627  			}
  1628  
  1629  			// 2. Then, get a random user that exists in that particular alertmanager instance.
  1630  			multitenantAM.alertmanagersMtx.Lock()
  1631  			require.Greater(t, len(multitenantAM.alertmanagers), 0)
  1632  			k := rand.Intn(len(multitenantAM.alertmanagers))
  1633  			var userID string
  1634  			for u := range multitenantAM.alertmanagers {
  1635  				if k == 0 {
  1636  					userID = u
  1637  					break
  1638  				}
  1639  				k--
  1640  			}
  1641  			multitenantAM.alertmanagersMtx.Unlock()
  1642  
  1643  			// 3. Now that we have our alertmanager user, let's create a silence and make sure it is replicated.
  1644  			silence := types.Silence{
  1645  				Matchers: labels.Matchers{
  1646  					{Name: "instance", Value: "prometheus-one"},
  1647  				},
  1648  				Comment:  "Created for a test case.",
  1649  				StartsAt: time.Now(),
  1650  				EndsAt:   time.Now().Add(time.Hour),
  1651  			}
  1652  			data, err := json.Marshal(silence)
  1653  			require.NoError(t, err)
  1654  
  1655  			// 4. Create the silence in one of the alertmanagers
  1656  			req := httptest.NewRequest(http.MethodPost, externalURL.String()+"/api/v2/silences", bytes.NewReader(data))
  1657  			req.Header.Set("content-type", "application/json")
  1658  			reqCtx := user.InjectOrgID(req.Context(), userID)
  1659  			{
  1660  				w := httptest.NewRecorder()
  1661  				multitenantAM.serveRequest(w, req.WithContext(reqCtx))
  1662  
  1663  				resp := w.Result()
  1664  				body, _ := ioutil.ReadAll(resp.Body)
  1665  				assert.Equal(t, http.StatusOK, w.Code)
  1666  				require.Regexp(t, regexp.MustCompile(`{"silenceID":".+"}`), string(body))
  1667  			}
  1668  
  1669  			// If sharding is not enabled, we never propagate any messages amongst replicas in this way, and we can stop here.
  1670  			if !tt.withSharding {
  1671  				metrics := registries.BuildMetricFamiliesPerUser()
  1672  
  1673  				assert.Equal(t, float64(1), metrics.GetSumOfGauges("cortex_alertmanager_silences"))
  1674  				assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_total"))
  1675  				assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_failed_total"))
  1676  				return
  1677  			}
  1678  
  1679  			var metrics util.MetricFamiliesPerUser
  1680  
  1681  			// 5. Then, make sure it is propagated successfully.
  1682  			//    Replication is asynchronous, so we may have to wait a short period of time.
  1683  			assert.Eventually(t, func() bool {
  1684  				metrics = registries.BuildMetricFamiliesPerUser()
  1685  				return (float64(tt.replicationFactor) == metrics.GetSumOfGauges("cortex_alertmanager_silences") &&
  1686  					float64(tt.replicationFactor) == metrics.GetSumOfCounters("cortex_alertmanager_state_replication_total"))
  1687  			}, 5*time.Second, 100*time.Millisecond)
  1688  
  1689  			assert.Equal(t, float64(tt.replicationFactor), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_total"))
  1690  			assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_failed_total"))
  1691  
  1692  			// 5b. Check the number of partial states merged is as we expect.
  1693  			// Partial states are currently replicated twice:
  1694  			//   For RF=1 1 -> 0      = Total 0 merges
  1695  			//   For RF=2 1 -> 1 -> 1 = Total 2 merges
  1696  			//   For RF=3 1 -> 2 -> 4 = Total 6 merges
  1697  			nFanOut := tt.replicationFactor - 1
  1698  			nMerges := nFanOut + (nFanOut * nFanOut)
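        			// That is: the initial broadcast reaches nFanOut peers, and each of those re-broadcasts the
        			// merged state to the other replicas, giving nFanOut + nFanOut^2 merges in total.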
  1699  
  1700  			assert.Eventually(t, func() bool {
  1701  				metrics = registries.BuildMetricFamiliesPerUser()
  1702  				return float64(nMerges) == metrics.GetSumOfCounters("cortex_alertmanager_partial_state_merges_total")
  1703  			}, 5*time.Second, 100*time.Millisecond)
  1704  
  1705  			assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_partial_state_merges_failed_total"))
  1706  		})
  1707  	}
  1708  }
  1709  
  1710  func TestAlertmanager_StateReplicationWithSharding_InitialSyncFromPeers(t *testing.T) {
  1711  	tc := []struct {
  1712  		name              string
  1713  		replicationFactor int
  1714  	}{
  1715  		{
  1716  			name:              "RF = 2",
  1717  			replicationFactor: 2,
  1718  		},
  1719  		{
  1720  			name:              "RF = 3",
  1721  			replicationFactor: 3,
  1722  		},
  1723  	}
  1724  
  1725  	for _, tt := range tc {
  1726  		t.Run(tt.name, func(t *testing.T) {
  1727  			ctx := context.Background()
  1728  			ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
  1729  			t.Cleanup(func() { assert.NoError(t, closer.Close()) })
  1730  
  1731  			mockStore := prepareInMemoryAlertStore()
  1732  			clientPool := newPassthroughAlertmanagerClientPool()
  1733  			externalURL := flagext.URLValue{}
  1734  			err := externalURL.Set("http://localhost:8080/alertmanager")
  1735  			require.NoError(t, err)
  1736  
  1737  			var instances []*MultitenantAlertmanager
  1738  			var instanceIDs []string
  1739  			registries := util.NewUserRegistries()
  1740  
  1741  			// Create only two users - no need for more for these test cases.
  1742  			for i := 1; i <= 2; i++ {
  1743  				u := fmt.Sprintf("u-%d", i)
  1744  				require.NoError(t, mockStore.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
  1745  					User:      u,
  1746  					RawConfig: simpleConfigOne,
  1747  					Templates: []*alertspb.TemplateDesc{},
  1748  				}))
  1749  			}
  1750  
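        			// createInstance starts a new alertmanager instance, waits for the ring to settle and then syncs its configs.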
  1751  			createInstance := func(i int) *MultitenantAlertmanager {
  1753  				instanceID := fmt.Sprintf("alertmanager-%d", i)
  1754  
  1755  				amConfig := mockAlertmanagerConfig(t)
  1756  				amConfig.ExternalURL = externalURL
  1757  				amConfig.ShardingRing.ReplicationFactor = tt.replicationFactor
  1758  				amConfig.ShardingRing.InstanceID = instanceID
  1759  				amConfig.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i)
  1760  
  1761  				// Do not check for ring topology changes or poll on an interval in this test (we sync the alertmanagers explicitly).
  1762  				amConfig.PollInterval = time.Hour
  1763  				amConfig.ShardingRing.RingCheckPeriod = time.Hour
  1764  
  1765  				amConfig.ShardingEnabled = true
  1766  
  1767  				reg := prometheus.NewPedanticRegistry()
  1768  				am, err := createMultitenantAlertmanager(amConfig, nil, nil, mockStore, ringStore, nil, log.NewNopLogger(), reg)
  1769  				require.NoError(t, err)
  1770  
  1771  				clientPool.setServer(amConfig.ShardingRing.InstanceAddr+":0", am)
  1772  				am.alertmanagerClientsPool = clientPool
  1773  
  1774  				require.NoError(t, services.StartAndAwaitRunning(ctx, am))
  1775  				t.Cleanup(func() {
  1776  					require.NoError(t, services.StopAndAwaitTerminated(ctx, am))
  1777  				})
  1778  
  1779  				instances = append(instances, am)
  1780  				instanceIDs = append(instanceIDs, instanceID)
  1781  				registries.AddUserRegistry(instanceID, reg)
  1782  
  1783  				// Make sure the ring is settled.
  1784  				{
  1785  					ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
  1786  					defer cancel()
  1787  
  1788  					// The alertmanager is ready to be tested once all instances are ACTIVE and the ring settles.
  1789  					for _, am := range instances {
  1790  						for _, id := range instanceIDs {
  1791  							require.NoError(t, ring.WaitInstanceState(ctx, am.ring, id, ring.ACTIVE))
  1792  						}
  1793  					}
  1794  				}
  1795  
  1796  				// Now that the ring has settled, sync configs with the instances.
  1797  				require.NoError(t, am.loadAndSyncConfigs(ctx, reasonRingChange))
  1798  
  1799  				return am
  1800  			}
  1801  
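        			// writeSilence creates a silence for the given tenant through the API and asserts the request succeeded.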
  1802  			writeSilence := func(i *MultitenantAlertmanager, userID string) {
  1803  				silence := types.Silence{
  1804  					Matchers: labels.Matchers{
  1805  						{Name: "instance", Value: "prometheus-one"},
  1806  					},
  1807  					Comment:  "Created for a test case.",
  1808  					StartsAt: time.Now(),
  1809  					EndsAt:   time.Now().Add(time.Hour),
  1810  				}
  1811  				data, err := json.Marshal(silence)
  1812  				require.NoError(t, err)
  1813  
  1814  				req := httptest.NewRequest(http.MethodPost, externalURL.String()+"/api/v2/silences", bytes.NewReader(data))
  1815  				req.Header.Set("content-type", "application/json")
  1816  				reqCtx := user.InjectOrgID(req.Context(), userID)
  1817  				{
  1818  					w := httptest.NewRecorder()
  1819  					i.serveRequest(w, req.WithContext(reqCtx))
  1820  
  1821  					resp := w.Result()
  1822  					body, _ := ioutil.ReadAll(resp.Body)
  1823  					assert.Equal(t, http.StatusOK, w.Code)
  1824  					require.Regexp(t, regexp.MustCompile(`{"silenceID":".+"}`), string(body))
  1825  				}
  1826  			}
  1827  
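        			// checkSilence reads the silences back for the given tenant and asserts the test silence is present.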
  1828  			checkSilence := func(i *MultitenantAlertmanager, userID string) {
  1829  				req := httptest.NewRequest(http.MethodGet, externalURL.String()+"/api/v2/silences", nil)
  1830  				req.Header.Set("content-type", "application/json")
  1831  				reqCtx := user.InjectOrgID(req.Context(), userID)
  1832  				{
  1833  					w := httptest.NewRecorder()
  1834  					i.serveRequest(w, req.WithContext(reqCtx))
  1835  
  1836  					resp := w.Result()
  1837  					body, _ := ioutil.ReadAll(resp.Body)
  1838  					assert.Equal(t, http.StatusOK, w.Code)
  1839  					require.Regexp(t, regexp.MustCompile(`"comment":"Created for a test case."`), string(body))
  1840  				}
  1841  			}
  1842  
  1843  			// 1. Create the first instance and load the user configurations.
  1844  			i1 := createInstance(1)
  1845  
  1846  			// 2. Create a silence in the first alertmanager instance and check we can read it.
  1847  			writeSilence(i1, "u-1")
  1848  			// 2.a. Check the silence was created (paranoia).
  1849  			checkSilence(i1, "u-1")
  1850  			// 2.b. Check the relevant metrics were updated.
  1851  			{
  1852  				metrics := registries.BuildMetricFamiliesPerUser()
  1853  				assert.Equal(t, float64(1), metrics.GetSumOfGauges("cortex_alertmanager_silences"))
  1854  			}
  1855  			// 2.c. Wait for the silence replication to be attempted; note this is asynchronous.
  1856  			{
  1857  				test.Poll(t, 5*time.Second, float64(1), func() interface{} {
  1858  					metrics := registries.BuildMetricFamiliesPerUser()
  1859  					return metrics.GetSumOfCounters("cortex_alertmanager_state_replication_total")
  1860  				})
  1861  				metrics := registries.BuildMetricFamiliesPerUser()
  1862  				assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_failed_total"))
  1863  			}
  1864  
  1865  			// 3. Create a second instance. This should attempt to fetch the silence from the first.
  1866  			i2 := createInstance(2)
  1867  
  1868  			// 3.a. Check the silence was fetched from the first instance successfully.
  1869  			checkSilence(i2, "u-1")
  1870  
  1871  			// 3.b. Check the metrics: we should see the additional silence replica without any further replication activity.
  1872  			{
  1873  				metrics := registries.BuildMetricFamiliesPerUser()
  1874  				assert.Equal(t, float64(2), metrics.GetSumOfGauges("cortex_alertmanager_silences"))
  1875  				assert.Equal(t, float64(1), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_total"))
  1876  				assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_failed_total"))
  1877  			}
  1878  
  1879  			if tt.replicationFactor >= 3 {
  1880  				// 4. When testing RF = 3, create a third instance, to test obtaining state from multiple places.
  1881  				i3 := createInstance(3)
  1882  
  1883  				// 4.a. Check the silence was fetched from one or both of the instances successfully.
  1884  				checkSilence(i3, "u-1")
  1885  
  1886  				// 4.b. Check the metrics one more time. We should have three replicas of the silence.
  1887  				{
  1888  					metrics := registries.BuildMetricFamiliesPerUser()
  1889  					assert.Equal(t, float64(3), metrics.GetSumOfGauges("cortex_alertmanager_silences"))
  1890  					assert.Equal(t, float64(1), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_total"))
  1891  					assert.Equal(t, float64(0), metrics.GetSumOfCounters("cortex_alertmanager_state_replication_failed_total"))
  1892  				}
  1893  			}
  1894  		})
  1895  	}
  1896  }
  1897  
  1898  // prepareInMemoryAlertStore builds and returns an in-memory alert store.
  1899  func prepareInMemoryAlertStore() alertstore.AlertStore {
  1900  	return bucketclient.NewBucketAlertStore(objstore.NewInMemBucket(), nil, log.NewNopLogger())
  1901  }
  1902  
  1903  func TestSafeTemplateFilepath(t *testing.T) {
  1904  	tests := map[string]struct {
  1905  		dir          string
  1906  		template     string
  1907  		expectedPath string
  1908  		expectedErr  error
  1909  	}{
  1910  		"should succeed if the provided template is a filename": {
  1911  			dir:          "/data/tenant",
  1912  			template:     "test.tmpl",
  1913  			expectedPath: "/data/tenant/test.tmpl",
  1914  		},
  1915  		"should fail if the provided template is escaping the dir": {
  1916  			dir:         "/data/tenant",
  1917  			template:    "../test.tmpl",
  1918  			expectedErr: errors.New(`invalid template name "../test.tmpl": the template filepath is escaping the per-tenant local directory`),
  1919  		},
  1920  	}
  1921  
  1922  	for testName, testData := range tests {
  1923  		t.Run(testName, func(t *testing.T) {
  1924  			actualPath, actualErr := safeTemplateFilepath(testData.dir, testData.template)
  1925  			assert.Equal(t, testData.expectedErr, actualErr)
  1926  			assert.Equal(t, testData.expectedPath, actualPath)
  1927  		})
  1928  	}
  1929  }
  1930  
  1931  func TestStoreTemplateFile(t *testing.T) {
  1932  	tempDir, err := ioutil.TempDir(os.TempDir(), "alertmanager")
  1933  	require.NoError(t, err)
  1934  
  1935  	t.Cleanup(func() {
  1936  		require.NoError(t, os.RemoveAll(tempDir))
  1937  	})
  1938  
  1939  	testTemplateDir := filepath.Join(tempDir, templatesDir)
  1940  
  1941  	changed, err := storeTemplateFile(filepath.Join(testTemplateDir, "some-template"), "content")
  1942  	require.NoError(t, err)
  1943  	require.True(t, changed)
  1944  
  1945  	changed, err = storeTemplateFile(filepath.Join(testTemplateDir, "some-template"), "new content")
  1946  	require.NoError(t, err)
  1947  	require.True(t, changed)
  1948  
  1949  	changed, err = storeTemplateFile(filepath.Join(testTemplateDir, "some-template"), "new content") // reusing previous content
  1950  	require.NoError(t, err)
  1951  	require.False(t, changed)
  1952  }
  1953  
  1954  func TestMultitenantAlertmanager_verifyRateLimitedEmailConfig(t *testing.T) {
  1955  	ctx := context.Background()
  1956  
  1957  	config := `global:
  1958    resolve_timeout: 1m
  1959    smtp_require_tls: false
  1960  
  1961  route:
  1962    receiver: 'email'
  1963  
  1964  receivers:
  1965  - name: 'email'
  1966    email_configs:
  1967    - to: test@example.com
  1968      from: test@example.com
  1969      smarthost: smtp:2525
  1970  `
  1971  
  1972  	// Run this test using a real storage client.
  1973  	store := prepareInMemoryAlertStore()
  1974  	require.NoError(t, store.SetAlertConfig(ctx, alertspb.AlertConfigDesc{
  1975  		User:      "user",
  1976  		RawConfig: config,
  1977  		Templates: []*alertspb.TemplateDesc{},
  1978  	}))
  1979  
  1980  	limits := mockAlertManagerLimits{
  1981  		emailNotificationRateLimit: 0,
  1982  		emailNotificationBurst:     0,
  1983  	}
  1984  
  1985  	reg := prometheus.NewPedanticRegistry()
  1986  	cfg := mockAlertmanagerConfig(t)
  1987  	am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, &limits, log.NewNopLogger(), reg)
  1988  	require.NoError(t, err)
  1989  
  1990  	err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
  1991  	require.NoError(t, err)
  1992  	require.Len(t, am.alertmanagers, 1)
  1993  
  1994  	am.alertmanagersMtx.Lock()
  1995  	uam := am.alertmanagers["user"]
  1996  	am.alertmanagersMtx.Unlock()
  1997  
  1998  	require.NotNil(t, uam)
  1999  
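        	// Populate the notification context values the pipeline stages expect before executing it.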
  2000  	ctx = notify.WithReceiverName(ctx, "email")
  2001  	ctx = notify.WithGroupKey(ctx, "key")
  2002  	ctx = notify.WithRepeatInterval(ctx, time.Minute)
  2003  
  2004  	// Verify that the rate limiter is in place for the email notifier.
  2005  	_, _, err = uam.lastPipeline.Exec(ctx, log.NewNopLogger(), &types.Alert{})
  2006  	require.NotNil(t, err)
  2007  	require.Contains(t, err.Error(), errRateLimited.Error())
  2008  }
  2009  
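        // passthroughAlertmanagerClient is a Client implementation that forwards gRPC calls directly to an
        // in-process AlertmanagerServer, without going over the network.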
  2010  type passthroughAlertmanagerClient struct {
  2011  	server alertmanagerpb.AlertmanagerServer
  2012  }
  2013  
  2014  func (am *passthroughAlertmanagerClient) UpdateState(ctx context.Context, in *clusterpb.Part, opts ...grpc.CallOption) (*alertmanagerpb.UpdateStateResponse, error) {
  2015  	return am.server.UpdateState(ctx, in)
  2016  }
  2017  
  2018  func (am *passthroughAlertmanagerClient) ReadState(ctx context.Context, in *alertmanagerpb.ReadStateRequest, opts ...grpc.CallOption) (*alertmanagerpb.ReadStateResponse, error) {
  2019  	return am.server.ReadState(ctx, in)
  2020  }
  2021  
  2022  func (am *passthroughAlertmanagerClient) HandleRequest(context.Context, *httpgrpc.HTTPRequest, ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) {
  2023  	return nil, fmt.Errorf("unexpected call to HandleRequest")
  2024  }
  2025  
  2026  func (am *passthroughAlertmanagerClient) RemoteAddress() string {
  2027  	return ""
  2028  }
  2029  
  2030  // passthroughAlertmanagerClientPool allows testing the logic of gRPC calls between alertmanager instances
  2031  // by invoking client calls directly to a peer instance in the unit test, without the server running.
  2032  type passthroughAlertmanagerClientPool struct {
  2033  	serversMtx sync.Mutex
  2034  	servers    map[string]alertmanagerpb.AlertmanagerServer
  2035  }
  2036  
  2037  func newPassthroughAlertmanagerClientPool() *passthroughAlertmanagerClientPool {
  2038  	return &passthroughAlertmanagerClientPool{
  2039  		servers: make(map[string]alertmanagerpb.AlertmanagerServer),
  2040  	}
  2041  }
  2042  
  2043  func (f *passthroughAlertmanagerClientPool) setServer(addr string, server alertmanagerpb.AlertmanagerServer) {
  2044  	f.serversMtx.Lock()
  2045  	defer f.serversMtx.Unlock()
  2046  	f.servers[addr] = server
  2047  }
  2048  
  2049  func (f *passthroughAlertmanagerClientPool) GetClientFor(addr string) (Client, error) {
  2050  	f.serversMtx.Lock()
  2051  	defer f.serversMtx.Unlock()
  2052  	s, ok := f.servers[addr]
  2053  	if !ok {
  2054  		return nil, fmt.Errorf("client not found for address: %v", addr)
  2055  	}
  2056  	return Client(&passthroughAlertmanagerClient{s}), nil
  2057  }
  2058  
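        // mockAlertManagerLimits implements the per-tenant alertmanager limits with fixed values for tests.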
  2059  type mockAlertManagerLimits struct {
  2060  	emailNotificationRateLimit     rate.Limit
  2061  	emailNotificationBurst         int
  2062  	maxConfigSize                  int
  2063  	maxTemplatesCount              int
  2064  	maxSizeOfTemplate              int
  2065  	maxDispatcherAggregationGroups int
  2066  	maxAlertsCount                 int
  2067  	maxAlertsSizeBytes             int
  2068  }
  2069  
  2070  func (m *mockAlertManagerLimits) AlertmanagerMaxConfigSize(tenant string) int {
  2071  	return m.maxConfigSize
  2072  }
  2073  
  2074  func (m *mockAlertManagerLimits) AlertmanagerMaxTemplatesCount(tenant string) int {
  2075  	return m.maxTemplatesCount
  2076  }
  2077  
  2078  func (m *mockAlertManagerLimits) AlertmanagerMaxTemplateSize(tenant string) int {
  2079  	return m.maxSizeOfTemplate
  2080  }
  2081  
  2082  func (m *mockAlertManagerLimits) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR {
  2083  	panic("implement me")
  2084  }
  2085  
  2086  func (m *mockAlertManagerLimits) AlertmanagerReceiversBlockPrivateAddresses(user string) bool {
  2087  	panic("implement me")
  2088  }
  2089  
  2090  func (m *mockAlertManagerLimits) NotificationRateLimit(_ string, integration string) rate.Limit {
  2091  	return m.emailNotificationRateLimit
  2092  }
  2093  
  2094  func (m *mockAlertManagerLimits) NotificationBurstSize(_ string, integration string) int {
  2095  	return m.emailNotificationBurst
  2096  }
  2097  
  2098  func (m *mockAlertManagerLimits) AlertmanagerMaxDispatcherAggregationGroups(_ string) int {
  2099  	return m.maxDispatcherAggregationGroups
  2100  }
  2101  
  2102  func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsCount(_ string) int {
  2103  	return m.maxAlertsCount
  2104  }
  2105  
  2106  func (m *mockAlertManagerLimits) AlertmanagerMaxAlertsSizeBytes(_ string) int {
  2107  	return m.maxAlertsSizeBytes
  2108  }