github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/alertmanager_test.go (about)

     1  package alertmanager
     2  
     3  import (
     4  	"fmt"
     5  	"net/url"
     6  	"strings"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/prometheus/alertmanager/config"
    12  	"github.com/prometheus/alertmanager/types"
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	"github.com/prometheus/client_golang/prometheus/testutil"
    15  	"github.com/prometheus/common/model"
    16  	"github.com/stretchr/testify/assert"
    17  	"github.com/stretchr/testify/require"
    18  
    19  	"github.com/cortexproject/cortex/pkg/util/test"
    20  )
    21  
    22  func TestDispatcherGroupLimits(t *testing.T) {
    23  	for name, tc := range map[string]struct {
    24  		groups           int
    25  		groupsLimit      int
    26  		expectedFailures int
    27  	}{
    28  		"no limit":   {groups: 5, groupsLimit: 0, expectedFailures: 0},
    29  		"high limit": {groups: 5, groupsLimit: 10, expectedFailures: 0},
    30  		"low limit":  {groups: 5, groupsLimit: 3, expectedFailures: 4}, // 2 groups that fail, 2 alerts per group = 4 failures
    31  	} {
    32  		t.Run(name, func(t *testing.T) {
    33  			createAlertmanagerAndSendAlerts(t, tc.groups, tc.groupsLimit, tc.expectedFailures)
    34  		})
    35  	}
    36  }
    37  
    38  func createAlertmanagerAndSendAlerts(t *testing.T, alertGroups, groupsLimit, expectedFailures int) {
    39  	user := "test"
    40  
    41  	reg := prometheus.NewPedanticRegistry()
    42  	am, err := New(&Config{
    43  		UserID:          user,
    44  		Logger:          log.NewNopLogger(),
    45  		Limits:          &mockAlertManagerLimits{maxDispatcherAggregationGroups: groupsLimit},
    46  		TenantDataDir:   t.TempDir(),
    47  		ExternalURL:     &url.URL{Path: "/am"},
    48  		ShardingEnabled: false,
    49  	}, reg)
    50  	require.NoError(t, err)
    51  	defer am.StopAndWait()
    52  
    53  	cfgRaw := `receivers:
    54  - name: 'prod'
    55  
    56  route:
    57    group_by: ['alertname']
    58    group_wait: 10ms
    59    group_interval: 10ms
    60    receiver: 'prod'`
    61  
    62  	cfg, err := config.Load(cfgRaw)
    63  	require.NoError(t, err)
    64  	require.NoError(t, am.ApplyConfig(user, cfg, cfgRaw))
    65  
    66  	now := time.Now()
    67  
    68  	for i := 0; i < alertGroups; i++ {
    69  		alertName := model.LabelValue(fmt.Sprintf("Alert-%d", i))
    70  
    71  		inputAlerts := []*types.Alert{
    72  			{
    73  				Alert: model.Alert{
    74  					Labels: model.LabelSet{
    75  						"alertname": alertName,
    76  						"a":         "b",
    77  					},
    78  					Annotations:  model.LabelSet{"foo": "bar"},
    79  					StartsAt:     now,
    80  					EndsAt:       now.Add(5 * time.Minute),
    81  					GeneratorURL: "http://example.com/prometheus",
    82  				},
    83  				UpdatedAt: now,
    84  				Timeout:   false,
    85  			},
    86  
    87  			{
    88  				Alert: model.Alert{
    89  					Labels: model.LabelSet{
    90  						"alertname": alertName,
    91  						"z":         "y",
    92  					},
    93  					Annotations:  model.LabelSet{"foo": "bar"},
    94  					StartsAt:     now,
    95  					EndsAt:       now.Add(5 * time.Minute),
    96  					GeneratorURL: "http://example.com/prometheus",
    97  				},
    98  				UpdatedAt: now,
    99  				Timeout:   false,
   100  			},
   101  		}
   102  		require.NoError(t, am.alerts.Put(inputAlerts...))
   103  	}
   104  
   105  	// Give it some time, as alerts are sent to dispatcher asynchronously.
   106  	test.Poll(t, 3*time.Second, nil, func() interface{} {
   107  		return testutil.GatherAndCompare(reg, strings.NewReader(fmt.Sprintf(`
   108  		# HELP alertmanager_dispatcher_aggregation_group_limit_reached_total Number of times when dispatcher failed to create new aggregation group due to limit.
   109  		# TYPE alertmanager_dispatcher_aggregation_group_limit_reached_total counter
   110  		alertmanager_dispatcher_aggregation_group_limit_reached_total %d
   111  	`, expectedFailures)), "alertmanager_dispatcher_aggregation_group_limit_reached_total")
   112  	})
   113  }
   114  
   115  var (
   116  	alert1 = model.Alert{
   117  		Labels:       model.LabelSet{"alert": "first"},
   118  		Annotations:  model.LabelSet{"job": "test"},
   119  		StartsAt:     time.Now(),
   120  		EndsAt:       time.Now(),
   121  		GeneratorURL: "some URL",
   122  	}
   123  	alert1Size = alertSize(alert1)
   124  
   125  	alert2 = model.Alert{
   126  		Labels:       model.LabelSet{"alert": "second"},
   127  		Annotations:  model.LabelSet{"job": "test", "cluster": "prod"},
   128  		StartsAt:     time.Now(),
   129  		EndsAt:       time.Now(),
   130  		GeneratorURL: "some URL",
   131  	}
   132  	alert2Size = alertSize(alert2)
   133  )
   134  
   135  type callbackOp struct {
   136  	alert               *types.Alert
   137  	existing            bool
   138  	delete              bool // true=delete, false=insert.
   139  	expectedInsertError error
   140  
   141  	// expected values after operation.
   142  	expectedCount     int
   143  	expectedTotalSize int
   144  }
   145  
   146  func TestAlertsLimiterWithNoLimits(t *testing.T) {
   147  	ops := []callbackOp{
   148  		{alert: &types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size},
   149  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 2, expectedTotalSize: alert1Size + alert2Size},
   150  		{alert: &types.Alert{Alert: alert2}, delete: true, expectedCount: 1, expectedTotalSize: alert1Size},
   151  		{alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0},
   152  	}
   153  
   154  	testLimiter(t, &mockAlertManagerLimits{}, ops)
   155  }
   156  
   157  func TestAlertsLimiterWithCountLimit(t *testing.T) {
   158  	alert2WithMoreAnnotations := alert2
   159  	alert2WithMoreAnnotations.Annotations = model.LabelSet{"job": "test", "cluster": "prod", "new": "super-long-annotation"}
   160  	alert2WithMoreAnnotationsSize := alertSize(alert2WithMoreAnnotations)
   161  
   162  	ops := []callbackOp{
   163  		{alert: &types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size},
   164  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedInsertError: fmt.Errorf(errTooManyAlerts, 1), expectedCount: 1, expectedTotalSize: alert1Size},
   165  		{alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0},
   166  
   167  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size},
   168  		// Update of existing alert works -- doesn't change count.
   169  		{alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: true, expectedCount: 1, expectedTotalSize: alert2WithMoreAnnotationsSize},
   170  		{alert: &types.Alert{Alert: alert2}, delete: true, expectedCount: 0, expectedTotalSize: 0},
   171  	}
   172  
   173  	testLimiter(t, &mockAlertManagerLimits{maxAlertsCount: 1}, ops)
   174  }
   175  
   176  func TestAlertsLimiterWithSizeLimit(t *testing.T) {
   177  	alert2WithMoreAnnotations := alert2
   178  	alert2WithMoreAnnotations.Annotations = model.LabelSet{"job": "test", "cluster": "prod", "new": "super-long-annotation"}
   179  
   180  	ops := []callbackOp{
   181  		{alert: &types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size},
   182  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedInsertError: fmt.Errorf(errAlertsTooBig, alert2Size), expectedCount: 1, expectedTotalSize: alert1Size},
   183  		{alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: false, expectedInsertError: fmt.Errorf(errAlertsTooBig, alert2Size), expectedCount: 1, expectedTotalSize: alert1Size},
   184  		{alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0},
   185  
   186  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size},
   187  		{alert: &types.Alert{Alert: alert2}, delete: true, expectedCount: 0, expectedTotalSize: 0},
   188  	}
   189  
   190  	// Prerequisite for this test. We set size limit to alert2Size, but inserting alert1 first will prevent insertion of alert2.
   191  	require.True(t, alert2Size > alert1Size)
   192  
   193  	testLimiter(t, &mockAlertManagerLimits{maxAlertsSizeBytes: alert2Size}, ops)
   194  }
   195  
   196  func TestAlertsLimiterWithSizeLimitAndAnnotationUpdate(t *testing.T) {
   197  	alert2WithMoreAnnotations := alert2
   198  	alert2WithMoreAnnotations.Annotations = model.LabelSet{"job": "test", "cluster": "prod", "new": "super-long-annotation"}
   199  	alert2WithMoreAnnotationsSize := alertSize(alert2WithMoreAnnotations)
   200  
   201  	// Updating alert with larger annotation that goes over the size limit fails.
   202  	testLimiter(t, &mockAlertManagerLimits{maxAlertsSizeBytes: alert2Size}, []callbackOp{
   203  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size},
   204  		{alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: true, expectedInsertError: fmt.Errorf(errAlertsTooBig, alert2Size), expectedCount: 1, expectedTotalSize: alert2Size},
   205  	})
   206  
   207  	// Updating alert with larger annotations in the limit works fine.
   208  	testLimiter(t, &mockAlertManagerLimits{maxAlertsSizeBytes: alert2WithMoreAnnotationsSize}, []callbackOp{
   209  		{alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size},
   210  		{alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: true, expectedCount: 1, expectedTotalSize: alert2WithMoreAnnotationsSize},
   211  		{alert: &types.Alert{Alert: alert2}, existing: true, expectedCount: 1, expectedTotalSize: alert2Size},
   212  	})
   213  }
   214  
   215  // testLimiter sends sequence of alerts to limiter, and checks if limiter updated reacted correctly.
   216  func testLimiter(t *testing.T, limits Limits, ops []callbackOp) {
   217  	reg := prometheus.NewPedanticRegistry()
   218  
   219  	limiter := newAlertsLimiter("test", limits, reg)
   220  
   221  	for ix, op := range ops {
   222  		if op.delete {
   223  			limiter.PostDelete(op.alert)
   224  		} else {
   225  			err := limiter.PreStore(op.alert, op.existing)
   226  			require.Equal(t, op.expectedInsertError, err, "op %d", ix)
   227  			if err == nil {
   228  				limiter.PostStore(op.alert, op.existing)
   229  			}
   230  		}
   231  
   232  		count, totalSize := limiter.currentStats()
   233  
   234  		assert.Equal(t, op.expectedCount, count, "wrong count, op %d", ix)
   235  		assert.Equal(t, op.expectedTotalSize, totalSize, "wrong total size, op %d", ix)
   236  	}
   237  }