github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/alertmanager_test.go (about) 1 package alertmanager 2 3 import ( 4 "fmt" 5 "net/url" 6 "strings" 7 "testing" 8 "time" 9 10 "github.com/go-kit/log" 11 "github.com/prometheus/alertmanager/config" 12 "github.com/prometheus/alertmanager/types" 13 "github.com/prometheus/client_golang/prometheus" 14 "github.com/prometheus/client_golang/prometheus/testutil" 15 "github.com/prometheus/common/model" 16 "github.com/stretchr/testify/assert" 17 "github.com/stretchr/testify/require" 18 19 "github.com/cortexproject/cortex/pkg/util/test" 20 ) 21 22 func TestDispatcherGroupLimits(t *testing.T) { 23 for name, tc := range map[string]struct { 24 groups int 25 groupsLimit int 26 expectedFailures int 27 }{ 28 "no limit": {groups: 5, groupsLimit: 0, expectedFailures: 0}, 29 "high limit": {groups: 5, groupsLimit: 10, expectedFailures: 0}, 30 "low limit": {groups: 5, groupsLimit: 3, expectedFailures: 4}, // 2 groups that fail, 2 alerts per group = 4 failures 31 } { 32 t.Run(name, func(t *testing.T) { 33 createAlertmanagerAndSendAlerts(t, tc.groups, tc.groupsLimit, tc.expectedFailures) 34 }) 35 } 36 } 37 38 func createAlertmanagerAndSendAlerts(t *testing.T, alertGroups, groupsLimit, expectedFailures int) { 39 user := "test" 40 41 reg := prometheus.NewPedanticRegistry() 42 am, err := New(&Config{ 43 UserID: user, 44 Logger: log.NewNopLogger(), 45 Limits: &mockAlertManagerLimits{maxDispatcherAggregationGroups: groupsLimit}, 46 TenantDataDir: t.TempDir(), 47 ExternalURL: &url.URL{Path: "/am"}, 48 ShardingEnabled: false, 49 }, reg) 50 require.NoError(t, err) 51 defer am.StopAndWait() 52 53 cfgRaw := `receivers: 54 - name: 'prod' 55 56 route: 57 group_by: ['alertname'] 58 group_wait: 10ms 59 group_interval: 10ms 60 receiver: 'prod'` 61 62 cfg, err := config.Load(cfgRaw) 63 require.NoError(t, err) 64 require.NoError(t, am.ApplyConfig(user, cfg, cfgRaw)) 65 66 now := time.Now() 67 68 for i := 0; i < alertGroups; i++ { 69 alertName := model.LabelValue(fmt.Sprintf("Alert-%d", i)) 70 71 inputAlerts := []*types.Alert{ 72 { 73 Alert: model.Alert{ 74 Labels: model.LabelSet{ 75 "alertname": alertName, 76 "a": "b", 77 }, 78 Annotations: model.LabelSet{"foo": "bar"}, 79 StartsAt: now, 80 EndsAt: now.Add(5 * time.Minute), 81 GeneratorURL: "http://example.com/prometheus", 82 }, 83 UpdatedAt: now, 84 Timeout: false, 85 }, 86 87 { 88 Alert: model.Alert{ 89 Labels: model.LabelSet{ 90 "alertname": alertName, 91 "z": "y", 92 }, 93 Annotations: model.LabelSet{"foo": "bar"}, 94 StartsAt: now, 95 EndsAt: now.Add(5 * time.Minute), 96 GeneratorURL: "http://example.com/prometheus", 97 }, 98 UpdatedAt: now, 99 Timeout: false, 100 }, 101 } 102 require.NoError(t, am.alerts.Put(inputAlerts...)) 103 } 104 105 // Give it some time, as alerts are sent to dispatcher asynchronously. 106 test.Poll(t, 3*time.Second, nil, func() interface{} { 107 return testutil.GatherAndCompare(reg, strings.NewReader(fmt.Sprintf(` 108 # HELP alertmanager_dispatcher_aggregation_group_limit_reached_total Number of times when dispatcher failed to create new aggregation group due to limit. 109 # TYPE alertmanager_dispatcher_aggregation_group_limit_reached_total counter 110 alertmanager_dispatcher_aggregation_group_limit_reached_total %d 111 `, expectedFailures)), "alertmanager_dispatcher_aggregation_group_limit_reached_total") 112 }) 113 } 114 115 var ( 116 alert1 = model.Alert{ 117 Labels: model.LabelSet{"alert": "first"}, 118 Annotations: model.LabelSet{"job": "test"}, 119 StartsAt: time.Now(), 120 EndsAt: time.Now(), 121 GeneratorURL: "some URL", 122 } 123 alert1Size = alertSize(alert1) 124 125 alert2 = model.Alert{ 126 Labels: model.LabelSet{"alert": "second"}, 127 Annotations: model.LabelSet{"job": "test", "cluster": "prod"}, 128 StartsAt: time.Now(), 129 EndsAt: time.Now(), 130 GeneratorURL: "some URL", 131 } 132 alert2Size = alertSize(alert2) 133 ) 134 135 type callbackOp struct { 136 alert *types.Alert 137 existing bool 138 delete bool // true=delete, false=insert. 139 expectedInsertError error 140 141 // expected values after operation. 142 expectedCount int 143 expectedTotalSize int 144 } 145 146 func TestAlertsLimiterWithNoLimits(t *testing.T) { 147 ops := []callbackOp{ 148 {alert: &types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size}, 149 {alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 2, expectedTotalSize: alert1Size + alert2Size}, 150 {alert: &types.Alert{Alert: alert2}, delete: true, expectedCount: 1, expectedTotalSize: alert1Size}, 151 {alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0}, 152 } 153 154 testLimiter(t, &mockAlertManagerLimits{}, ops) 155 } 156 157 func TestAlertsLimiterWithCountLimit(t *testing.T) { 158 alert2WithMoreAnnotations := alert2 159 alert2WithMoreAnnotations.Annotations = model.LabelSet{"job": "test", "cluster": "prod", "new": "super-long-annotation"} 160 alert2WithMoreAnnotationsSize := alertSize(alert2WithMoreAnnotations) 161 162 ops := []callbackOp{ 163 {alert: &types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size}, 164 {alert: &types.Alert{Alert: alert2}, existing: false, expectedInsertError: fmt.Errorf(errTooManyAlerts, 1), expectedCount: 1, expectedTotalSize: alert1Size}, 165 {alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0}, 166 167 {alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size}, 168 // Update of existing alert works -- doesn't change count. 169 {alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: true, expectedCount: 1, expectedTotalSize: alert2WithMoreAnnotationsSize}, 170 {alert: &types.Alert{Alert: alert2}, delete: true, expectedCount: 0, expectedTotalSize: 0}, 171 } 172 173 testLimiter(t, &mockAlertManagerLimits{maxAlertsCount: 1}, ops) 174 } 175 176 func TestAlertsLimiterWithSizeLimit(t *testing.T) { 177 alert2WithMoreAnnotations := alert2 178 alert2WithMoreAnnotations.Annotations = model.LabelSet{"job": "test", "cluster": "prod", "new": "super-long-annotation"} 179 180 ops := []callbackOp{ 181 {alert: &types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size}, 182 {alert: &types.Alert{Alert: alert2}, existing: false, expectedInsertError: fmt.Errorf(errAlertsTooBig, alert2Size), expectedCount: 1, expectedTotalSize: alert1Size}, 183 {alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: false, expectedInsertError: fmt.Errorf(errAlertsTooBig, alert2Size), expectedCount: 1, expectedTotalSize: alert1Size}, 184 {alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0}, 185 186 {alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size}, 187 {alert: &types.Alert{Alert: alert2}, delete: true, expectedCount: 0, expectedTotalSize: 0}, 188 } 189 190 // Prerequisite for this test. We set size limit to alert2Size, but inserting alert1 first will prevent insertion of alert2. 191 require.True(t, alert2Size > alert1Size) 192 193 testLimiter(t, &mockAlertManagerLimits{maxAlertsSizeBytes: alert2Size}, ops) 194 } 195 196 func TestAlertsLimiterWithSizeLimitAndAnnotationUpdate(t *testing.T) { 197 alert2WithMoreAnnotations := alert2 198 alert2WithMoreAnnotations.Annotations = model.LabelSet{"job": "test", "cluster": "prod", "new": "super-long-annotation"} 199 alert2WithMoreAnnotationsSize := alertSize(alert2WithMoreAnnotations) 200 201 // Updating alert with larger annotation that goes over the size limit fails. 202 testLimiter(t, &mockAlertManagerLimits{maxAlertsSizeBytes: alert2Size}, []callbackOp{ 203 {alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size}, 204 {alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: true, expectedInsertError: fmt.Errorf(errAlertsTooBig, alert2Size), expectedCount: 1, expectedTotalSize: alert2Size}, 205 }) 206 207 // Updating alert with larger annotations in the limit works fine. 208 testLimiter(t, &mockAlertManagerLimits{maxAlertsSizeBytes: alert2WithMoreAnnotationsSize}, []callbackOp{ 209 {alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size}, 210 {alert: &types.Alert{Alert: alert2WithMoreAnnotations}, existing: true, expectedCount: 1, expectedTotalSize: alert2WithMoreAnnotationsSize}, 211 {alert: &types.Alert{Alert: alert2}, existing: true, expectedCount: 1, expectedTotalSize: alert2Size}, 212 }) 213 } 214 215 // testLimiter sends sequence of alerts to limiter, and checks if limiter updated reacted correctly. 216 func testLimiter(t *testing.T, limits Limits, ops []callbackOp) { 217 reg := prometheus.NewPedanticRegistry() 218 219 limiter := newAlertsLimiter("test", limits, reg) 220 221 for ix, op := range ops { 222 if op.delete { 223 limiter.PostDelete(op.alert) 224 } else { 225 err := limiter.PreStore(op.alert, op.existing) 226 require.Equal(t, op.expectedInsertError, err, "op %d", ix) 227 if err == nil { 228 limiter.PostStore(op.alert, op.existing) 229 } 230 } 231 232 count, totalSize := limiter.currentStats() 233 234 assert.Equal(t, op.expectedCount, count, "wrong count, op %d", ix) 235 assert.Equal(t, op.expectedTotalSize, totalSize, "wrong total size, op %d", ix) 236 } 237 }