github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/distributor_test.go (about) 1 package alertmanager 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "math" 9 "net/http" 10 "net/http/httptest" 11 "net/url" 12 "sync" 13 "testing" 14 "time" 15 16 "github.com/go-kit/log" 17 "github.com/grafana/dskit/flagext" 18 "github.com/grafana/dskit/kv" 19 "github.com/grafana/dskit/kv/consul" 20 "github.com/grafana/dskit/ring" 21 "github.com/grafana/dskit/services" 22 "github.com/prometheus/client_golang/prometheus" 23 "github.com/stretchr/testify/assert" 24 "github.com/stretchr/testify/require" 25 "github.com/weaveworks/common/httpgrpc" 26 "github.com/weaveworks/common/user" 27 "google.golang.org/grpc" 28 "google.golang.org/grpc/health/grpc_health_v1" 29 30 "github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb" 31 util_log "github.com/cortexproject/cortex/pkg/util/log" 32 "github.com/cortexproject/cortex/pkg/util/test" 33 ) 34 35 func TestDistributor_DistributeRequest(t *testing.T) { 36 cases := []struct { 37 name string 38 numAM, numHappyAM int 39 replicationFactor int 40 isRead bool 41 isDelete bool 42 expStatusCode int 43 expectedTotalCalls int 44 headersNotPreserved bool 45 route string 46 // Paths where responses are merged, we need to supply a valid response body. 47 // Note that the actual merging logic is tested elsewhere (merger_test.go). 48 responseBody []byte 49 }{ 50 { 51 name: "Write /alerts, Simple AM request, all AM healthy", 52 numAM: 4, 53 numHappyAM: 4, 54 replicationFactor: 3, 55 expStatusCode: http.StatusOK, 56 expectedTotalCalls: 3, 57 route: "/alerts", 58 }, { 59 name: "Write /alerts, Less than quorum AM available", 60 numAM: 1, 61 numHappyAM: 1, 62 replicationFactor: 3, 63 expStatusCode: http.StatusInternalServerError, 64 expectedTotalCalls: 0, 65 headersNotPreserved: true, // There is nothing to preserve since it does not hit any AM. 66 route: "/alerts", 67 }, { 68 name: "Write /alerts, Less than quorum AM succeed", 69 numAM: 5, 70 numHappyAM: 3, // Though we have 3 happy, it will hit >1 unhappy AM. 71 replicationFactor: 3, 72 expStatusCode: http.StatusInternalServerError, 73 expectedTotalCalls: 3, 74 route: "/alerts", 75 }, { 76 name: "Read /v1/alerts is sent to 3 AMs", 77 numAM: 5, 78 numHappyAM: 5, 79 replicationFactor: 3, 80 isRead: true, 81 expStatusCode: http.StatusOK, 82 expectedTotalCalls: 3, 83 route: "/v1/alerts", 84 responseBody: []byte(`{"status":"success","data":[]}`), 85 }, { 86 name: "Read /v2/alerts is sent to 3 AMs", 87 numAM: 5, 88 numHappyAM: 5, 89 replicationFactor: 3, 90 isRead: true, 91 expStatusCode: http.StatusOK, 92 expectedTotalCalls: 3, 93 route: "/v2/alerts", 94 responseBody: []byte(`[]`), 95 }, { 96 name: "Read /v2/alerts/groups is sent to 3 AMs", 97 numAM: 5, 98 numHappyAM: 5, 99 replicationFactor: 3, 100 isRead: true, 101 expStatusCode: http.StatusOK, 102 expectedTotalCalls: 3, 103 route: "/v2/alerts/groups", 104 responseBody: []byte(`[]`), 105 }, { 106 name: "Read /v1/alerts/groups not supported", 107 numAM: 5, 108 numHappyAM: 5, 109 replicationFactor: 3, 110 expStatusCode: http.StatusNotFound, 111 expectedTotalCalls: 0, 112 headersNotPreserved: true, 113 route: "/v1/alerts/groups", 114 }, { 115 name: "Write /alerts/groups not supported", 116 numAM: 5, 117 numHappyAM: 5, 118 replicationFactor: 3, 119 expStatusCode: http.StatusNotFound, 120 expectedTotalCalls: 0, 121 headersNotPreserved: true, 122 route: "/alerts/groups", 123 }, { 124 name: "Read /v1/silences is sent to 3 AMs", 125 numAM: 5, 126 numHappyAM: 5, 127 replicationFactor: 3, 128 isRead: true, 129 expStatusCode: http.StatusOK, 130 expectedTotalCalls: 3, 131 route: "/v1/silences", 132 responseBody: []byte(`{"status":"success","data":[]}`), 133 }, { 134 name: "Read /v2/silences is sent to 3 AMs", 135 numAM: 5, 136 numHappyAM: 5, 137 replicationFactor: 3, 138 isRead: true, 139 expStatusCode: http.StatusOK, 140 expectedTotalCalls: 3, 141 route: "/v2/silences", 142 responseBody: []byte(`[]`), 143 }, { 144 name: "Write /silences is sent to only 1 AM", 145 numAM: 5, 146 numHappyAM: 5, 147 replicationFactor: 3, 148 expStatusCode: http.StatusOK, 149 expectedTotalCalls: 1, 150 route: "/silences", 151 }, { 152 name: "Read /v1/silence/id is sent to 3 AMs", 153 numAM: 5, 154 numHappyAM: 5, 155 replicationFactor: 3, 156 isRead: true, 157 expStatusCode: http.StatusOK, 158 expectedTotalCalls: 3, 159 route: "/v1/silence/id", 160 responseBody: []byte(`{"status":"success","data":{"id":"aaa","updatedAt":"2020-01-01T00:00:00Z"}}`), 161 }, { 162 name: "Read /v2/silence/id is sent to 3 AMs", 163 numAM: 5, 164 numHappyAM: 5, 165 replicationFactor: 3, 166 isRead: true, 167 expStatusCode: http.StatusOK, 168 expectedTotalCalls: 3, 169 route: "/v2/silence/id", 170 responseBody: []byte(`{"id":"aaa","updatedAt":"2020-01-01T00:00:00Z"}`), 171 }, 172 { 173 name: "Write /silence/id not supported", 174 numAM: 5, 175 numHappyAM: 5, 176 replicationFactor: 3, 177 expStatusCode: http.StatusNotFound, 178 expectedTotalCalls: 0, 179 headersNotPreserved: true, 180 route: "/silence/id", 181 }, { 182 name: "Delete /silence/id is sent to only 1 AM", 183 numAM: 5, 184 numHappyAM: 5, 185 replicationFactor: 3, 186 isDelete: true, 187 expStatusCode: http.StatusOK, 188 expectedTotalCalls: 1, 189 route: "/silence/id", 190 }, { 191 name: "Read /status is sent to only 1 AM", 192 numAM: 5, 193 numHappyAM: 5, 194 replicationFactor: 3, 195 isRead: true, 196 expStatusCode: http.StatusOK, 197 expectedTotalCalls: 1, 198 route: "/status", 199 }, { 200 name: "Write /status not supported", 201 numAM: 5, 202 numHappyAM: 5, 203 replicationFactor: 3, 204 expStatusCode: http.StatusNotFound, 205 expectedTotalCalls: 0, 206 headersNotPreserved: true, 207 route: "/status", 208 }, { 209 name: "Read /receivers is sent to only 1 AM", 210 numAM: 5, 211 numHappyAM: 5, 212 replicationFactor: 3, 213 isRead: true, 214 expStatusCode: http.StatusOK, 215 expectedTotalCalls: 1, 216 route: "/receivers", 217 }, { 218 name: "Write /receivers not supported", 219 numAM: 5, 220 numHappyAM: 5, 221 replicationFactor: 3, 222 expStatusCode: http.StatusNotFound, 223 expectedTotalCalls: 0, 224 headersNotPreserved: true, 225 route: "/receivers", 226 }, 227 } 228 229 for _, c := range cases { 230 t.Run(c.name, func(t *testing.T) { 231 route := "/alertmanager/api/v1" + c.route 232 d, ams, cleanup := prepare(t, c.numAM, c.numHappyAM, c.replicationFactor, c.responseBody) 233 t.Cleanup(cleanup) 234 235 ctx := user.InjectOrgID(context.Background(), "1") 236 237 url := "http://127.0.0.1:9999" + route 238 req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader([]byte{1, 2, 3, 4})) 239 require.NoError(t, err) 240 if c.isRead { 241 req.Method = http.MethodGet 242 } else if c.isDelete { 243 req.Method = http.MethodDelete 244 } 245 req.RequestURI = url 246 247 w := httptest.NewRecorder() 248 d.DistributeRequest(w, req) 249 resp := w.Result() 250 require.Equal(t, c.expStatusCode, resp.StatusCode) 251 252 if !c.headersNotPreserved { 253 // Making sure the headers are not altered. 254 contentType := []string{"it-is-ok"} 255 contentTypeOptions := []string{"ok-option-1", "ok-option-2"} 256 if resp.StatusCode != http.StatusOK { 257 contentType = []string{"it-is-not-ok"} 258 contentTypeOptions = []string{"not-ok-option-1", "not-ok-option-2"} 259 } 260 require.Equal(t, contentType, resp.Header.Values("Content-Type")) 261 require.Equal(t, contentTypeOptions, resp.Header.Values("X-Content-Type-Options")) 262 } 263 264 // Since the response is sent as soon as the quorum is reached, when we 265 // reach this point the 3rd AM may not have received the request yet. 266 // To avoid flaky test we retry until we hit the desired state within a reasonable timeout. 267 test.Poll(t, time.Second, c.expectedTotalCalls, func() interface{} { 268 totalReqCount := 0 269 for _, a := range ams { 270 reqCount := a.requestsCount(route) 271 // AM should not get duplicate requests. 272 require.True(t, reqCount <= 1, "duplicate requests %d", reqCount) 273 totalReqCount += reqCount 274 } 275 276 return totalReqCount 277 }) 278 }) 279 } 280 281 } 282 283 func TestDistributor_IsPathSupported(t *testing.T) { 284 supported := map[string]bool{ 285 "/alertmanager/api/v1/alerts": true, 286 "/alertmanager/api/v1/alerts/groups": false, 287 "/alertmanager/api/v1/silences": true, 288 "/alertmanager/api/v1/silence/id": true, 289 "/alertmanager/api/v1/silence/anything": true, 290 "/alertmanager/api/v1/silence/really": true, 291 "/alertmanager/api/v1/status": true, 292 "/alertmanager/api/v1/receivers": true, 293 "/alertmanager/api/v1/other": false, 294 "/alertmanager/api/v2/alerts": true, 295 "/alertmanager/api/v2/alerts/groups": true, 296 "/alertmanager/api/v2/silences": true, 297 "/alertmanager/api/v2/silence/id": true, 298 "/alertmanager/api/v2/silence/anything": true, 299 "/alertmanager/api/v2/silence/really": true, 300 "/alertmanager/api/v2/status": true, 301 "/alertmanager/api/v2/receivers": true, 302 "/alertmanager/api/v2/other": false, 303 "/alertmanager/other": false, 304 "/other": false, 305 } 306 307 for path, isSupported := range supported { 308 t.Run(path, func(t *testing.T) { 309 d, _, cleanup := prepare(t, 1, 1, 1, []byte{}) 310 t.Cleanup(cleanup) 311 require.Equal(t, isSupported, d.IsPathSupported(path)) 312 }) 313 } 314 } 315 316 func prepare(t *testing.T, numAM, numHappyAM, replicationFactor int, responseBody []byte) (*Distributor, []*mockAlertmanager, func()) { 317 ams := []*mockAlertmanager{} 318 for i := 0; i < numHappyAM; i++ { 319 ams = append(ams, newMockAlertmanager(i, true, responseBody)) 320 } 321 for i := numHappyAM; i < numAM; i++ { 322 ams = append(ams, newMockAlertmanager(i, false, responseBody)) 323 } 324 325 // Use a real ring with a mock KV store to test ring RF logic. 326 amDescs := map[string]ring.InstanceDesc{} 327 amByAddr := map[string]*mockAlertmanager{} 328 for i, a := range ams { 329 amDescs[a.myAddr] = ring.InstanceDesc{ 330 Addr: a.myAddr, 331 Zone: "", 332 State: ring.ACTIVE, 333 Timestamp: time.Now().Unix(), 334 RegisteredTimestamp: time.Now().Add(-2 * time.Hour).Unix(), 335 Tokens: []uint32{uint32((math.MaxUint32 / numAM) * i)}, 336 } 337 amByAddr[a.myAddr] = ams[i] 338 } 339 340 kvStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) 341 t.Cleanup(func() { assert.NoError(t, closer.Close()) }) 342 343 err := kvStore.CAS(context.Background(), RingKey, 344 func(_ interface{}) (interface{}, bool, error) { 345 return &ring.Desc{ 346 Ingesters: amDescs, 347 }, true, nil 348 }, 349 ) 350 require.NoError(t, err) 351 352 amRing, err := ring.New(ring.Config{ 353 KVStore: kv.Config{ 354 Mock: kvStore, 355 }, 356 HeartbeatTimeout: 60 * time.Minute, 357 ReplicationFactor: replicationFactor, 358 }, RingNameForServer, RingKey, nil, nil) 359 require.NoError(t, err) 360 require.NoError(t, services.StartAndAwaitRunning(context.Background(), amRing)) 361 test.Poll(t, time.Second, numAM, func() interface{} { 362 return amRing.InstancesCount() 363 }) 364 365 cfg := &MultitenantAlertmanagerConfig{} 366 flagext.DefaultValues(cfg) 367 368 d, err := NewDistributor(cfg.AlertmanagerClient, cfg.MaxRecvMsgSize, amRing, newMockAlertmanagerClientFactory(amByAddr), util_log.Logger, prometheus.NewRegistry()) 369 require.NoError(t, err) 370 require.NoError(t, services.StartAndAwaitRunning(context.Background(), d)) 371 372 return d, ams, func() { 373 require.NoError(t, services.StopAndAwaitTerminated(context.Background(), d)) 374 } 375 } 376 377 type mockAlertmanager struct { 378 alertmanagerpb.AlertmanagerClient 379 grpc_health_v1.HealthClient 380 // receivedRequests is map of route -> statusCode -> number of requests. 381 receivedRequests map[string]map[int]int 382 mtx sync.Mutex 383 myAddr string 384 happy bool 385 responseBody []byte 386 } 387 388 func newMockAlertmanager(idx int, happy bool, responseBody []byte) *mockAlertmanager { 389 return &mockAlertmanager{ 390 receivedRequests: make(map[string]map[int]int), 391 myAddr: fmt.Sprintf("127.0.0.1:%05d", 10000+idx), 392 happy: happy, 393 responseBody: responseBody, 394 } 395 } 396 397 func (am *mockAlertmanager) HandleRequest(_ context.Context, in *httpgrpc.HTTPRequest, _ ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) { 398 am.mtx.Lock() 399 defer am.mtx.Unlock() 400 401 u, err := url.Parse(in.Url) 402 if err != nil { 403 return nil, err 404 } 405 path := u.Path 406 m, ok := am.receivedRequests[path] 407 if !ok { 408 m = make(map[int]int) 409 am.receivedRequests[path] = m 410 } 411 412 if am.happy { 413 m[http.StatusOK]++ 414 return &httpgrpc.HTTPResponse{ 415 Code: http.StatusOK, 416 Headers: []*httpgrpc.Header{ 417 { 418 Key: "Content-Type", 419 Values: []string{"it-is-ok"}, 420 }, { 421 Key: "X-Content-Type-Options", 422 Values: []string{"ok-option-1", "ok-option-2"}, 423 }, 424 }, 425 Body: am.responseBody, 426 }, nil 427 } 428 429 m[http.StatusInternalServerError]++ 430 return nil, httpgrpc.ErrorFromHTTPResponse(&httpgrpc.HTTPResponse{ 431 Code: http.StatusInternalServerError, 432 Headers: []*httpgrpc.Header{ 433 { 434 Key: "Content-Type", 435 Values: []string{"it-is-not-ok"}, 436 }, { 437 Key: "X-Content-Type-Options", 438 Values: []string{"not-ok-option-1", "not-ok-option-2"}, 439 }, 440 }, 441 }) 442 } 443 444 func (am *mockAlertmanager) requestsCount(route string) int { 445 am.mtx.Lock() 446 defer am.mtx.Unlock() 447 448 routeMap, ok := am.receivedRequests[route] 449 if !ok { 450 return 0 451 } 452 453 // The status could be something other than overall 454 // expected status because of quorum logic. 455 reqCount := 0 456 for _, count := range routeMap { 457 reqCount += count 458 } 459 return reqCount 460 } 461 462 func (am *mockAlertmanager) Close() error { 463 return nil 464 } 465 466 func (am *mockAlertmanager) RemoteAddress() string { 467 return am.myAddr 468 } 469 470 type mockAlertmanagerClientFactory struct { 471 alertmanagerByAddr map[string]*mockAlertmanager 472 } 473 474 func newMockAlertmanagerClientFactory(alertmanagerByAddr map[string]*mockAlertmanager) ClientsPool { 475 return &mockAlertmanagerClientFactory{alertmanagerByAddr: alertmanagerByAddr} 476 } 477 478 func (f *mockAlertmanagerClientFactory) GetClientFor(addr string) (Client, error) { 479 c, ok := f.alertmanagerByAddr[addr] 480 if !ok { 481 return nil, errors.New("client not found") 482 } 483 return Client(c), nil 484 }