github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/distributor_test.go (about)

     1  package alertmanager
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"net/http"
    10  	"net/http/httptest"
    11  	"net/url"
    12  	"sync"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/go-kit/log"
    17  	"github.com/grafana/dskit/flagext"
    18  	"github.com/grafana/dskit/kv"
    19  	"github.com/grafana/dskit/kv/consul"
    20  	"github.com/grafana/dskit/ring"
    21  	"github.com/grafana/dskit/services"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/stretchr/testify/assert"
    24  	"github.com/stretchr/testify/require"
    25  	"github.com/weaveworks/common/httpgrpc"
    26  	"github.com/weaveworks/common/user"
    27  	"google.golang.org/grpc"
    28  	"google.golang.org/grpc/health/grpc_health_v1"
    29  
    30  	"github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb"
    31  	util_log "github.com/cortexproject/cortex/pkg/util/log"
    32  	"github.com/cortexproject/cortex/pkg/util/test"
    33  )
    34  
    35  func TestDistributor_DistributeRequest(t *testing.T) {
    36  	cases := []struct {
    37  		name                string
    38  		numAM, numHappyAM   int
    39  		replicationFactor   int
    40  		isRead              bool
    41  		isDelete            bool
    42  		expStatusCode       int
    43  		expectedTotalCalls  int
    44  		headersNotPreserved bool
    45  		route               string
    46  		// Paths where responses are merged, we need to supply a valid response body.
    47  		// Note that the actual merging logic is tested elsewhere (merger_test.go).
    48  		responseBody []byte
    49  	}{
    50  		{
    51  			name:               "Write /alerts, Simple AM request, all AM healthy",
    52  			numAM:              4,
    53  			numHappyAM:         4,
    54  			replicationFactor:  3,
    55  			expStatusCode:      http.StatusOK,
    56  			expectedTotalCalls: 3,
    57  			route:              "/alerts",
    58  		}, {
    59  			name:                "Write /alerts, Less than quorum AM available",
    60  			numAM:               1,
    61  			numHappyAM:          1,
    62  			replicationFactor:   3,
    63  			expStatusCode:       http.StatusInternalServerError,
    64  			expectedTotalCalls:  0,
    65  			headersNotPreserved: true, // There is nothing to preserve since it does not hit any AM.
    66  			route:               "/alerts",
    67  		}, {
    68  			name:               "Write /alerts, Less than quorum AM succeed",
    69  			numAM:              5,
    70  			numHappyAM:         3, // Though we have 3 happy, it will hit >1 unhappy AM.
    71  			replicationFactor:  3,
    72  			expStatusCode:      http.StatusInternalServerError,
    73  			expectedTotalCalls: 3,
    74  			route:              "/alerts",
    75  		}, {
    76  			name:               "Read /v1/alerts is sent to 3 AMs",
    77  			numAM:              5,
    78  			numHappyAM:         5,
    79  			replicationFactor:  3,
    80  			isRead:             true,
    81  			expStatusCode:      http.StatusOK,
    82  			expectedTotalCalls: 3,
    83  			route:              "/v1/alerts",
    84  			responseBody:       []byte(`{"status":"success","data":[]}`),
    85  		}, {
    86  			name:               "Read /v2/alerts is sent to 3 AMs",
    87  			numAM:              5,
    88  			numHappyAM:         5,
    89  			replicationFactor:  3,
    90  			isRead:             true,
    91  			expStatusCode:      http.StatusOK,
    92  			expectedTotalCalls: 3,
    93  			route:              "/v2/alerts",
    94  			responseBody:       []byte(`[]`),
    95  		}, {
    96  			name:               "Read /v2/alerts/groups is sent to 3 AMs",
    97  			numAM:              5,
    98  			numHappyAM:         5,
    99  			replicationFactor:  3,
   100  			isRead:             true,
   101  			expStatusCode:      http.StatusOK,
   102  			expectedTotalCalls: 3,
   103  			route:              "/v2/alerts/groups",
   104  			responseBody:       []byte(`[]`),
   105  		}, {
   106  			name:                "Read /v1/alerts/groups not supported",
   107  			numAM:               5,
   108  			numHappyAM:          5,
   109  			replicationFactor:   3,
   110  			expStatusCode:       http.StatusNotFound,
   111  			expectedTotalCalls:  0,
   112  			headersNotPreserved: true,
   113  			route:               "/v1/alerts/groups",
   114  		}, {
   115  			name:                "Write /alerts/groups not supported",
   116  			numAM:               5,
   117  			numHappyAM:          5,
   118  			replicationFactor:   3,
   119  			expStatusCode:       http.StatusNotFound,
   120  			expectedTotalCalls:  0,
   121  			headersNotPreserved: true,
   122  			route:               "/alerts/groups",
   123  		}, {
   124  			name:               "Read /v1/silences is sent to 3 AMs",
   125  			numAM:              5,
   126  			numHappyAM:         5,
   127  			replicationFactor:  3,
   128  			isRead:             true,
   129  			expStatusCode:      http.StatusOK,
   130  			expectedTotalCalls: 3,
   131  			route:              "/v1/silences",
   132  			responseBody:       []byte(`{"status":"success","data":[]}`),
   133  		}, {
   134  			name:               "Read /v2/silences is sent to 3 AMs",
   135  			numAM:              5,
   136  			numHappyAM:         5,
   137  			replicationFactor:  3,
   138  			isRead:             true,
   139  			expStatusCode:      http.StatusOK,
   140  			expectedTotalCalls: 3,
   141  			route:              "/v2/silences",
   142  			responseBody:       []byte(`[]`),
   143  		}, {
   144  			name:               "Write /silences is sent to only 1 AM",
   145  			numAM:              5,
   146  			numHappyAM:         5,
   147  			replicationFactor:  3,
   148  			expStatusCode:      http.StatusOK,
   149  			expectedTotalCalls: 1,
   150  			route:              "/silences",
   151  		}, {
   152  			name:               "Read /v1/silence/id is sent to 3 AMs",
   153  			numAM:              5,
   154  			numHappyAM:         5,
   155  			replicationFactor:  3,
   156  			isRead:             true,
   157  			expStatusCode:      http.StatusOK,
   158  			expectedTotalCalls: 3,
   159  			route:              "/v1/silence/id",
   160  			responseBody:       []byte(`{"status":"success","data":{"id":"aaa","updatedAt":"2020-01-01T00:00:00Z"}}`),
   161  		}, {
   162  			name:               "Read /v2/silence/id is sent to 3 AMs",
   163  			numAM:              5,
   164  			numHappyAM:         5,
   165  			replicationFactor:  3,
   166  			isRead:             true,
   167  			expStatusCode:      http.StatusOK,
   168  			expectedTotalCalls: 3,
   169  			route:              "/v2/silence/id",
   170  			responseBody:       []byte(`{"id":"aaa","updatedAt":"2020-01-01T00:00:00Z"}`),
   171  		},
   172  		{
   173  			name:                "Write /silence/id not supported",
   174  			numAM:               5,
   175  			numHappyAM:          5,
   176  			replicationFactor:   3,
   177  			expStatusCode:       http.StatusNotFound,
   178  			expectedTotalCalls:  0,
   179  			headersNotPreserved: true,
   180  			route:               "/silence/id",
   181  		}, {
   182  			name:               "Delete /silence/id is sent to only 1 AM",
   183  			numAM:              5,
   184  			numHappyAM:         5,
   185  			replicationFactor:  3,
   186  			isDelete:           true,
   187  			expStatusCode:      http.StatusOK,
   188  			expectedTotalCalls: 1,
   189  			route:              "/silence/id",
   190  		}, {
   191  			name:               "Read /status is sent to only 1 AM",
   192  			numAM:              5,
   193  			numHappyAM:         5,
   194  			replicationFactor:  3,
   195  			isRead:             true,
   196  			expStatusCode:      http.StatusOK,
   197  			expectedTotalCalls: 1,
   198  			route:              "/status",
   199  		}, {
   200  			name:                "Write /status not supported",
   201  			numAM:               5,
   202  			numHappyAM:          5,
   203  			replicationFactor:   3,
   204  			expStatusCode:       http.StatusNotFound,
   205  			expectedTotalCalls:  0,
   206  			headersNotPreserved: true,
   207  			route:               "/status",
   208  		}, {
   209  			name:               "Read /receivers is sent to only 1 AM",
   210  			numAM:              5,
   211  			numHappyAM:         5,
   212  			replicationFactor:  3,
   213  			isRead:             true,
   214  			expStatusCode:      http.StatusOK,
   215  			expectedTotalCalls: 1,
   216  			route:              "/receivers",
   217  		}, {
   218  			name:                "Write /receivers not supported",
   219  			numAM:               5,
   220  			numHappyAM:          5,
   221  			replicationFactor:   3,
   222  			expStatusCode:       http.StatusNotFound,
   223  			expectedTotalCalls:  0,
   224  			headersNotPreserved: true,
   225  			route:               "/receivers",
   226  		},
   227  	}
   228  
   229  	for _, c := range cases {
   230  		t.Run(c.name, func(t *testing.T) {
   231  			route := "/alertmanager/api/v1" + c.route
   232  			d, ams, cleanup := prepare(t, c.numAM, c.numHappyAM, c.replicationFactor, c.responseBody)
   233  			t.Cleanup(cleanup)
   234  
   235  			ctx := user.InjectOrgID(context.Background(), "1")
   236  
   237  			url := "http://127.0.0.1:9999" + route
   238  			req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader([]byte{1, 2, 3, 4}))
   239  			require.NoError(t, err)
   240  			if c.isRead {
   241  				req.Method = http.MethodGet
   242  			} else if c.isDelete {
   243  				req.Method = http.MethodDelete
   244  			}
   245  			req.RequestURI = url
   246  
   247  			w := httptest.NewRecorder()
   248  			d.DistributeRequest(w, req)
   249  			resp := w.Result()
   250  			require.Equal(t, c.expStatusCode, resp.StatusCode)
   251  
   252  			if !c.headersNotPreserved {
   253  				// Making sure the headers are not altered.
   254  				contentType := []string{"it-is-ok"}
   255  				contentTypeOptions := []string{"ok-option-1", "ok-option-2"}
   256  				if resp.StatusCode != http.StatusOK {
   257  					contentType = []string{"it-is-not-ok"}
   258  					contentTypeOptions = []string{"not-ok-option-1", "not-ok-option-2"}
   259  				}
   260  				require.Equal(t, contentType, resp.Header.Values("Content-Type"))
   261  				require.Equal(t, contentTypeOptions, resp.Header.Values("X-Content-Type-Options"))
   262  			}
   263  
   264  			// Since the response is sent as soon as the quorum is reached, when we
   265  			// reach this point the 3rd AM may not have received the request yet.
   266  			// To avoid flaky test we retry until we hit the desired state within a reasonable timeout.
   267  			test.Poll(t, time.Second, c.expectedTotalCalls, func() interface{} {
   268  				totalReqCount := 0
   269  				for _, a := range ams {
   270  					reqCount := a.requestsCount(route)
   271  					// AM should not get duplicate requests.
   272  					require.True(t, reqCount <= 1, "duplicate requests %d", reqCount)
   273  					totalReqCount += reqCount
   274  				}
   275  
   276  				return totalReqCount
   277  			})
   278  		})
   279  	}
   280  
   281  }
   282  
   283  func TestDistributor_IsPathSupported(t *testing.T) {
   284  	supported := map[string]bool{
   285  		"/alertmanager/api/v1/alerts":           true,
   286  		"/alertmanager/api/v1/alerts/groups":    false,
   287  		"/alertmanager/api/v1/silences":         true,
   288  		"/alertmanager/api/v1/silence/id":       true,
   289  		"/alertmanager/api/v1/silence/anything": true,
   290  		"/alertmanager/api/v1/silence/really":   true,
   291  		"/alertmanager/api/v1/status":           true,
   292  		"/alertmanager/api/v1/receivers":        true,
   293  		"/alertmanager/api/v1/other":            false,
   294  		"/alertmanager/api/v2/alerts":           true,
   295  		"/alertmanager/api/v2/alerts/groups":    true,
   296  		"/alertmanager/api/v2/silences":         true,
   297  		"/alertmanager/api/v2/silence/id":       true,
   298  		"/alertmanager/api/v2/silence/anything": true,
   299  		"/alertmanager/api/v2/silence/really":   true,
   300  		"/alertmanager/api/v2/status":           true,
   301  		"/alertmanager/api/v2/receivers":        true,
   302  		"/alertmanager/api/v2/other":            false,
   303  		"/alertmanager/other":                   false,
   304  		"/other":                                false,
   305  	}
   306  
   307  	for path, isSupported := range supported {
   308  		t.Run(path, func(t *testing.T) {
   309  			d, _, cleanup := prepare(t, 1, 1, 1, []byte{})
   310  			t.Cleanup(cleanup)
   311  			require.Equal(t, isSupported, d.IsPathSupported(path))
   312  		})
   313  	}
   314  }
   315  
   316  func prepare(t *testing.T, numAM, numHappyAM, replicationFactor int, responseBody []byte) (*Distributor, []*mockAlertmanager, func()) {
   317  	ams := []*mockAlertmanager{}
   318  	for i := 0; i < numHappyAM; i++ {
   319  		ams = append(ams, newMockAlertmanager(i, true, responseBody))
   320  	}
   321  	for i := numHappyAM; i < numAM; i++ {
   322  		ams = append(ams, newMockAlertmanager(i, false, responseBody))
   323  	}
   324  
   325  	// Use a real ring with a mock KV store to test ring RF logic.
   326  	amDescs := map[string]ring.InstanceDesc{}
   327  	amByAddr := map[string]*mockAlertmanager{}
   328  	for i, a := range ams {
   329  		amDescs[a.myAddr] = ring.InstanceDesc{
   330  			Addr:                a.myAddr,
   331  			Zone:                "",
   332  			State:               ring.ACTIVE,
   333  			Timestamp:           time.Now().Unix(),
   334  			RegisteredTimestamp: time.Now().Add(-2 * time.Hour).Unix(),
   335  			Tokens:              []uint32{uint32((math.MaxUint32 / numAM) * i)},
   336  		}
   337  		amByAddr[a.myAddr] = ams[i]
   338  	}
   339  
   340  	kvStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
   341  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
   342  
   343  	err := kvStore.CAS(context.Background(), RingKey,
   344  		func(_ interface{}) (interface{}, bool, error) {
   345  			return &ring.Desc{
   346  				Ingesters: amDescs,
   347  			}, true, nil
   348  		},
   349  	)
   350  	require.NoError(t, err)
   351  
   352  	amRing, err := ring.New(ring.Config{
   353  		KVStore: kv.Config{
   354  			Mock: kvStore,
   355  		},
   356  		HeartbeatTimeout:  60 * time.Minute,
   357  		ReplicationFactor: replicationFactor,
   358  	}, RingNameForServer, RingKey, nil, nil)
   359  	require.NoError(t, err)
   360  	require.NoError(t, services.StartAndAwaitRunning(context.Background(), amRing))
   361  	test.Poll(t, time.Second, numAM, func() interface{} {
   362  		return amRing.InstancesCount()
   363  	})
   364  
   365  	cfg := &MultitenantAlertmanagerConfig{}
   366  	flagext.DefaultValues(cfg)
   367  
   368  	d, err := NewDistributor(cfg.AlertmanagerClient, cfg.MaxRecvMsgSize, amRing, newMockAlertmanagerClientFactory(amByAddr), util_log.Logger, prometheus.NewRegistry())
   369  	require.NoError(t, err)
   370  	require.NoError(t, services.StartAndAwaitRunning(context.Background(), d))
   371  
   372  	return d, ams, func() {
   373  		require.NoError(t, services.StopAndAwaitTerminated(context.Background(), d))
   374  	}
   375  }
   376  
   377  type mockAlertmanager struct {
   378  	alertmanagerpb.AlertmanagerClient
   379  	grpc_health_v1.HealthClient
   380  	// receivedRequests is map of route -> statusCode -> number of requests.
   381  	receivedRequests map[string]map[int]int
   382  	mtx              sync.Mutex
   383  	myAddr           string
   384  	happy            bool
   385  	responseBody     []byte
   386  }
   387  
   388  func newMockAlertmanager(idx int, happy bool, responseBody []byte) *mockAlertmanager {
   389  	return &mockAlertmanager{
   390  		receivedRequests: make(map[string]map[int]int),
   391  		myAddr:           fmt.Sprintf("127.0.0.1:%05d", 10000+idx),
   392  		happy:            happy,
   393  		responseBody:     responseBody,
   394  	}
   395  }
   396  
   397  func (am *mockAlertmanager) HandleRequest(_ context.Context, in *httpgrpc.HTTPRequest, _ ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) {
   398  	am.mtx.Lock()
   399  	defer am.mtx.Unlock()
   400  
   401  	u, err := url.Parse(in.Url)
   402  	if err != nil {
   403  		return nil, err
   404  	}
   405  	path := u.Path
   406  	m, ok := am.receivedRequests[path]
   407  	if !ok {
   408  		m = make(map[int]int)
   409  		am.receivedRequests[path] = m
   410  	}
   411  
   412  	if am.happy {
   413  		m[http.StatusOK]++
   414  		return &httpgrpc.HTTPResponse{
   415  			Code: http.StatusOK,
   416  			Headers: []*httpgrpc.Header{
   417  				{
   418  					Key:    "Content-Type",
   419  					Values: []string{"it-is-ok"},
   420  				}, {
   421  					Key:    "X-Content-Type-Options",
   422  					Values: []string{"ok-option-1", "ok-option-2"},
   423  				},
   424  			},
   425  			Body: am.responseBody,
   426  		}, nil
   427  	}
   428  
   429  	m[http.StatusInternalServerError]++
   430  	return nil, httpgrpc.ErrorFromHTTPResponse(&httpgrpc.HTTPResponse{
   431  		Code: http.StatusInternalServerError,
   432  		Headers: []*httpgrpc.Header{
   433  			{
   434  				Key:    "Content-Type",
   435  				Values: []string{"it-is-not-ok"},
   436  			}, {
   437  				Key:    "X-Content-Type-Options",
   438  				Values: []string{"not-ok-option-1", "not-ok-option-2"},
   439  			},
   440  		},
   441  	})
   442  }
   443  
   444  func (am *mockAlertmanager) requestsCount(route string) int {
   445  	am.mtx.Lock()
   446  	defer am.mtx.Unlock()
   447  
   448  	routeMap, ok := am.receivedRequests[route]
   449  	if !ok {
   450  		return 0
   451  	}
   452  
   453  	// The status could be something other than overall
   454  	// expected status because of quorum logic.
   455  	reqCount := 0
   456  	for _, count := range routeMap {
   457  		reqCount += count
   458  	}
   459  	return reqCount
   460  }
   461  
   462  func (am *mockAlertmanager) Close() error {
   463  	return nil
   464  }
   465  
// RemoteAddress returns the address this mock was created with.
func (am *mockAlertmanager) RemoteAddress() string {
	return am.myAddr
}
   469  
// mockAlertmanagerClientFactory hands out mock alertmanagers by address
// instead of creating real client connections.
type mockAlertmanagerClientFactory struct {
	// alertmanagerByAddr maps an instance address to the mock serving it.
	alertmanagerByAddr map[string]*mockAlertmanager
}
   473  
   474  func newMockAlertmanagerClientFactory(alertmanagerByAddr map[string]*mockAlertmanager) ClientsPool {
   475  	return &mockAlertmanagerClientFactory{alertmanagerByAddr: alertmanagerByAddr}
   476  }
   477  
   478  func (f *mockAlertmanagerClientFactory) GetClientFor(addr string) (Client, error) {
   479  	c, ok := f.alertmanagerByAddr[addr]
   480  	if !ok {
   481  		return nil, errors.New("client not found")
   482  	}
   483  	return Client(c), nil
   484  }