bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/sched/check_test.go (about)

     1  package sched
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"net/http"
     7  	"net/http/httptest"
     8  	"net/url"
     9  	"testing"
    10  	"time"
    11  
    12  	"bosun.org/host"
    13  	"bosun.org/util"
    14  
    15  	"bosun.org/cmd/bosun/conf"
    16  	"bosun.org/cmd/bosun/conf/rule"
    17  	"bosun.org/models"
    18  	"bosun.org/opentsdb"
    19  )
    20  
    21  func TestCheckFlapping(t *testing.T) {
    22  	hm, err := host.NewManager(false)
    23  	if err != nil {
    24  		t.Error(err)
    25  	}
    26  	util.SetHostManager(hm)
    27  
    28  	defer setup()()
    29  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, `
    30  		template t {
    31  			subject = 1
    32  			body = 2
    33  		}
    34  		notification n {
    35  			print = true
    36  		}
    37  		alert a {
    38  			warnNotification = n
    39  			warn = 1
    40  			critNotification = n
    41  			crit = 1
    42  			template = t
    43  		}
    44  	`)
    45  	if err != nil {
    46  		t.Fatal(err)
    47  	}
    48  	s, _ := initSched(&conf.SystemConf{}, c)
    49  	ak := models.NewAlertKey("a", nil)
    50  	r := &RunHistory{
    51  		Events: map[models.AlertKey]*models.Event{
    52  			ak: {Status: models.StWarning},
    53  		},
    54  	}
    55  	hasNots := func() bool {
    56  		defer func() {
    57  			s.pendingNotifications = nil
    58  		}()
    59  		if len(s.pendingNotifications) != 1 {
    60  			return false
    61  		}
    62  		for k, v := range s.pendingNotifications {
    63  			if k.Name != "n" || len(v) != 1 || v[0].Alert != "a" {
    64  				return false
    65  			}
    66  			return true
    67  		}
    68  		return false
    69  	}
    70  
    71  	type stateTransition struct {
    72  		S          models.Status
    73  		ExpectNots bool
    74  	}
    75  	transitions := []stateTransition{
    76  		{models.StWarning, true},
    77  		{models.StNormal, false},
    78  		{models.StWarning, false},
    79  		{models.StNormal, false},
    80  		{models.StCritical, true},
    81  		{models.StWarning, false},
    82  		{models.StCritical, false},
    83  	}
    84  
    85  	for i, trans := range transitions {
    86  		r.Events[ak].Status = trans.S
    87  		s.RunHistory(r)
    88  		has := hasNots()
    89  		if has && !trans.ExpectNots {
    90  			t.Fatalf("unexpected notifications for transition %d.", i)
    91  		} else if !has && trans.ExpectNots {
    92  			t.Fatalf("expected notifications for transition %d.", i)
    93  		}
    94  	}
    95  	r.Events[ak].Status = models.StNormal
    96  	s.RunHistory(r)
    97  	// Close the alert, so it should notify next time.
    98  	if err := s.ActionByAlertKey("", "", models.ActionClose, nil, ak); err != nil {
    99  		t.Fatal(err)
   100  	}
   101  	r.Events[ak].Status = models.StWarning
   102  	s.RunHistory(r)
   103  	if !hasNots() {
   104  		t.Fatal("expected notification")
   105  	}
   106  }
   107  
   108  func TestCheckSilence(t *testing.T) {
   109  	defer setup()()
   110  	done := make(chan bool, 1)
   111  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   112  		done <- true
   113  	}))
   114  	defer ts.Close()
   115  	u, err := url.Parse(ts.URL)
   116  	if err != nil {
   117  		t.Fatal(err)
   118  	}
   119  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(`
   120  		template t {
   121  			subject = "test"
   122  			body = "test"
   123  		}
   124  		notification n {
   125  			post = http://%s/
   126  		}
   127  		alert a {
   128  			template = t
   129  			warnNotification = n
   130  			warn = 1
   131  		}
   132  	`, u.Host))
   133  	if err != nil {
   134  		t.Fatal(err)
   135  	}
   136  	s, err := initSched(&conf.SystemConf{}, c)
   137  	if err != nil {
   138  		t.Fatal(err)
   139  	}
   140  	_, err = s.AddSilence(utcNow().Add(-time.Hour), utcNow().Add(time.Hour), "a", "", false, true, "", "user", "message")
   141  	if err != nil {
   142  		t.Fatal(err)
   143  	}
   144  	check(s, utcNow())
   145  	s.CheckNotifications()
   146  	select {
   147  	case <-done:
   148  		t.Fatal("silenced notification was sent")
   149  	case <-time.After(time.Second * 2):
   150  		// Timeout *probably* means the silence worked
   151  	}
   152  }
   153  
   154  func TestDelayedClose(t *testing.T) {
   155  	defer setup()()
   156  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, `
   157  		alert a {
   158  			warn = 1
   159  			crit = 1
   160  			warnNotification = test
   161  			critNotification = test
   162  			template = test
   163  		}
   164  		template test {
   165  			subject = test
   166  			body = test2
   167  		}
   168  		notification test {
   169  			print = true
   170  		}
   171  	`)
   172  	if err != nil {
   173  		t.Fatal(err)
   174  	}
   175  	s, _ := initSched(&conf.SystemConf{}, c)
   176  	now := time.Now()
   177  	ak := models.NewAlertKey("a", nil)
   178  	r := &RunHistory{
   179  		Start: now,
   180  		Events: map[models.AlertKey]*models.Event{
   181  			ak: {Status: models.StWarning},
   182  		},
   183  	}
   184  	expect := func(id int64, active bool, open bool) {
   185  		incident, err := s.DataAccess.State().GetLatestIncident(ak)
   186  		if err != nil {
   187  			t.Fatal(err)
   188  		}
   189  		if incident.Id != id {
   190  			t.Fatalf("expected incident id %d. Got %d.", id, incident.Id)
   191  		}
   192  		if incident.IsActive() != active {
   193  			t.Fatalf("expected incident active status to be %v but got %v", active, incident.IsActive())
   194  		}
   195  		if incident.Open != open {
   196  			t.Fatalf("expected incident closed boolean to be %v but got %v", open, incident.Open)
   197  		}
   198  	}
   199  	expectPendingNotifications := func(i int) {
   200  		if len(s.pendingNotifications[s.RuleConf.GetNotification("test")]) != i {
   201  			t.Fatalf("expencted %v pending notifications but got %v", i, len(s.pendingNotifications[s.RuleConf.GetNotification("test")]))
   202  		}
   203  		s.pendingNotifications = nil
   204  	}
   205  	advance := func(i int64) {
   206  		r.Start = r.Start.Add(time.Second * time.Duration(i))
   207  	}
   208  	s.RunHistory(r)
   209  	expect(1, true, true)
   210  	expectPendingNotifications(1)
   211  	s.pendingNotifications = nil
   212  
   213  	// Test case where close issue and alert goes to normal before deadline
   214  	fiveMin := r.Start.Add(time.Minute * 5)
   215  	err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak)
   216  	if err != nil {
   217  		t.Fatal(err)
   218  	}
   219  	advance(1)
   220  	s.RunHistory(r)
   221  	expect(1, true, true)
   222  
   223  	r.Events[ak].Status = models.StNormal
   224  	advance(1)
   225  	s.RunHistory(r)
   226  	expect(1, false, false)
   227  
   228  	r.Events[ak].Status = models.StWarning
   229  	advance(1)
   230  	s.RunHistory(r)
   231  	expect(2, true, true)
   232  	expectPendingNotifications(1)
   233  
   234  	// Test case where close issue and alert does not go normal before deadline
   235  	// which should result in a force closing
   236  	fiveMin = r.Start.Add(time.Minute * 5)
   237  	err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak)
   238  	if err != nil {
   239  		t.Fatal(err)
   240  	}
   241  
   242  	advance(301)
   243  	s.RunHistory(r)
   244  	expect(2, true, false)
   245  
   246  	r.Events[ak].Status = models.StWarning
   247  	advance(1)
   248  	s.RunHistory(r)
   249  	expect(3, true, true)
   250  	expectPendingNotifications(1)
   251  
   252  	// Test cancelling a delayed close
   253  	fiveMin = r.Start.Add(time.Minute * 5)
   254  	err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak)
   255  	if err != nil {
   256  		t.Fatal(err)
   257  	}
   258  	advance(1)
   259  	s.RunHistory(r)
   260  	expect(3, true, true)
   261  
   262  	err = s.ActionByAlertKey("", "", models.ActionCancelClose, nil, ak)
   263  	if err != nil {
   264  		t.Fatal(err)
   265  	}
   266  	advance(300)
   267  	s.RunHistory(r)
   268  	expect(3, true, true)
   269  
   270  	// Make sure delayed close works after a previous delayed close was cancelled
   271  	fiveMin = r.Start.Add(time.Minute * 5)
   272  	err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak)
   273  	if err != nil {
   274  		t.Fatal(err)
   275  	}
   276  	advance(301)
   277  	s.RunHistory(r)
   278  	expect(3, true, false)
   279  
   280  	r.Events[ak].Status = models.StWarning
   281  	advance(1)
   282  	s.RunHistory(r)
   283  	expect(4, true, true)
   284  	expectPendingNotifications(1)
   285  
   286  	// Make sure escalation cancels a delayed close
   287  	fiveMin = r.Start.Add(time.Minute * 5)
   288  	err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak)
   289  	if err != nil {
   290  		t.Fatal(err)
   291  	}
   292  	r.Events[ak].Status = models.StCritical
   293  	advance(1)
   294  	s.RunHistory(r)
   295  	expect(4, true, true)
   296  	expectPendingNotifications(1)
   297  
   298  	advance(300)
   299  	s.RunHistory(r)
   300  	expect(4, true, true)
   301  	expectPendingNotifications(0)
   302  }
   303  
   304  func TestIncidentIds(t *testing.T) {
   305  	defer setup()()
   306  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, `
   307  		alert a {
   308  			crit = 1
   309  		}
   310  	`)
   311  	if err != nil {
   312  		t.Fatal(err)
   313  	}
   314  	s, _ := initSched(&conf.SystemConf{}, c)
   315  	ak := models.NewAlertKey("a", nil)
   316  	r := &RunHistory{
   317  		Events: map[models.AlertKey]*models.Event{
   318  			ak: {Status: models.StWarning},
   319  		},
   320  	}
   321  	expect := func(id int64) {
   322  		incident, err := s.DataAccess.State().GetLatestIncident(ak)
   323  		if err != nil {
   324  			t.Fatal(err)
   325  		}
   326  		if incident.Id != id {
   327  			t.Fatalf("Expeted incident id %d. Got %d.", id, incident.Id)
   328  		}
   329  	}
   330  	s.RunHistory(r)
   331  	expect(1)
   332  
   333  	r.Events[ak].Status = models.StNormal
   334  	s.RunHistory(r)
   335  	expect(1)
   336  
   337  	r.Events[ak].Status = models.StWarning
   338  	s.RunHistory(r)
   339  	expect(1)
   340  
   341  	r.Events[ak].Status = models.StNormal
   342  	s.RunHistory(r)
   343  	err = s.ActionByAlertKey("", "", models.ActionClose, nil, ak)
   344  	if err != nil {
   345  		t.Fatal(err)
   346  	}
   347  	r.Events[ak].Status = models.StWarning
   348  	s.RunHistory(r)
   349  	expect(2)
   350  }
   351  
   352  func TestCheckNotify(t *testing.T) {
   353  	defer setup()()
   354  	nc := make(chan string)
   355  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   356  		b, _ := ioutil.ReadAll(r.Body)
   357  		nc <- string(b)
   358  	}))
   359  	defer ts.Close()
   360  	u, err := url.Parse(ts.URL)
   361  	if err != nil {
   362  		t.Fatal(err)
   363  	}
   364  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(`
   365  		template t {
   366  			subject = {{.Last.Status}}
   367  			body = b
   368  		}
   369  		notification n {
   370  			post = http://%s/
   371  		}
   372  		alert a {
   373  			template = t
   374  			warnNotification = n
   375  			warn = 1
   376  		}
   377  	`, u.Host))
   378  	if err != nil {
   379  		t.Fatal(err)
   380  	}
   381  	s, err := initSched(&conf.SystemConf{}, c)
   382  	if err != nil {
   383  		t.Fatal(err)
   384  	}
   385  	check(s, utcNow())
   386  	s.CheckNotifications()
   387  	select {
   388  	case r := <-nc:
   389  		if r != "warning" {
   390  			t.Fatalf("expected warning, got %v", r)
   391  		}
   392  	case <-time.After(time.Second):
   393  		t.Fatal("failed to receive notification before timeout")
   394  	}
   395  }
   396  
   397  func TestCheckNotifyUnknown(t *testing.T) {
   398  	defer setup()()
   399  	nc := make(chan string, 1)
   400  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   401  		b, _ := ioutil.ReadAll(r.Body)
   402  		nc <- string(b)
   403  	}))
   404  	defer ts.Close()
   405  	u, err := url.Parse(ts.URL)
   406  	if err != nil {
   407  		t.Fatal(err)
   408  	}
   409  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(`
   410  		template t {
   411  			subject = s
   412  			unk = {{.Name}}: {{.Group | len}} unknown alerts
   413  			body = b
   414  		}
   415  		notification n {
   416  			post = http://%s/
   417  			unknownBody = unk
   418  		}
   419  		alert a {
   420  			template = t
   421  			critNotification = n
   422  			crit = 1
   423  		}
   424  	`, u.Host))
   425  	if err != nil {
   426  		t.Fatal(err)
   427  	}
   428  	s, err := initSched(&conf.SystemConf{MinGroupSize: 2}, c)
   429  	if err != nil {
   430  		t.Fatal(err)
   431  	}
   432  	r := &RunHistory{
   433  		Events: map[models.AlertKey]*models.Event{
   434  			models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown},
   435  			models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown},
   436  		},
   437  	}
   438  	s.RunHistory(r)
   439  	s.CheckNotifications()
   440  	s.sendUnknownNotifications()
   441  	gotExpected := false
   442  Loop:
   443  	for {
   444  		select {
   445  		case r := <-nc:
   446  			if r == "a: 2 unknown alerts" {
   447  				gotExpected = true
   448  			} else {
   449  				t.Fatalf("unexpected: %v", r)
   450  			}
   451  		// TODO: remove this silly timeout-based test
   452  		case <-time.After(time.Second):
   453  			break Loop
   454  		}
   455  	}
   456  	if !gotExpected {
   457  		t.Errorf("didn't get expected result")
   458  	}
   459  }
   460  
   461  // TestCheckNotifyUnknownDefault tests the default unknownTemplate.
   462  func TestCheckNotifyUnknownDefault(t *testing.T) {
   463  	defer setup()()
   464  	nc := make(chan string, 1)
   465  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   466  		b, _ := ioutil.ReadAll(r.Body)
   467  		nc <- string(b)
   468  	}))
   469  	defer ts.Close()
   470  	u, err := url.Parse(ts.URL)
   471  	if err != nil {
   472  		t.Fatal(err)
   473  	}
   474  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(`
   475  		template t {
   476  			subject = template
   477  			body = b
   478  		}
   479  		notification n {
   480  			post = http://%s/
   481  		}
   482  		alert a {
   483  			template = t
   484  			critNotification = n
   485  			crit = 1
   486  		}
   487  	`, u.Host))
   488  	if err != nil {
   489  		t.Fatal(err)
   490  	}
   491  	s, err := initSched(&conf.SystemConf{MinGroupSize: 2}, c)
   492  	if err != nil {
   493  		t.Fatal(err)
   494  	}
   495  	r := &RunHistory{
   496  		Events: map[models.AlertKey]*models.Event{
   497  			models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown},
   498  			models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown},
   499  		},
   500  	}
   501  	s.RunHistory(r)
   502  	s.CheckNotifications()
   503  	s.sendUnknownNotifications()
   504  	gotExpected := false
   505  Loop:
   506  	for {
   507  		select {
   508  		case r := <-nc:
   509  			if r == "a: 2 unknown alerts" {
   510  				gotExpected = true
   511  			} else {
   512  				t.Fatalf("unexpected: %v", r)
   513  			}
   514  		// TODO: remove this silly timeout-based test
   515  		case <-time.After(time.Second):
   516  			break Loop
   517  		}
   518  	}
   519  	if !gotExpected {
   520  		t.Errorf("didn't get expected result")
   521  	}
   522  }
   523  
   524  func TestCheckNotifyLog(t *testing.T) {
   525  	defer setup()()
   526  	nc := make(chan string, 1)
   527  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   528  		b, _ := ioutil.ReadAll(r.Body)
   529  		nc <- string(b)
   530  	}))
   531  	defer ts.Close()
   532  	u, err := url.Parse(ts.URL)
   533  	if err != nil {
   534  		t.Fatal(err)
   535  	}
   536  	c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(`
   537  		template t {
   538  			subject = {{.Alert.Name}}
   539  			body = b
   540  		}
   541  		notification n {
   542  			post = http://%s/
   543  		}
   544  		alert a {
   545  			template = t
   546  			critNotification = n
   547  			crit = 1
   548  		}
   549  		alert b {
   550  			template = t
   551  			critNotification = n
   552  			crit = 1
   553  			log = true
   554  		}
   555  	`, u.Host))
   556  	if err != nil {
   557  		t.Fatal(err)
   558  	}
   559  	s, err := initSched(&conf.SystemConf{}, c)
   560  	if err != nil {
   561  		t.Fatal(err)
   562  	}
   563  	check(s, utcNow())
   564  	s.CheckNotifications()
   565  	gotA := false
   566  	gotB := false
   567  Loop:
   568  	for {
   569  		select {
   570  		case r := <-nc:
   571  			if r == "a" && !gotA {
   572  				gotA = true
   573  			} else if r == "b" && !gotB {
   574  				gotB = true
   575  			} else {
   576  				t.Errorf("unexpected: %v", r)
   577  			}
   578  		// TODO: remove this silly timeout-based test
   579  		case <-time.After(time.Second):
   580  			break Loop
   581  		}
   582  	}
   583  	if !gotA {
   584  		t.Errorf("didn't get expected a")
   585  	}
   586  	if !gotB {
   587  		t.Errorf("didn't get expected b")
   588  	}
   589  	status, err := s.DataAccess.State().GetAllOpenIncidents()
   590  	if err != nil {
   591  		t.Fatal(err)
   592  	}
   593  	for _, st := range status {
   594  		switch st.AlertKey {
   595  		case "a{}":
   596  			if !st.Open {
   597  				t.Errorf("expected a to be open")
   598  			}
   599  		case "b{}":
   600  			if st.Open {
   601  				t.Errorf("expected b to be closed")
   602  			}
   603  		default:
   604  			t.Errorf("unexpected alert key %s", st.AlertKey)
   605  		}
   606  	}
   607  }