bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/sched/check_test.go (about) 1 package sched 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "net/http" 7 "net/http/httptest" 8 "net/url" 9 "testing" 10 "time" 11 12 "bosun.org/host" 13 "bosun.org/util" 14 15 "bosun.org/cmd/bosun/conf" 16 "bosun.org/cmd/bosun/conf/rule" 17 "bosun.org/models" 18 "bosun.org/opentsdb" 19 ) 20 21 func TestCheckFlapping(t *testing.T) { 22 hm, err := host.NewManager(false) 23 if err != nil { 24 t.Error(err) 25 } 26 util.SetHostManager(hm) 27 28 defer setup()() 29 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, ` 30 template t { 31 subject = 1 32 body = 2 33 } 34 notification n { 35 print = true 36 } 37 alert a { 38 warnNotification = n 39 warn = 1 40 critNotification = n 41 crit = 1 42 template = t 43 } 44 `) 45 if err != nil { 46 t.Fatal(err) 47 } 48 s, _ := initSched(&conf.SystemConf{}, c) 49 ak := models.NewAlertKey("a", nil) 50 r := &RunHistory{ 51 Events: map[models.AlertKey]*models.Event{ 52 ak: {Status: models.StWarning}, 53 }, 54 } 55 hasNots := func() bool { 56 defer func() { 57 s.pendingNotifications = nil 58 }() 59 if len(s.pendingNotifications) != 1 { 60 return false 61 } 62 for k, v := range s.pendingNotifications { 63 if k.Name != "n" || len(v) != 1 || v[0].Alert != "a" { 64 return false 65 } 66 return true 67 } 68 return false 69 } 70 71 type stateTransition struct { 72 S models.Status 73 ExpectNots bool 74 } 75 transitions := []stateTransition{ 76 {models.StWarning, true}, 77 {models.StNormal, false}, 78 {models.StWarning, false}, 79 {models.StNormal, false}, 80 {models.StCritical, true}, 81 {models.StWarning, false}, 82 {models.StCritical, false}, 83 } 84 85 for i, trans := range transitions { 86 r.Events[ak].Status = trans.S 87 s.RunHistory(r) 88 has := hasNots() 89 if has && !trans.ExpectNots { 90 t.Fatalf("unexpected notifications for transition %d.", i) 91 } else if !has && trans.ExpectNots { 92 t.Fatalf("expected notifications for transition %d.", i) 93 } 94 } 95 r.Events[ak].Status = models.StNormal 96 s.RunHistory(r) 97 // Close the alert, so it should notify next time. 98 if err := s.ActionByAlertKey("", "", models.ActionClose, nil, ak); err != nil { 99 t.Fatal(err) 100 } 101 r.Events[ak].Status = models.StWarning 102 s.RunHistory(r) 103 if !hasNots() { 104 t.Fatal("expected notification") 105 } 106 } 107 108 func TestCheckSilence(t *testing.T) { 109 defer setup()() 110 done := make(chan bool, 1) 111 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 112 done <- true 113 })) 114 defer ts.Close() 115 u, err := url.Parse(ts.URL) 116 if err != nil { 117 t.Fatal(err) 118 } 119 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(` 120 template t { 121 subject = "test" 122 body = "test" 123 } 124 notification n { 125 post = http://%s/ 126 } 127 alert a { 128 template = t 129 warnNotification = n 130 warn = 1 131 } 132 `, u.Host)) 133 if err != nil { 134 t.Fatal(err) 135 } 136 s, err := initSched(&conf.SystemConf{}, c) 137 if err != nil { 138 t.Fatal(err) 139 } 140 _, err = s.AddSilence(utcNow().Add(-time.Hour), utcNow().Add(time.Hour), "a", "", false, true, "", "user", "message") 141 if err != nil { 142 t.Fatal(err) 143 } 144 check(s, utcNow()) 145 s.CheckNotifications() 146 select { 147 case <-done: 148 t.Fatal("silenced notification was sent") 149 case <-time.After(time.Second * 2): 150 // Timeout *probably* means the silence worked 151 } 152 } 153 154 func TestDelayedClose(t *testing.T) { 155 defer setup()() 156 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, ` 157 alert a { 158 warn = 1 159 crit = 1 160 warnNotification = test 161 critNotification = test 162 template = test 163 } 164 template test { 165 subject = test 166 body = test2 167 } 168 notification test { 169 print = true 170 } 171 `) 172 if err != nil { 173 t.Fatal(err) 174 } 175 s, _ := initSched(&conf.SystemConf{}, c) 176 now := time.Now() 177 ak := models.NewAlertKey("a", nil) 178 r := &RunHistory{ 179 Start: now, 180 Events: map[models.AlertKey]*models.Event{ 181 ak: {Status: models.StWarning}, 182 }, 183 } 184 expect := func(id int64, active bool, open bool) { 185 incident, err := s.DataAccess.State().GetLatestIncident(ak) 186 if err != nil { 187 t.Fatal(err) 188 } 189 if incident.Id != id { 190 t.Fatalf("expected incident id %d. Got %d.", id, incident.Id) 191 } 192 if incident.IsActive() != active { 193 t.Fatalf("expected incident active status to be %v but got %v", active, incident.IsActive()) 194 } 195 if incident.Open != open { 196 t.Fatalf("expected incident closed boolean to be %v but got %v", open, incident.Open) 197 } 198 } 199 expectPendingNotifications := func(i int) { 200 if len(s.pendingNotifications[s.RuleConf.GetNotification("test")]) != i { 201 t.Fatalf("expencted %v pending notifications but got %v", i, len(s.pendingNotifications[s.RuleConf.GetNotification("test")])) 202 } 203 s.pendingNotifications = nil 204 } 205 advance := func(i int64) { 206 r.Start = r.Start.Add(time.Second * time.Duration(i)) 207 } 208 s.RunHistory(r) 209 expect(1, true, true) 210 expectPendingNotifications(1) 211 s.pendingNotifications = nil 212 213 // Test case where close issue and alert goes to normal before deadline 214 fiveMin := r.Start.Add(time.Minute * 5) 215 err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak) 216 if err != nil { 217 t.Fatal(err) 218 } 219 advance(1) 220 s.RunHistory(r) 221 expect(1, true, true) 222 223 r.Events[ak].Status = models.StNormal 224 advance(1) 225 s.RunHistory(r) 226 expect(1, false, false) 227 228 r.Events[ak].Status = models.StWarning 229 advance(1) 230 s.RunHistory(r) 231 expect(2, true, true) 232 expectPendingNotifications(1) 233 234 // Test case where close issue and alert does not go normal before deadline 235 // which should result in a force closing 236 fiveMin = r.Start.Add(time.Minute * 5) 237 err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak) 238 if err != nil { 239 t.Fatal(err) 240 } 241 242 advance(301) 243 s.RunHistory(r) 244 expect(2, true, false) 245 246 r.Events[ak].Status = models.StWarning 247 advance(1) 248 s.RunHistory(r) 249 expect(3, true, true) 250 expectPendingNotifications(1) 251 252 // Test cancelling a delayed close 253 fiveMin = r.Start.Add(time.Minute * 5) 254 err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak) 255 if err != nil { 256 t.Fatal(err) 257 } 258 advance(1) 259 s.RunHistory(r) 260 expect(3, true, true) 261 262 err = s.ActionByAlertKey("", "", models.ActionCancelClose, nil, ak) 263 if err != nil { 264 t.Fatal(err) 265 } 266 advance(300) 267 s.RunHistory(r) 268 expect(3, true, true) 269 270 // Make sure delayed close works after a previous delayed close was cancelled 271 fiveMin = r.Start.Add(time.Minute * 5) 272 err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak) 273 if err != nil { 274 t.Fatal(err) 275 } 276 advance(301) 277 s.RunHistory(r) 278 expect(3, true, false) 279 280 r.Events[ak].Status = models.StWarning 281 advance(1) 282 s.RunHistory(r) 283 expect(4, true, true) 284 expectPendingNotifications(1) 285 286 // Make sure escalation cancels a delayed close 287 fiveMin = r.Start.Add(time.Minute * 5) 288 err = s.ActionByAlertKey("", "", models.ActionClose, &fiveMin, ak) 289 if err != nil { 290 t.Fatal(err) 291 } 292 r.Events[ak].Status = models.StCritical 293 advance(1) 294 s.RunHistory(r) 295 expect(4, true, true) 296 expectPendingNotifications(1) 297 298 advance(300) 299 s.RunHistory(r) 300 expect(4, true, true) 301 expectPendingNotifications(0) 302 } 303 304 func TestIncidentIds(t *testing.T) { 305 defer setup()() 306 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, ` 307 alert a { 308 crit = 1 309 } 310 `) 311 if err != nil { 312 t.Fatal(err) 313 } 314 s, _ := initSched(&conf.SystemConf{}, c) 315 ak := models.NewAlertKey("a", nil) 316 r := &RunHistory{ 317 Events: map[models.AlertKey]*models.Event{ 318 ak: {Status: models.StWarning}, 319 }, 320 } 321 expect := func(id int64) { 322 incident, err := s.DataAccess.State().GetLatestIncident(ak) 323 if err != nil { 324 t.Fatal(err) 325 } 326 if incident.Id != id { 327 t.Fatalf("Expeted incident id %d. Got %d.", id, incident.Id) 328 } 329 } 330 s.RunHistory(r) 331 expect(1) 332 333 r.Events[ak].Status = models.StNormal 334 s.RunHistory(r) 335 expect(1) 336 337 r.Events[ak].Status = models.StWarning 338 s.RunHistory(r) 339 expect(1) 340 341 r.Events[ak].Status = models.StNormal 342 s.RunHistory(r) 343 err = s.ActionByAlertKey("", "", models.ActionClose, nil, ak) 344 if err != nil { 345 t.Fatal(err) 346 } 347 r.Events[ak].Status = models.StWarning 348 s.RunHistory(r) 349 expect(2) 350 } 351 352 func TestCheckNotify(t *testing.T) { 353 defer setup()() 354 nc := make(chan string) 355 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 356 b, _ := ioutil.ReadAll(r.Body) 357 nc <- string(b) 358 })) 359 defer ts.Close() 360 u, err := url.Parse(ts.URL) 361 if err != nil { 362 t.Fatal(err) 363 } 364 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(` 365 template t { 366 subject = {{.Last.Status}} 367 body = b 368 } 369 notification n { 370 post = http://%s/ 371 } 372 alert a { 373 template = t 374 warnNotification = n 375 warn = 1 376 } 377 `, u.Host)) 378 if err != nil { 379 t.Fatal(err) 380 } 381 s, err := initSched(&conf.SystemConf{}, c) 382 if err != nil { 383 t.Fatal(err) 384 } 385 check(s, utcNow()) 386 s.CheckNotifications() 387 select { 388 case r := <-nc: 389 if r != "warning" { 390 t.Fatalf("expected warning, got %v", r) 391 } 392 case <-time.After(time.Second): 393 t.Fatal("failed to receive notification before timeout") 394 } 395 } 396 397 func TestCheckNotifyUnknown(t *testing.T) { 398 defer setup()() 399 nc := make(chan string, 1) 400 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 401 b, _ := ioutil.ReadAll(r.Body) 402 nc <- string(b) 403 })) 404 defer ts.Close() 405 u, err := url.Parse(ts.URL) 406 if err != nil { 407 t.Fatal(err) 408 } 409 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(` 410 template t { 411 subject = s 412 unk = {{.Name}}: {{.Group | len}} unknown alerts 413 body = b 414 } 415 notification n { 416 post = http://%s/ 417 unknownBody = unk 418 } 419 alert a { 420 template = t 421 critNotification = n 422 crit = 1 423 } 424 `, u.Host)) 425 if err != nil { 426 t.Fatal(err) 427 } 428 s, err := initSched(&conf.SystemConf{MinGroupSize: 2}, c) 429 if err != nil { 430 t.Fatal(err) 431 } 432 r := &RunHistory{ 433 Events: map[models.AlertKey]*models.Event{ 434 models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown}, 435 models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown}, 436 }, 437 } 438 s.RunHistory(r) 439 s.CheckNotifications() 440 s.sendUnknownNotifications() 441 gotExpected := false 442 Loop: 443 for { 444 select { 445 case r := <-nc: 446 if r == "a: 2 unknown alerts" { 447 gotExpected = true 448 } else { 449 t.Fatalf("unexpected: %v", r) 450 } 451 // TODO: remove this silly timeout-based test 452 case <-time.After(time.Second): 453 break Loop 454 } 455 } 456 if !gotExpected { 457 t.Errorf("didn't get expected result") 458 } 459 } 460 461 // TestCheckNotifyUnknownDefault tests the default unknownTemplate. 462 func TestCheckNotifyUnknownDefault(t *testing.T) { 463 defer setup()() 464 nc := make(chan string, 1) 465 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 466 b, _ := ioutil.ReadAll(r.Body) 467 nc <- string(b) 468 })) 469 defer ts.Close() 470 u, err := url.Parse(ts.URL) 471 if err != nil { 472 t.Fatal(err) 473 } 474 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(` 475 template t { 476 subject = template 477 body = b 478 } 479 notification n { 480 post = http://%s/ 481 } 482 alert a { 483 template = t 484 critNotification = n 485 crit = 1 486 } 487 `, u.Host)) 488 if err != nil { 489 t.Fatal(err) 490 } 491 s, err := initSched(&conf.SystemConf{MinGroupSize: 2}, c) 492 if err != nil { 493 t.Fatal(err) 494 } 495 r := &RunHistory{ 496 Events: map[models.AlertKey]*models.Event{ 497 models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown}, 498 models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown}, 499 }, 500 } 501 s.RunHistory(r) 502 s.CheckNotifications() 503 s.sendUnknownNotifications() 504 gotExpected := false 505 Loop: 506 for { 507 select { 508 case r := <-nc: 509 if r == "a: 2 unknown alerts" { 510 gotExpected = true 511 } else { 512 t.Fatalf("unexpected: %v", r) 513 } 514 // TODO: remove this silly timeout-based test 515 case <-time.After(time.Second): 516 break Loop 517 } 518 } 519 if !gotExpected { 520 t.Errorf("didn't get expected result") 521 } 522 } 523 524 func TestCheckNotifyLog(t *testing.T) { 525 defer setup()() 526 nc := make(chan string, 1) 527 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 528 b, _ := ioutil.ReadAll(r.Body) 529 nc <- string(b) 530 })) 531 defer ts.Close() 532 u, err := url.Parse(ts.URL) 533 if err != nil { 534 t.Fatal(err) 535 } 536 c, err := rule.NewConf("", conf.EnabledBackends{}, nil, fmt.Sprintf(` 537 template t { 538 subject = {{.Alert.Name}} 539 body = b 540 } 541 notification n { 542 post = http://%s/ 543 } 544 alert a { 545 template = t 546 critNotification = n 547 crit = 1 548 } 549 alert b { 550 template = t 551 critNotification = n 552 crit = 1 553 log = true 554 } 555 `, u.Host)) 556 if err != nil { 557 t.Fatal(err) 558 } 559 s, err := initSched(&conf.SystemConf{}, c) 560 if err != nil { 561 t.Fatal(err) 562 } 563 check(s, utcNow()) 564 s.CheckNotifications() 565 gotA := false 566 gotB := false 567 Loop: 568 for { 569 select { 570 case r := <-nc: 571 if r == "a" && !gotA { 572 gotA = true 573 } else if r == "b" && !gotB { 574 gotB = true 575 } else { 576 t.Errorf("unexpected: %v", r) 577 } 578 // TODO: remove this silly timeout-based test 579 case <-time.After(time.Second): 580 break Loop 581 } 582 } 583 if !gotA { 584 t.Errorf("didn't get expected a") 585 } 586 if !gotB { 587 t.Errorf("didn't get expected b") 588 } 589 status, err := s.DataAccess.State().GetAllOpenIncidents() 590 if err != nil { 591 t.Fatal(err) 592 } 593 for _, st := range status { 594 switch st.AlertKey { 595 case "a{}": 596 if !st.Open { 597 t.Errorf("expected a to be open") 598 } 599 case "b{}": 600 if st.Open { 601 t.Errorf("expected b to be closed") 602 } 603 default: 604 t.Errorf("unexpected alert key %s", st.AlertKey) 605 } 606 } 607 }