bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/sched/sched_test.go (about) 1 package sched 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "log" 8 "net/http" 9 "net/http/httptest" 10 "net/url" 11 "sort" 12 "testing" 13 "time" 14 15 "bosun.org/cmd/bosun/conf" 16 "bosun.org/cmd/bosun/conf/rule" 17 "bosun.org/cmd/bosun/database" 18 "bosun.org/cmd/bosun/database/test" 19 "bosun.org/models" 20 "bosun.org/opentsdb" 21 "bosun.org/slog" 22 "github.com/MiniProfiler/go/miniprofiler" 23 ) 24 25 func init() { 26 //slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)}) 27 slog.Set(&slog.StdLog{Log: log.New(ioutil.Discard, "", log.LstdFlags)}) 28 log.SetOutput(ioutil.Discard) 29 } 30 31 type schedState struct { 32 key, status string 33 } 34 35 type schedTest struct { 36 conf string 37 queries map[string]opentsdb.ResponseSet 38 // state -> active 39 state map[schedState]bool 40 touched map[models.AlertKey]time.Time 41 } 42 43 // test-only function to check all alerts immediately. 44 func check(s *Schedule, t time.Time) { 45 names := []string{} 46 for a := range s.RuleConf.GetAlerts() { 47 names = append(names, a) 48 } 49 sort.Strings(names) 50 for _, n := range names { 51 a := s.RuleConf.GetAlerts()[n] 52 s.ctx.runTime = t 53 s.checkAlert(a, s.ctx) 54 } 55 } 56 57 var db database.DataAccess 58 59 func setup() func() { 60 testDb, closer := dbtest.StartTestRedis(9992) 61 db = testDb 62 return closer 63 } 64 65 func initSched(sc conf.SystemConfProvider, c conf.RuleConfProvider) (*Schedule, error) { 66 s := new(Schedule) 67 err := s.Init("test_schedule", sc, c, db, nil, false, false) 68 return s, err 69 } 70 71 func testSched(t *testing.T, st *schedTest) (s *Schedule) { 72 bosunStartupTime = time.Date(1900, 0, 0, 0, 0, 0, 0, time.UTC) //pretend we've been running for a while. 73 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 74 var req opentsdb.Request 75 if err := json.NewDecoder(r.Body).Decode(&req); err != nil { 76 log.Fatal(err) 77 } 78 var resp opentsdb.ResponseSet 79 for _, rq := range req.Queries { 80 qs := fmt.Sprintf(`q("%s", "%v", "%v")`, rq, req.Start, req.End) 81 q, ok := st.queries[qs] 82 if !ok { 83 t.Errorf("unknown query: %s", qs) 84 return 85 } 86 if q == nil { 87 return // Put nil entry in map to simulate opentsdb error. 88 } 89 resp = append(resp, q...) 90 } 91 if err := json.NewEncoder(w).Encode(&resp); err != nil { 92 log.Fatal(err) 93 } 94 })) 95 defer ts.Close() 96 u, err := url.Parse(ts.URL) 97 if err != nil { 98 t.Fatal(err) 99 } 100 //confs := "tsdbHost = " + u.Host + "\n" + st.conf 101 c, err := rule.NewConf("testconf", conf.EnabledBackends{OpenTSDB: true}, nil, st.conf) 102 if err != nil { 103 t.Error(err) 104 t.Logf("conf:\n%s", st.conf) 105 return 106 } 107 108 time.Sleep(time.Millisecond * 250) 109 sysConf := &conf.SystemConf{CheckFrequency: conf.Duration{Duration: time.Minute * 5}, DefaultRunEvery: 1, UnknownThreshold: 5, MinGroupSize: 5, OpenTSDBConf: conf.OpenTSDBConf{Host: u.Host, ResponseLimit: 1 << 20}} 110 s, _ = initSched(sysConf, c) 111 for ak, time := range st.touched { 112 s.DataAccess.State().TouchAlertKey(ak, time) 113 } 114 check(s, queryTime) 115 groups, err := s.MarshalGroups(new(miniprofiler.Profile), "") 116 if err != nil { 117 t.Error(err) 118 return 119 } 120 var check func(g *StateGroup) 121 check = func(g *StateGroup) { 122 for _, c := range g.Children { 123 check(c) 124 } 125 if g.AlertKey == "" { 126 return 127 } 128 ss := schedState{string(g.AlertKey), g.Status.String()} 129 v, ok := st.state[ss] 130 if !ok { 131 t.Errorf("unexpected state: %s, %s", g.AlertKey, g.Status) 132 return 133 } 134 if v != g.Active { 135 t.Errorf("bad active: %s, %s", g.AlertKey, g.Status) 136 return 137 } 138 delete(st.state, ss) 139 } 140 for _, v := range groups.Groups.NeedAck { 141 check(v) 142 } 143 for _, v := range groups.Groups.Acknowledged { 144 check(v) 145 } 146 for k := range st.state { 147 t.Errorf("unused state: %s", k) 148 } 149 return s 150 } 151 152 var queryTime = time.Date(2000, 1, 1, 12, 0, 0, 0, time.UTC) 153 var window5Min = `"9.467277e+08", "9.46728e+08"` 154 155 func TestCrit(t *testing.T) { 156 defer setup()() 157 s := testSched(t, &schedTest{ 158 conf: `alert a { 159 crit = avg(q("avg:m{a=b}", "5m", "")) > 0 160 }`, 161 queries: map[string]opentsdb.ResponseSet{ 162 `q("avg:m{a=b}", ` + window5Min + `)`: { 163 { 164 Metric: "m", 165 Tags: opentsdb.TagSet{"a": "b"}, 166 DPS: map[string]opentsdb.Point{"0": 1}, 167 }, 168 }, 169 }, 170 state: map[schedState]bool{ 171 {"a{a=b}", "critical"}: true, 172 }, 173 }) 174 if !s.AlertSuccessful("a") { 175 t.Fatal("Expected alert a to be successful") 176 } 177 } 178 179 func TestBandDisableUnjoined(t *testing.T) { 180 defer setup()() 181 testSched(t, &schedTest{ 182 conf: `alert a { 183 $sum = "sum:m{a=*}" 184 $band = band($sum, "1m", "1h", 1) 185 crit = avg(q($sum, "1m", "")) > avg($band) + dev($band) 186 }`, 187 queries: map[string]opentsdb.ResponseSet{ 188 `q("sum:m{a=*}", "9.4672794e+08", "9.46728e+08")`: { 189 { 190 Metric: "m", 191 Tags: opentsdb.TagSet{"a": "b"}, 192 DPS: map[string]opentsdb.Point{"0": 1}, 193 }, 194 }, 195 `q("sum:m{a=*}", "9.4672434e+08", "9.467244e+08")`: { 196 { 197 Metric: "m", 198 Tags: opentsdb.TagSet{"a": "c"}, 199 DPS: map[string]opentsdb.Point{"0": 1}, 200 }, 201 }, 202 }, 203 }) 204 } 205 206 func TestCount(t *testing.T) { 207 defer setup()() 208 testSched(t, &schedTest{ 209 conf: `alert a { 210 crit = count("sum:m{a=*}", "5m", "") != 2 211 }`, 212 queries: map[string]opentsdb.ResponseSet{ 213 `q("sum:m{a=*}", ` + window5Min + `)`: { 214 { 215 Metric: "m", 216 Tags: opentsdb.TagSet{"a": "b"}, 217 DPS: map[string]opentsdb.Point{"0": 1}, 218 }, 219 { 220 Metric: "m", 221 Tags: opentsdb.TagSet{"a": "c"}, 222 DPS: map[string]opentsdb.Point{"0": 1}, 223 }, 224 }, 225 }, 226 }) 227 } 228 229 func TestUnknown(t *testing.T) { 230 defer setup()() 231 testSched(t, &schedTest{ 232 conf: `alert a { 233 crit = avg(q("avg:m{a=*}", "5m", "")) > 0 234 }`, 235 queries: map[string]opentsdb.ResponseSet{ 236 `q("avg:m{a=*}", ` + window5Min + `)`: {}, 237 }, 238 state: map[schedState]bool{ 239 {"a{a=b}", "unknown"}: true, 240 }, 241 touched: map[models.AlertKey]time.Time{ 242 "a{a=b}": queryTime.Add(-10 * time.Minute), 243 "a{a=c}": queryTime.Add(-9 * time.Minute), 244 }, 245 }) 246 } 247 248 func TestUnknown_HalfFreq(t *testing.T) { 249 defer setup()() 250 testSched(t, &schedTest{ 251 conf: `alert a { 252 crit = avg(q("avg:m{a=*}", "5m", "")) > 0 253 runEvery = 2 254 }`, 255 queries: map[string]opentsdb.ResponseSet{ 256 `q("avg:m{a=*}", ` + window5Min + `)`: {}, 257 }, 258 state: map[schedState]bool{ 259 {"a{a=b}", "unknown"}: true, 260 }, 261 touched: map[models.AlertKey]time.Time{ 262 "a{a=b}": queryTime.Add(-20 * time.Minute), 263 "a{a=c}": queryTime.Add(-19 * time.Minute), 264 }, 265 }) 266 } 267 268 func TestUnknown_WithError(t *testing.T) { 269 defer setup()() 270 271 s := testSched(t, &schedTest{ 272 conf: `alert a { 273 crit = avg(q("avg:m{a=*}", "5m", "")) > 0 274 }`, 275 queries: map[string]opentsdb.ResponseSet{ 276 `q("avg:m{a=*}", ` + window5Min + `)`: nil, 277 }, 278 state: map[schedState]bool{}, 279 touched: map[models.AlertKey]time.Time{ 280 "a{a=b}": queryTime.Add(-10 * time.Minute), 281 }, 282 }) 283 284 if s.AlertSuccessful("a") { 285 t.Fatal("Expected alert a to be in a failed state") 286 } 287 } 288 289 func TestRename(t *testing.T) { 290 defer setup()() 291 testSched(t, &schedTest{ 292 conf: ` 293 alert ping.host { 294 295 $q = max(rename(q("sum:bosun.ping.timeout{dst_host=*,host=ny-kbrandt02}", "5m", ""), "host=source,dst_host=host")) 296 warn = $q 297 } 298 299 alert os.cpu { 300 depends = max(rename(q("sum:bosun.ping.timeout{dst_host=*,host=ny-kbrandt02}", "5m", ""), "host=source,dst_host=host")) 301 $q = avg(q("avg:os.cpu{host=*}", "5m", "")) 302 warn = $q < 99 303 }`, 304 queries: map[string]opentsdb.ResponseSet{ 305 `q("sum:bosun.ping.timeout{dst_host=*,host=ny-kbrandt02}", ` + window5Min + `)`: { 306 { 307 Metric: "bosun.ping.timeout", 308 Tags: opentsdb.TagSet{"host": "ny-kbrandt02", "dst_host": "ny-web01"}, 309 DPS: map[string]opentsdb.Point{"0": 1}, 310 }, 311 { 312 Metric: "bosun.ping.timeout", 313 Tags: opentsdb.TagSet{"host": "ny-kbrandt02", "dst_host": "ny-web02"}, 314 DPS: map[string]opentsdb.Point{"0": 0}, 315 }, 316 { 317 Metric: "bosun.ping.timeout", 318 Tags: opentsdb.TagSet{"host": "ny-kbrandt02", "dst_host": "ny-kbrandt02"}, 319 DPS: map[string]opentsdb.Point{"0": 1}, 320 }, 321 }, 322 `q("avg:os.cpu{host=*}", ` + window5Min + `)`: { 323 { 324 Metric: "os.cpu", 325 Tags: opentsdb.TagSet{"host": "ny-web01"}, 326 DPS: map[string]opentsdb.Point{"0": 1}, 327 }, 328 { 329 Metric: "os.cpu", 330 Tags: opentsdb.TagSet{"host": "ny-web02"}, 331 DPS: map[string]opentsdb.Point{"0": 1}, 332 }, 333 }, 334 }, 335 state: map[schedState]bool{ 336 {"ping.host{host=ny-kbrandt02,source=ny-kbrandt02}", "warning"}: true, 337 {"ping.host{host=ny-web01,source=ny-kbrandt02}", "warning"}: true, 338 {"os.cpu{host=ny-web02}", "warning"}: true, 339 }, 340 }) 341 } 342 343 func TestUnknownsAreNormal(t *testing.T) { 344 defer setup()() 345 testSched(t, &schedTest{ 346 conf: `alert a { 347 unknownIsNormal = true 348 crit = avg(q("avg:m{a=*}", "5m", "")) > 0 349 }`, 350 queries: map[string]opentsdb.ResponseSet{ 351 `q("avg:m{a=*}", ` + window5Min + `)`: {}, 352 }, 353 state: map[schedState]bool{}, 354 touched: map[models.AlertKey]time.Time{ 355 "a{a=b}": queryTime.Add(-10 * time.Minute), 356 "a{a=c}": queryTime.Add(-9 * time.Minute), 357 }, 358 }) 359 }