bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/sched/sched_test.go (about)

     1  package sched
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"log"
     8  	"net/http"
     9  	"net/http/httptest"
    10  	"net/url"
    11  	"sort"
    12  	"testing"
    13  	"time"
    14  
    15  	"bosun.org/cmd/bosun/conf"
    16  	"bosun.org/cmd/bosun/conf/rule"
    17  	"bosun.org/cmd/bosun/database"
    18  	"bosun.org/cmd/bosun/database/test"
    19  	"bosun.org/models"
    20  	"bosun.org/opentsdb"
    21  	"bosun.org/slog"
    22  	"github.com/MiniProfiler/go/miniprofiler"
    23  )
    24  
    25  func init() {
    26  	//slog.Set(&slog.StdLog{Log: log.New(os.Stdout, "", log.LstdFlags)})
    27  	slog.Set(&slog.StdLog{Log: log.New(ioutil.Discard, "", log.LstdFlags)})
    28  	log.SetOutput(ioutil.Discard)
    29  }
    30  
    31  type schedState struct {
    32  	key, status string
    33  }
    34  
    35  type schedTest struct {
    36  	conf    string
    37  	queries map[string]opentsdb.ResponseSet
    38  	// state -> active
    39  	state   map[schedState]bool
    40  	touched map[models.AlertKey]time.Time
    41  }
    42  
    43  // test-only function to check all alerts immediately.
    44  func check(s *Schedule, t time.Time) {
    45  	names := []string{}
    46  	for a := range s.RuleConf.GetAlerts() {
    47  		names = append(names, a)
    48  	}
    49  	sort.Strings(names)
    50  	for _, n := range names {
    51  		a := s.RuleConf.GetAlerts()[n]
    52  		s.ctx.runTime = t
    53  		s.checkAlert(a, s.ctx)
    54  	}
    55  }
    56  
    57  var db database.DataAccess
    58  
    59  func setup() func() {
    60  	testDb, closer := dbtest.StartTestRedis(9992)
    61  	db = testDb
    62  	return closer
    63  }
    64  
    65  func initSched(sc conf.SystemConfProvider, c conf.RuleConfProvider) (*Schedule, error) {
    66  	s := new(Schedule)
    67  	err := s.Init("test_schedule", sc, c, db, nil, false, false)
    68  	return s, err
    69  }
    70  
    71  func testSched(t *testing.T, st *schedTest) (s *Schedule) {
    72  	bosunStartupTime = time.Date(1900, 0, 0, 0, 0, 0, 0, time.UTC) //pretend we've been running for a while.
    73  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    74  		var req opentsdb.Request
    75  		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
    76  			log.Fatal(err)
    77  		}
    78  		var resp opentsdb.ResponseSet
    79  		for _, rq := range req.Queries {
    80  			qs := fmt.Sprintf(`q("%s", "%v", "%v")`, rq, req.Start, req.End)
    81  			q, ok := st.queries[qs]
    82  			if !ok {
    83  				t.Errorf("unknown query: %s", qs)
    84  				return
    85  			}
    86  			if q == nil {
    87  				return // Put nil entry in map to simulate opentsdb error.
    88  			}
    89  			resp = append(resp, q...)
    90  		}
    91  		if err := json.NewEncoder(w).Encode(&resp); err != nil {
    92  			log.Fatal(err)
    93  		}
    94  	}))
    95  	defer ts.Close()
    96  	u, err := url.Parse(ts.URL)
    97  	if err != nil {
    98  		t.Fatal(err)
    99  	}
   100  	//confs := "tsdbHost = " + u.Host + "\n" + st.conf
   101  	c, err := rule.NewConf("testconf", conf.EnabledBackends{OpenTSDB: true}, nil, st.conf)
   102  	if err != nil {
   103  		t.Error(err)
   104  		t.Logf("conf:\n%s", st.conf)
   105  		return
   106  	}
   107  
   108  	time.Sleep(time.Millisecond * 250)
   109  	sysConf := &conf.SystemConf{CheckFrequency: conf.Duration{Duration: time.Minute * 5}, DefaultRunEvery: 1, UnknownThreshold: 5, MinGroupSize: 5, OpenTSDBConf: conf.OpenTSDBConf{Host: u.Host, ResponseLimit: 1 << 20}}
   110  	s, _ = initSched(sysConf, c)
   111  	for ak, time := range st.touched {
   112  		s.DataAccess.State().TouchAlertKey(ak, time)
   113  	}
   114  	check(s, queryTime)
   115  	groups, err := s.MarshalGroups(new(miniprofiler.Profile), "")
   116  	if err != nil {
   117  		t.Error(err)
   118  		return
   119  	}
   120  	var check func(g *StateGroup)
   121  	check = func(g *StateGroup) {
   122  		for _, c := range g.Children {
   123  			check(c)
   124  		}
   125  		if g.AlertKey == "" {
   126  			return
   127  		}
   128  		ss := schedState{string(g.AlertKey), g.Status.String()}
   129  		v, ok := st.state[ss]
   130  		if !ok {
   131  			t.Errorf("unexpected state: %s, %s", g.AlertKey, g.Status)
   132  			return
   133  		}
   134  		if v != g.Active {
   135  			t.Errorf("bad active: %s, %s", g.AlertKey, g.Status)
   136  			return
   137  		}
   138  		delete(st.state, ss)
   139  	}
   140  	for _, v := range groups.Groups.NeedAck {
   141  		check(v)
   142  	}
   143  	for _, v := range groups.Groups.Acknowledged {
   144  		check(v)
   145  	}
   146  	for k := range st.state {
   147  		t.Errorf("unused state: %s", k)
   148  	}
   149  	return s
   150  }
   151  
   152  var queryTime = time.Date(2000, 1, 1, 12, 0, 0, 0, time.UTC)
   153  var window5Min = `"9.467277e+08", "9.46728e+08"`
   154  
   155  func TestCrit(t *testing.T) {
   156  	defer setup()()
   157  	s := testSched(t, &schedTest{
   158  		conf: `alert a {
   159  			crit = avg(q("avg:m{a=b}", "5m", "")) > 0
   160  		}`,
   161  		queries: map[string]opentsdb.ResponseSet{
   162  			`q("avg:m{a=b}", ` + window5Min + `)`: {
   163  				{
   164  					Metric: "m",
   165  					Tags:   opentsdb.TagSet{"a": "b"},
   166  					DPS:    map[string]opentsdb.Point{"0": 1},
   167  				},
   168  			},
   169  		},
   170  		state: map[schedState]bool{
   171  			{"a{a=b}", "critical"}: true,
   172  		},
   173  	})
   174  	if !s.AlertSuccessful("a") {
   175  		t.Fatal("Expected alert a to be successful")
   176  	}
   177  }
   178  
   179  func TestBandDisableUnjoined(t *testing.T) {
   180  	defer setup()()
   181  	testSched(t, &schedTest{
   182  		conf: `alert a {
   183  			$sum = "sum:m{a=*}"
   184  			$band = band($sum, "1m", "1h", 1)
   185  			crit = avg(q($sum, "1m", "")) > avg($band) + dev($band)
   186  		}`,
   187  		queries: map[string]opentsdb.ResponseSet{
   188  			`q("sum:m{a=*}", "9.4672794e+08", "9.46728e+08")`: {
   189  				{
   190  					Metric: "m",
   191  					Tags:   opentsdb.TagSet{"a": "b"},
   192  					DPS:    map[string]opentsdb.Point{"0": 1},
   193  				},
   194  			},
   195  			`q("sum:m{a=*}", "9.4672434e+08", "9.467244e+08")`: {
   196  				{
   197  					Metric: "m",
   198  					Tags:   opentsdb.TagSet{"a": "c"},
   199  					DPS:    map[string]opentsdb.Point{"0": 1},
   200  				},
   201  			},
   202  		},
   203  	})
   204  }
   205  
   206  func TestCount(t *testing.T) {
   207  	defer setup()()
   208  	testSched(t, &schedTest{
   209  		conf: `alert a {
   210  			crit = count("sum:m{a=*}", "5m", "") != 2
   211  		}`,
   212  		queries: map[string]opentsdb.ResponseSet{
   213  			`q("sum:m{a=*}", ` + window5Min + `)`: {
   214  				{
   215  					Metric: "m",
   216  					Tags:   opentsdb.TagSet{"a": "b"},
   217  					DPS:    map[string]opentsdb.Point{"0": 1},
   218  				},
   219  				{
   220  					Metric: "m",
   221  					Tags:   opentsdb.TagSet{"a": "c"},
   222  					DPS:    map[string]opentsdb.Point{"0": 1},
   223  				},
   224  			},
   225  		},
   226  	})
   227  }
   228  
   229  func TestUnknown(t *testing.T) {
   230  	defer setup()()
   231  	testSched(t, &schedTest{
   232  		conf: `alert a {
   233  			crit = avg(q("avg:m{a=*}", "5m", "")) > 0
   234  		}`,
   235  		queries: map[string]opentsdb.ResponseSet{
   236  			`q("avg:m{a=*}", ` + window5Min + `)`: {},
   237  		},
   238  		state: map[schedState]bool{
   239  			{"a{a=b}", "unknown"}: true,
   240  		},
   241  		touched: map[models.AlertKey]time.Time{
   242  			"a{a=b}": queryTime.Add(-10 * time.Minute),
   243  			"a{a=c}": queryTime.Add(-9 * time.Minute),
   244  		},
   245  	})
   246  }
   247  
   248  func TestUnknown_HalfFreq(t *testing.T) {
   249  	defer setup()()
   250  	testSched(t, &schedTest{
   251  		conf: `alert a {
   252  			crit = avg(q("avg:m{a=*}", "5m", "")) > 0
   253  			runEvery = 2
   254  		}`,
   255  		queries: map[string]opentsdb.ResponseSet{
   256  			`q("avg:m{a=*}", ` + window5Min + `)`: {},
   257  		},
   258  		state: map[schedState]bool{
   259  			{"a{a=b}", "unknown"}: true,
   260  		},
   261  		touched: map[models.AlertKey]time.Time{
   262  			"a{a=b}": queryTime.Add(-20 * time.Minute),
   263  			"a{a=c}": queryTime.Add(-19 * time.Minute),
   264  		},
   265  	})
   266  }
   267  
   268  func TestUnknown_WithError(t *testing.T) {
   269  	defer setup()()
   270  
   271  	s := testSched(t, &schedTest{
   272  		conf: `alert a {
   273  			crit = avg(q("avg:m{a=*}", "5m", "")) > 0
   274  		}`,
   275  		queries: map[string]opentsdb.ResponseSet{
   276  			`q("avg:m{a=*}", ` + window5Min + `)`: nil,
   277  		},
   278  		state: map[schedState]bool{},
   279  		touched: map[models.AlertKey]time.Time{
   280  			"a{a=b}": queryTime.Add(-10 * time.Minute),
   281  		},
   282  	})
   283  
   284  	if s.AlertSuccessful("a") {
   285  		t.Fatal("Expected alert a to be in a failed state")
   286  	}
   287  }
   288  
   289  func TestRename(t *testing.T) {
   290  	defer setup()()
   291  	testSched(t, &schedTest{
   292  		conf: `
   293  		alert ping.host {
   294    
   295      $q = max(rename(q("sum:bosun.ping.timeout{dst_host=*,host=ny-kbrandt02}", "5m", ""), "host=source,dst_host=host"))
   296      warn = $q
   297  }
   298  
   299  		alert os.cpu {
   300      			depends = max(rename(q("sum:bosun.ping.timeout{dst_host=*,host=ny-kbrandt02}", "5m", ""), "host=source,dst_host=host"))
   301      			$q = avg(q("avg:os.cpu{host=*}", "5m", ""))
   302      			warn = $q < 99
   303  			}`,
   304  		queries: map[string]opentsdb.ResponseSet{
   305  			`q("sum:bosun.ping.timeout{dst_host=*,host=ny-kbrandt02}", ` + window5Min + `)`: {
   306  				{
   307  					Metric: "bosun.ping.timeout",
   308  					Tags:   opentsdb.TagSet{"host": "ny-kbrandt02", "dst_host": "ny-web01"},
   309  					DPS:    map[string]opentsdb.Point{"0": 1},
   310  				},
   311  				{
   312  					Metric: "bosun.ping.timeout",
   313  					Tags:   opentsdb.TagSet{"host": "ny-kbrandt02", "dst_host": "ny-web02"},
   314  					DPS:    map[string]opentsdb.Point{"0": 0},
   315  				},
   316  				{
   317  					Metric: "bosun.ping.timeout",
   318  					Tags:   opentsdb.TagSet{"host": "ny-kbrandt02", "dst_host": "ny-kbrandt02"},
   319  					DPS:    map[string]opentsdb.Point{"0": 1},
   320  				},
   321  			},
   322  			`q("avg:os.cpu{host=*}", ` + window5Min + `)`: {
   323  				{
   324  					Metric: "os.cpu",
   325  					Tags:   opentsdb.TagSet{"host": "ny-web01"},
   326  					DPS:    map[string]opentsdb.Point{"0": 1},
   327  				},
   328  				{
   329  					Metric: "os.cpu",
   330  					Tags:   opentsdb.TagSet{"host": "ny-web02"},
   331  					DPS:    map[string]opentsdb.Point{"0": 1},
   332  				},
   333  			},
   334  		},
   335  		state: map[schedState]bool{
   336  			{"ping.host{host=ny-kbrandt02,source=ny-kbrandt02}", "warning"}: true,
   337  			{"ping.host{host=ny-web01,source=ny-kbrandt02}", "warning"}:     true,
   338  			{"os.cpu{host=ny-web02}", "warning"}:                            true,
   339  		},
   340  	})
   341  }
   342  
   343  func TestUnknownsAreNormal(t *testing.T) {
   344  	defer setup()()
   345  	testSched(t, &schedTest{
   346  		conf: `alert a {
   347              unknownIsNormal = true
   348              crit = avg(q("avg:m{a=*}", "5m", "")) > 0
   349  		}`,
   350  		queries: map[string]opentsdb.ResponseSet{
   351  			`q("avg:m{a=*}", ` + window5Min + `)`: {},
   352  		},
   353  		state: map[schedState]bool{},
   354  		touched: map[models.AlertKey]time.Time{
   355  			"a{a=b}": queryTime.Add(-10 * time.Minute),
   356  			"a{a=c}": queryTime.Add(-9 * time.Minute),
   357  		},
   358  	})
   359  }