bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/web/expr.go (about)

     1  package web
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net/http"
     8  	"net/mail"
     9  	"net/url"
    10  	"regexp"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	"bosun.org/cmd/bosun/cache"
    18  	"bosun.org/cmd/bosun/conf"
    19  	"bosun.org/cmd/bosun/conf/rule"
    20  	"bosun.org/cmd/bosun/expr"
    21  	"bosun.org/cmd/bosun/sched"
    22  	"bosun.org/models"
    23  	"bosun.org/opentsdb"
    24  	"github.com/MiniProfiler/go/miniprofiler"
    25  	"github.com/bradfitz/slice"
    26  )
    27  
// cacheObj is the cache used when executing expressions/rules via the web UI.
// It is created once and retained for the lifetime of bosun.
//
// Matt and I decided not to expire the cache at given points (such as
// reloading the rule page), but I forgot why.
//
// The only risk: if you query your store for data from -5m to now while the
// store does not yet have the latest points, and 5m later you query -10m to
// -5m, you get the same cached data back, including the incomplete last points.
var cacheObj = cache.New("web", 100)
    33  
    34  func Expr(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (v interface{}, err error) {
    35  	defer func() {
    36  		if pan := recover(); pan != nil {
    37  			v = nil
    38  			err = fmt.Errorf("%v", pan)
    39  		}
    40  	}()
    41  	text, err := ioutil.ReadAll(r.Body)
    42  	if err != nil {
    43  		return nil, err
    44  	}
    45  	rawLines := strings.Split(strings.TrimSpace(string(text)), "\n")
    46  	var lines []string
    47  	for _, line := range rawLines {
    48  		// remove comments and empty lines before processing so comments can be after the final line
    49  		if line == "" || strings.HasPrefix(line, "#") {
    50  			continue
    51  		}
    52  		lines = append(lines, line)
    53  	}
    54  	var expression string
    55  	vars := map[string]string{}
    56  	varRegex := regexp.MustCompile(`(\$\w+)\s*=(.*)`)
    57  	for i, line := range lines {
    58  		line = strings.TrimSpace(line)
    59  		if line == "" || strings.HasPrefix(line, "#") {
    60  			continue
    61  		}
    62  		// last line is expression we care about
    63  		if i == len(lines)-1 {
    64  			expression = schedule.RuleConf.Expand(line, vars, false)
    65  		} else { // must be a variable declatation
    66  			matches := varRegex.FindStringSubmatch(line)
    67  			if len(matches) == 0 {
    68  				return nil, fmt.Errorf("Expect all lines before final expression to be variable declarations of form `$foo = something`")
    69  			}
    70  			name := strings.TrimSpace(matches[1])
    71  			value := strings.TrimSpace(matches[2])
    72  			vars[name] = schedule.RuleConf.Expand(value, vars, false)
    73  		}
    74  	}
    75  	e, err := expr.New(expression, schedule.RuleConf.GetFuncs(schedule.SystemConf.EnabledBackends()))
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	now, err := getTime(r)
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	// it may not strictly be necessary to recreate the contexts each time, but we do to be safe
    84  	backends := &expr.Backends{
    85  		TSDBContext:       schedule.SystemConf.GetTSDBContext(),
    86  		GraphiteContext:   schedule.SystemConf.GetGraphiteContext(),
    87  		InfluxConfig:      schedule.SystemConf.GetInfluxContext(),
    88  		ElasticHosts:      schedule.SystemConf.GetElasticContext(),
    89  		AzureMonitor:      schedule.SystemConf.GetAzureMonitorContext(),
    90  		PromConfig:        schedule.SystemConf.GetPromContext(),
    91  		CloudWatchContext: schedule.SystemConf.GetCloudWatchContext(),
    92  	}
    93  	providers := &expr.BosunProviders{
    94  		Cache:     cacheObj,
    95  		Search:    schedule.Search,
    96  		Squelched: nil,
    97  		History:   nil,
    98  		Annotate:  AnnotateBackend,
    99  	}
   100  	res, queries, err := e.Execute(backends, providers, t, now, 0, false, "Web: expression execution")
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	for _, r := range res.Results {
   105  		if r.Computations == nil {
   106  			r.Computations = make(models.Computations, 0)
   107  		}
   108  	}
   109  	ret := struct {
   110  		Type    string
   111  		Results []*expr.Result
   112  		Queries map[string]opentsdb.Request
   113  	}{
   114  		e.Tree.Root.Return().String(),
   115  		res.Results,
   116  		make(map[string]opentsdb.Request),
   117  	}
   118  	for _, q := range queries {
   119  		if e, err := url.QueryUnescape(q.String()); err == nil {
   120  			ret.Queries[e] = q
   121  		}
   122  	}
   123  	return ret, nil
   124  }
   125  
   126  func getTime(r *http.Request) (now time.Time, err error) {
   127  	now = time.Now().UTC()
   128  	if fd := r.FormValue("date"); len(fd) > 0 {
   129  		if ft := r.FormValue("time"); len(ft) > 0 {
   130  			fd += " " + ft
   131  		} else {
   132  			fd += " " + now.Format("15:04:05")
   133  		}
   134  		now, err = time.Parse("2006-01-02 15:04:05", fd)
   135  		if err != nil {
   136  			now, err = time.Parse("2006-01-02 15:04", fd)
   137  		}
   138  	}
   139  	return
   140  }
   141  
// Res bundles an embedded *models.Event with a models.AlertKey.
type Res struct {
	*models.Event
	Key models.AlertKey
}
   146  
// procRule evaluates alert a at time now against a throwaway schedule and
// returns the outcome as a ruleResult.
//
// A new sched.Schedule named "web" is initialised from the global schedule's
// system configuration, data access and search, together with ruleConf. The
// alert's warn and crit expressions are then checked using the shared web
// cache (cacheObj), and the resulting events are grouped by status.
//
// When summary is false and at least one event exists, a primary incident is
// built and the alert's templates are rendered for it:
//   - template_group, if non-empty, is parsed as a tag set and used to select
//     the alert key for the primary incident; if no key matches, the first
//     sorted key is used and a warning is recorded.
//   - incidentID becomes the incident's Id; if it resolves to a stored
//     incident, that incident's PreviousIds are copied over.
//   - template errors (including recovered panics) are added to the warnings
//     and the "bad template" notification is rendered instead.
//   - email, if non-empty and rendering succeeded, receives the rendered alert.
//
// When summary is true, only the status lists, time, events and warnings are
// populated.
func procRule(t miniprofiler.Timer, ruleConf conf.RuleConfProvider, a *conf.Alert, now time.Time, summary bool, email string, template_group string, incidentID int) (*ruleResult, error) {
	s := &sched.Schedule{}
	s.Search = schedule.Search
	if err := s.Init("web", schedule.SystemConf, ruleConf, schedule.DataAccess, AnnotateBackend, false, false); err != nil {
		return nil, err
	}
	rh := s.NewRunHistory(now, cacheObj)
	// Warn is checked before crit; both record their results in rh.
	if _, err, _ := s.CheckExpr(t, rh, a, a.Warn, models.StWarning, nil); err != nil {
		return nil, err
	}
	if _, err, _ := s.CheckExpr(t, rh, a, a.Crit, models.StCritical, nil); err != nil {
		return nil, err
	}
	keys := make(models.AlertKeys, len(rh.Events))
	// Non-nil empty slices are used here rather than nil slices.
	criticals, warnings, normals := make([]models.AlertKey, 0), make([]models.AlertKey, 0), make([]models.AlertKey, 0)
	i := 0
	// Stamp every event with the evaluation time and bucket its key by status.
	for k, v := range rh.Events {
		v.Time = now
		keys[i] = k
		i++
		switch v.Status {
		case models.StNormal:
			normals = append(normals, k)
		case models.StWarning:
			warnings = append(warnings, k)
		case models.StCritical:
			criticals = append(criticals, k)
		default:
			return nil, fmt.Errorf("unknown state type %v", v.Status)
		}
	}
	// Map iteration order is random; sort so keys[0] is deterministic.
	sort.Sort(keys)
	var rt *models.RenderedTemplates
	var data interface{}
	var nots map[string]*conf.PreparedNotifications
	var aNots map[string]map[string]*conf.PreparedNotifications
	warning := make([]string, 0)

	if !summary && len(keys) > 0 {
		var primaryIncident *models.IncidentState
		if template_group != "" {
			ts, err := opentsdb.ParseTags(template_group)
			if err != nil {
				return nil, err
			}
			// Use the first (sorted) key accepted by the Subset check.
			for _, ak := range keys {
				if ak.Group().Subset(ts) {
					primaryIncident = sched.NewIncident(ak)
					primaryIncident.Events = []models.Event{*rh.Events[ak]}
					break
				}
			}
		}
		// Fall back to the first key when no template group was given or none matched.
		if primaryIncident == nil {
			primaryIncident = sched.NewIncident(keys[0])
			primaryIncident.Events = []models.Event{*rh.Events[keys[0]]}
			if template_group != "" {
				warning = append(warning, fmt.Sprintf("template group %s was not a subset of any result", template_group))
			}
		}
		e := primaryIncident.Events[0]
		// Prefer the crit result over the warn result when both are present.
		if e.Crit != nil {
			primaryIncident.Result = e.Crit
		} else if e.Warn != nil {
			primaryIncident.Result = e.Warn
		}
		var errs []error
		primaryIncident.Id = int64(incidentID)
		// See if the incidentID corresponds to a real Incident, and if so
		// get some information from the real incident
		if realIncident, err := schedule.DataAccess.State().GetIncidentState(primaryIncident.Id); err == nil {
			primaryIncident.PreviousIds = realIncident.PreviousIds
		}

		primaryIncident.Start = time.Now().UTC()
		primaryIncident.CurrentStatus = e.Status
		primaryIncident.LastAbnormalStatus = e.Status
		primaryIncident.LastAbnormalTime = models.Epoch{Time: time.Now().UTC()}
		// Render inside a closure so a panic in template execution is
		// recovered and reported instead of aborting the request.
		func() {
			defer func() {
				if err := recover(); err != nil {
					s := fmt.Sprint(err)
					warning = append(warning, s)
					errs = append(errs, fmt.Errorf("panic rendering templates: %s", err))
				}
			}()
			rt, errs = s.ExecuteAll(rh, a, primaryIncident, false)
		}()
		for _, err := range errs {
			warning = append(warning, err.Error())
		}
		// Guarantee a non-nil rt for the code below.
		if rt == nil {
			rt = &models.RenderedTemplates{}
		}

		if len(errs) > 0 {
			// Rendering failed: show the bad-template notification instead.
			var err error
			rt.Subject, rt.Body, err = s.ExecuteBadTemplate(errs, rh, a, primaryIncident)
			if err != nil {
				rt.Subject = fmt.Sprintf("unable to create tempalate error notification: %v", err)
			}
		} else if email != "" {
			// Rendering succeeded and a test address was supplied: send the alert to it.
			m, err := mail.ParseAddress(email)
			if err != nil {
				return nil, err
			}
			n := conf.Notification{
				Email: []*mail.Address{m},
			}
			n.PrepareAlert(rt, string(primaryIncident.AlertKey), rt.Attachments...).Send(s.SystemConf)
		}
		nots, aNots = buildNotificationPreviews(a, rt, primaryIncident, s.SystemConf, ruleConf)
		data = s.Data(rh, primaryIncident, a, false)
	}

	rr := &ruleResult{
		Criticals:           criticals,
		Warnings:            warnings,
		Normals:             normals,
		Time:                now,
		Data:                data,
		Result:              rh.Events,
		Warning:             warning,
		RenderedTemplates:   rt,
		Notifications:       nots,
		ActionNotifications: aNots,
	}
	return rr, nil
}
   276  
   277  func buildNotificationPreviews(a *conf.Alert, rt *models.RenderedTemplates, incident *models.IncidentState, c conf.SystemConfProvider, rcp conf.RuleConfProvider, attachments ...*models.Attachment) (map[string]*conf.PreparedNotifications, map[string]map[string]*conf.PreparedNotifications) {
   278  	previews := map[string]*conf.PreparedNotifications{}
   279  	actionPreviews := map[string]map[string]*conf.PreparedNotifications{}
   280  	nots := map[string]*conf.Notification{}
   281  	for name, not := range a.CritNotification.GetAllChained() {
   282  		nots[name] = not
   283  	}
   284  	for name, not := range a.WarnNotification.GetAllChained() {
   285  		nots[name] = not
   286  	}
   287  
   288  	for name, not := range nots {
   289  		previews[name] = not.PrepareAlert(rt, string(incident.AlertKey), attachments...)
   290  		actions := map[string]*conf.PreparedNotifications{}
   291  		actionPreviews[name] = actions
   292  		// for all action types. just loop through known range. Update this if any get added
   293  		for at := models.ActionAcknowledge; at <= models.ActionCancelClose; at++ {
   294  			if !not.RunOnActionType(at) {
   295  				continue
   296  			}
   297  			incidents := []*models.IncidentState{incident}
   298  			actions[at.String()] = not.PrepareAction(at, a.Template, c, incidents, "somebody", "I took care of this", rcp)
   299  		}
   300  	}
   301  	return previews, actionPreviews
   302  }
   303  
// ruleResult is what procRule returns for a single evaluation of an alert.
type ruleResult struct {
	// Criticals, Warnings and Normals hold the alert keys whose event has
	// the corresponding status.
	Criticals []models.AlertKey
	Warnings  []models.AlertKey
	Normals   []models.AlertKey
	// Time is the evaluation time passed to procRule.
	Time time.Time
	// RenderedTemplates is nil when procRule did not render templates
	// (summary runs, or runs without any events).
	*models.RenderedTemplates
	// Notifications and ActionNotifications are the previews produced by
	// buildNotificationPreviews; nil when templates were not rendered.
	Notifications       map[string]*conf.PreparedNotifications
	ActionNotifications map[string]map[string]*conf.PreparedNotifications
	// Data is the template data for the primary incident; nil when
	// templates were not rendered.
	Data interface{}
	// Result holds the run's events by alert key.
	Result map[models.AlertKey]*models.Event
	// Warning collects non-fatal messages, such as template errors.
	Warning []string
}
   316  
   317  func TestHTTPNotification(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (interface{}, error) {
   318  	prep := &conf.PreparedHttp{}
   319  	dec := json.NewDecoder(r.Body)
   320  	if err := dec.Decode(prep); err != nil {
   321  		return nil, err
   322  	}
   323  	code, err := prep.Send()
   324  	dat := &struct {
   325  		Error  string
   326  		Status int
   327  	}{"", code}
   328  	if err != nil {
   329  		dat.Error = err.Error()
   330  	}
   331  	return dat, nil
   332  }
   333  
   334  func Rule(t miniprofiler.Timer, w http.ResponseWriter, r *http.Request) (interface{}, error) {
   335  	var from, to time.Time
   336  	var err error
   337  	if f := r.FormValue("from"); len(f) > 0 {
   338  		from, err = time.Parse(tsdbFormatSecs, f)
   339  		if err != nil {
   340  			return nil, err
   341  		}
   342  	}
   343  	if f := r.FormValue("to"); len(f) > 0 {
   344  		to, err = time.Parse(tsdbFormatSecs, f)
   345  		if err != nil {
   346  			return nil, err
   347  		}
   348  	}
   349  	intervals := 1
   350  	if i := r.FormValue("intervals"); len(i) > 0 {
   351  		intervals, err = strconv.Atoi(r.FormValue("intervals"))
   352  		if err != nil {
   353  			return nil, err
   354  		}
   355  		if intervals < 1 {
   356  			return nil, fmt.Errorf("must be > 0 intervals")
   357  		}
   358  	}
   359  	incidentID := 42
   360  	if incident := r.FormValue("incidentId"); len(incident) > 0 {
   361  		incidentID, err = strconv.Atoi(incident)
   362  		if err != nil {
   363  			return nil, err
   364  		}
   365  	}
   366  	if fz, tz := from.IsZero(), to.IsZero(); fz && tz {
   367  		from = time.Now()
   368  	} else if fz && !tz {
   369  		return nil, fmt.Errorf("cannot specify to without from")
   370  	} else if !fz && tz && intervals > 1 {
   371  		return nil, fmt.Errorf("cannot specify intervals without from and to")
   372  	}
   373  
   374  	c, a, hash, err := buildConfig(r)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  
   379  	ch := make(chan int)
   380  	errch := make(chan error, intervals)
   381  	resch := make(chan *ruleResult, intervals)
   382  	var wg sync.WaitGroup
   383  	diff := -from.Sub(to)
   384  	if intervals > 1 {
   385  		diff /= time.Duration(intervals - 1)
   386  	}
   387  	worker := func() {
   388  		wg.Add(1)
   389  		for interval := range ch {
   390  			t.Step(fmt.Sprintf("interval %v", interval), func(t miniprofiler.Timer) {
   391  				now := from.Add(diff * time.Duration(interval))
   392  				res, err := procRule(t, c, a, now, interval != 0, r.FormValue("email"), r.FormValue("template_group"), incidentID)
   393  				resch <- res
   394  				errch <- err
   395  			})
   396  		}
   397  		defer wg.Done()
   398  	}
   399  	for i := 0; i < 20; i++ {
   400  		go worker()
   401  	}
   402  	for i := 0; i < intervals; i++ {
   403  		ch <- i
   404  	}
   405  	close(ch)
   406  	wg.Wait()
   407  	close(errch)
   408  	close(resch)
   409  	type Result struct {
   410  		Group  models.AlertKey
   411  		Result *models.Event
   412  	}
   413  	type Set struct {
   414  		Critical, Warning, Normal int
   415  		Time                      string
   416  		Results                   []*Result `json:",omitempty"`
   417  	}
   418  	type History struct {
   419  		Time, EndTime time.Time
   420  		Status        string
   421  	}
   422  	type Histories struct {
   423  		History []*History
   424  	}
   425  
   426  	ret := struct {
   427  		Errors       []string `json:",omitempty"`
   428  		Warnings     []string `json:",omitempty"`
   429  		Sets         []*Set
   430  		AlertHistory map[models.AlertKey]*Histories
   431  		*models.RenderedTemplates
   432  		Notifications       map[string]*conf.PreparedNotifications
   433  		ActionNotifications map[string]map[string]*conf.PreparedNotifications
   434  		Data                interface{} `json:",omitempty"`
   435  		Hash                string
   436  	}{
   437  		AlertHistory: make(map[models.AlertKey]*Histories),
   438  		Hash:         hash,
   439  	}
   440  	for err := range errch {
   441  		if err == nil {
   442  			continue
   443  		}
   444  		ret.Errors = append(ret.Errors, err.Error())
   445  	}
   446  	for res := range resch {
   447  		if res == nil {
   448  			continue
   449  		}
   450  		set := Set{
   451  			Critical: len(res.Criticals),
   452  			Warning:  len(res.Warnings),
   453  			Normal:   len(res.Normals),
   454  			Time:     res.Time.Format(tsdbFormatSecs),
   455  		}
   456  		if res.Data != nil {
   457  			ret.RenderedTemplates = res.RenderedTemplates
   458  			ret.Notifications = res.Notifications
   459  			ret.ActionNotifications = res.ActionNotifications
   460  			ret.Data = res.Data
   461  			for k, v := range res.Result {
   462  				set.Results = append(set.Results, &Result{
   463  					Group:  k,
   464  					Result: v,
   465  				})
   466  			}
   467  			slice.Sort(set.Results, func(i, j int) bool {
   468  				a := set.Results[i]
   469  				b := set.Results[j]
   470  				if a.Result.Status != b.Result.Status {
   471  					return a.Result.Status > b.Result.Status
   472  				}
   473  				return a.Group < b.Group
   474  			})
   475  		}
   476  		for k, v := range res.Result {
   477  			if ret.AlertHistory[k] == nil {
   478  				ret.AlertHistory[k] = new(Histories)
   479  			}
   480  			h := ret.AlertHistory[k]
   481  			h.History = append(h.History, &History{
   482  				Time:   v.Time,
   483  				Status: v.Status.String(),
   484  			})
   485  		}
   486  		ret.Sets = append(ret.Sets, &set)
   487  		ret.Warnings = append(ret.Warnings, res.Warning...)
   488  	}
   489  	slice.Sort(ret.Sets, func(i, j int) bool {
   490  		return ret.Sets[i].Time < ret.Sets[j].Time
   491  	})
   492  	for _, histories := range ret.AlertHistory {
   493  		hist := histories.History
   494  		slice.Sort(hist, func(i, j int) bool {
   495  			return hist[i].Time.Before(hist[j].Time)
   496  		})
   497  		for i := 1; i < len(hist); i++ {
   498  			if i < len(hist)-1 && hist[i].Status == hist[i-1].Status {
   499  				hist = append(hist[:i], hist[i+1:]...)
   500  				i--
   501  			}
   502  		}
   503  		for i, h := range hist[:len(hist)-1] {
   504  			h.EndTime = hist[i+1].Time
   505  		}
   506  		histories.History = hist[:len(hist)-1]
   507  	}
   508  	return &ret, nil
   509  }
   510  
   511  func buildConfig(r *http.Request) (c conf.RuleConfProvider, a *conf.Alert, hash string, err error) {
   512  	config, err := ioutil.ReadAll(r.Body)
   513  	if err != nil {
   514  		return nil, nil, "", err
   515  	}
   516  	c, err = rule.NewConf("Test Config", schedule.SystemConf.EnabledBackends(), schedule.SystemConf.GetRuleVars(), string(config))
   517  	if err != nil {
   518  		return nil, nil, "", err
   519  	}
   520  	hash, err = sched.DefaultSched.DataAccess.Configs().SaveTempConfig(string(config))
   521  	if err != nil {
   522  		return nil, nil, "", err
   523  	}
   524  	alertName := r.FormValue("alert")
   525  	if alertName == "" {
   526  		return nil, nil, "", fmt.Errorf("must supply alert to run")
   527  	}
   528  	a = c.GetAlert(alertName)
   529  	if a == nil {
   530  		return nil, nil, "", fmt.Errorf("alert %s not found", alertName)
   531  	}
   532  	return c, a, hash, nil
   533  
   534  }