bosun.org@v0.0.0-20250213104149-b8d3e981f37d/cmd/bosun/sched/sched.go (about)

     1  package sched // import "bosun.org/cmd/bosun/sched"
     2  
     3  import (
     4  	"fmt"
     5  	"net/http"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"golang.org/x/net/context"
    11  
    12  	"bosun.org/annotate/backend"
    13  	"bosun.org/cmd/bosun/cache"
    14  	"bosun.org/cmd/bosun/conf"
    15  	"bosun.org/cmd/bosun/database"
    16  	"bosun.org/cmd/bosun/search"
    17  	"bosun.org/collect"
    18  	"bosun.org/metadata"
    19  	"bosun.org/models"
    20  	"bosun.org/opentsdb"
    21  	"bosun.org/slog"
    22  	"github.com/MiniProfiler/go/miniprofiler"
    23  	"github.com/bradfitz/slice"
    24  	"github.com/kylebrandt/boolq"
    25  )
    26  
// DefaultClient is the default http client for requests made from templates.
// It is configured in cmd/bosun/main.go.
var DefaultClient *http.Client
    29  
    30  func utcNow() time.Time {
    31  	return time.Now().UTC()
    32  }
    33  
// Schedule holds the scheduler's shared state: configuration providers,
// search/metadata access, pending notification queues, and the global
// schedule lock with its bookkeeping for the lock metrics.
type Schedule struct {
	// mutex is the schedule-wide lock; the three fields below record who
	// holds it, when it was acquired, and how long acquisition waited,
	// so Unlock can emit lock_time/lock_count metrics.
	mutex         sync.Mutex
	mutexHolder   string
	mutexAquired  time.Time
	mutexWaitTime int64

	RuleConf   conf.RuleConfProvider
	SystemConf conf.SystemConfProvider

	Search *search.Search

	annotate backend.Backend

	// skipLast and quiet mirror the corresponding Init flags.
	skipLast bool
	quiet    bool

	//channel signals an alert has added notifications, and notifications should be processed.
	nc chan interface{}
	//notifications to be sent immediately
	pendingNotifications map[*conf.Notification][]*IncidentWithTemplates

	//unknown states that need to be notified about. Collected and sent in batches.
	pendingUnknowns map[notificationGroupKey][]*models.IncidentState

	lastLogTimes map[models.AlertKey]time.Time
	LastCheck    time.Time

	ctx *checkContext

	DataAccess database.DataAccess

	// runnerContext is a context to track running alert routines
	runnerContext context.Context
	// cancelChecks is the function to call to cancel all alert routines
	cancelChecks context.CancelFunc
	// checksRunning waits for alert checks to finish before reloading.
	// Things that take significant time should be cancelled (i.e. expression
	// execution) whereas the runHistory is allowed to complete.
	checksRunning sync.WaitGroup
}
    74  
    75  func (s *Schedule) Init(name string, systemConf conf.SystemConfProvider, ruleConf conf.RuleConfProvider, dataAccess database.DataAccess, annotate backend.Backend, skipLast, quiet bool) error {
    76  	//initialize all variables and collections so they are ready to use.
    77  	//this will be called once at app start, and also every time the rule
    78  	//page runs, so be careful not to spawn long running processes that can't
    79  	//be avoided.
    80  	//var err error
    81  	s.skipLast = skipLast
    82  	s.quiet = quiet
    83  	s.SystemConf = systemConf
    84  	s.RuleConf = ruleConf
    85  	s.annotate = annotate
    86  	s.pendingUnknowns = make(map[notificationGroupKey][]*models.IncidentState)
    87  	s.lastLogTimes = make(map[models.AlertKey]time.Time)
    88  	s.LastCheck = utcNow()
    89  	s.ctx = &checkContext{utcNow(), cache.New(name, 0)}
    90  	s.DataAccess = dataAccess
    91  	// Initialize the context and waitgroup used to gracefully shutdown bosun as well as reload
    92  	s.runnerContext, s.cancelChecks = context.WithCancel(context.Background())
    93  	s.checksRunning = sync.WaitGroup{}
    94  
    95  	if s.Search == nil {
    96  		s.Search = search.NewSearch(s.DataAccess, skipLast)
    97  	}
    98  	return nil
    99  }
   100  
// checkContext carries per-check-run state: the time the run started and
// a cache used during that run.
type checkContext struct {
	runTime    time.Time
	checkCache *cache.Cache
}
   105  
// init registers metadata for the schedule-lock metrics emitted by
// Lock and Unlock.
func init() {
	metadata.AddMetricMeta(
		"bosun.schedule.lock_time", metadata.Counter, metadata.MilliSecond,
		"Length of time spent waiting for or holding the schedule lock.")
	metadata.AddMetricMeta(
		"bosun.schedule.lock_count", metadata.Counter, metadata.Count,
		"Number of times the given caller acquired the lock.")
}
   114  
// Lock acquires the schedule lock, recording the caller's name, the
// acquisition time, and how long the acquisition waited so Unlock can
// report the lock metrics.
func (s *Schedule) Lock(method string) {
	start := utcNow()
	s.mutex.Lock()
	s.mutexAquired = utcNow()
	s.mutexHolder = method
	s.mutexWaitTime = int64(s.mutexAquired.Sub(start) / time.Millisecond) // remember this so we don't have to call put until we leave the critical section.
}
   122  
// Unlock releases the schedule lock and emits the wait time, hold time,
// and acquisition count for the holder recorded by Lock.
func (s *Schedule) Unlock() {
	holder := s.mutexHolder
	start := s.mutexAquired
	waitTime := s.mutexWaitTime
	s.mutexHolder = ""
	s.mutex.Unlock()
	// Metrics are submitted after Unlock so collection does not extend the
	// critical section.
	collect.Add("schedule.lock_time", opentsdb.TagSet{"caller": holder, "op": "wait"}, waitTime)
	collect.Add("schedule.lock_time", opentsdb.TagSet{"caller": holder, "op": "hold"}, int64(time.Since(start)/time.Millisecond))
	collect.Add("schedule.lock_count", opentsdb.TagSet{"caller": holder}, 1)
}
   133  
   134  func (s *Schedule) GetLockStatus() (holder string, since time.Time) {
   135  	return s.mutexHolder, s.mutexAquired
   136  }
   137  
   138  func (s *Schedule) PutMetadata(k metadata.Metakey, v interface{}) error {
   139  
   140  	isCoreMeta := (k.Name == "desc" || k.Name == "unit" || k.Name == "rate")
   141  	if !isCoreMeta {
   142  		s.DataAccess.Metadata().PutTagMetadata(k.TagSet(), k.Name, fmt.Sprint(v), utcNow())
   143  		return nil
   144  	}
   145  	if k.Metric == "" {
   146  		err := fmt.Errorf("desc, rate, and unit require metric name")
   147  		slog.Error(err)
   148  		return err
   149  	}
   150  	return s.DataAccess.Metadata().PutMetricMetadata(k.Metric, k.Name, fmt.Sprint(v))
   151  }
   152  
// DeleteMetadata removes the named tag-metadata entry for the given tag set.
func (s *Schedule) DeleteMetadata(tags opentsdb.TagSet, name string) error {
	return s.DataAccess.Metadata().DeleteTagMetadata(tags, name)
}
   156  
   157  func (s *Schedule) MetadataMetrics(metric string) (*database.MetricMetadata, error) {
   158  	//denormalized metrics should give metric metadata for their undenormalized counterparts
   159  	if strings.HasPrefix(metric, "__") {
   160  		if idx := strings.Index(metric, "."); idx != -1 {
   161  			metric = metric[idx+1:]
   162  		}
   163  	}
   164  	mm, err := s.DataAccess.Metadata().GetMetricMetadata(metric)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	return mm, nil
   169  }
   170  
   171  func (s *Schedule) GetMetadata(metric string, subset opentsdb.TagSet) ([]metadata.Metasend, error) {
   172  	ms := make([]metadata.Metasend, 0)
   173  	if metric != "" {
   174  		meta, err := s.MetadataMetrics(metric)
   175  		if err != nil {
   176  			return nil, err
   177  		}
   178  		if meta == nil {
   179  			return nil, fmt.Errorf("metadata for metric %v not found", metric)
   180  		}
   181  		if meta.Desc != "" {
   182  			ms = append(ms, metadata.Metasend{
   183  				Metric: metric,
   184  				Name:   "desc",
   185  				Value:  meta.Desc,
   186  			})
   187  		}
   188  		if meta.Unit != "" {
   189  			ms = append(ms, metadata.Metasend{
   190  				Metric: metric,
   191  				Name:   "unit",
   192  				Value:  meta.Unit,
   193  			})
   194  		}
   195  		if meta.Rate != "" {
   196  			ms = append(ms, metadata.Metasend{
   197  				Metric: metric,
   198  				Name:   "rate",
   199  				Value:  meta.Rate,
   200  			})
   201  		}
   202  	} else {
   203  		meta, err := s.DataAccess.Metadata().GetTagMetadata(subset, "")
   204  		if err != nil {
   205  			return nil, err
   206  		}
   207  		for _, m := range meta {
   208  			tm := time.Unix(m.LastTouched, 0)
   209  			ms = append(ms, metadata.Metasend{
   210  				Tags:  m.Tags,
   211  				Name:  m.Name,
   212  				Value: m.Value,
   213  				Time:  &tm,
   214  			})
   215  		}
   216  	}
   217  	return ms, nil
   218  }
   219  
// States maps alert keys to their incident state.
type States map[models.AlertKey]*models.IncidentState

// StateTuple is the grouping key used by GroupStates: incidents that share
// all five fields are presented together.
type StateTuple struct {
	NeedAck       bool
	Active        bool
	Status        models.Status
	CurrentStatus models.Status
	Silenced      bool
}
   229  
   230  // GroupStates groups by NeedAck, Active, Status, and Silenced.
   231  func (states States) GroupStates(silenced SilenceTester) map[StateTuple]States {
   232  	r := make(map[StateTuple]States)
   233  	for ak, st := range states {
   234  		sil := silenced(ak) != nil
   235  		t := StateTuple{
   236  			NeedAck:       st.NeedAck,
   237  			Active:        st.IsActive(),
   238  			Status:        st.LastAbnormalStatus,
   239  			CurrentStatus: st.CurrentStatus,
   240  			Silenced:      sil,
   241  		}
   242  		if _, present := r[t]; !present {
   243  			r[t] = make(States)
   244  		}
   245  		r[t][ak] = st
   246  	}
   247  	return r
   248  }
   249  
// GroupSets returns slices of TagSets, grouped by most common ancestor. Those
// with no shared ancestor are grouped by alert name.
//
// Grouping is greedy: each pass counts every tag=value pair over the not-yet-
// grouped states, takes the most common pair, and claims all states carrying
// it (provided the count reaches minGroup). Remaining states are grouped by
// alert name when at least minGroup share one, else emitted individually.
// Note: if minGroup <= 0, the tag-pair phase is skipped entirely.
func (states States) GroupSets(minGroup int) map[string]models.AlertKeys {
	type Pair struct {
		k, v string
	}
	groups := make(map[string]models.AlertKeys)
	// seen marks states already claimed by a tag-pair group.
	seen := make(map[*models.IncidentState]bool)
	for {
		// Count tag=value occurrences across unclaimed states.
		counts := make(map[Pair]int)
		for _, s := range states {
			if seen[s] {
				continue
			}
			for k, v := range s.AlertKey.Group() {
				counts[Pair{k, v}]++
			}
		}
		if len(counts) == 0 {
			break
		}
		// Pick the most common pair (map iteration order makes ties arbitrary).
		max := 0
		var pair Pair
		for p, c := range counts {
			if c > max {
				max = c
				pair = p
			}
		}
		if max < minGroup || minGroup <= 0 {
			break
		}
		// Claim every unclaimed state carrying the winning pair.
		var group models.AlertKeys
		for _, s := range states {
			if seen[s] {
				continue
			}
			if s.AlertKey.Group()[pair.k] != pair.v {
				continue
			}
			seen[s] = true
			group = append(group, s.AlertKey)
		}
		if len(group) > 0 {
			groups[fmt.Sprintf("{%s=%s}", pair.k, pair.v)] = group
		}
	}
	// alerts: group the leftovers by alert name when enough share one.
	groupedByAlert := map[string]models.AlertKeys{}
	for _, s := range states {
		if seen[s] {
			continue
		}
		groupedByAlert[s.Alert] = append(groupedByAlert[s.Alert], s.AlertKey)
	}
	for a, aks := range groupedByAlert {
		if len(aks) >= minGroup {
			group := models.AlertKeys{}
			group = append(group, aks...)
			groups[a] = group
		}
	}
	// ungrouped: anything left becomes its own single-key group.
	for _, s := range states {
		if seen[s] || len(groupedByAlert[s.Alert]) >= minGroup {
			continue
		}
		groups[string(s.AlertKey)] = models.AlertKeys{s.AlertKey}
	}
	return groups
}
   321  
   322  func (s *Schedule) GetOpenStates() (States, error) {
   323  	incidents, err := s.DataAccess.State().GetAllOpenIncidents()
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  	states := make(States, len(incidents))
   328  	for _, inc := range incidents {
   329  		states[inc.AlertKey] = inc
   330  	}
   331  	return states, nil
   332  }
   333  
// StateGroup is one node of the grouped-incident tree built by
// MarshalGroups: either a group header (with Children) or a single
// incident leaf (with State set).
type StateGroup struct {
	Active        bool `json:",omitempty"`
	Status        models.Status
	CurrentStatus models.Status
	Silenced      bool
	IsError       bool                  `json:",omitempty"`
	Subject       string                `json:",omitempty"`
	Alert         string                `json:",omitempty"`
	AlertKey      models.AlertKey       `json:",omitempty"`
	Ago           string                `json:",omitempty"`
	State         *models.IncidentState `json:",omitempty"`
	Children      []*StateGroup         `json:",omitempty"`
}

// StateGroups is the result of MarshalGroups: open incidents split into
// needs-ack and acknowledged groups, plus failing-alert/unclosed-error
// counts and the configured time-and-date format values.
type StateGroups struct {
	Groups struct {
		NeedAck      []*StateGroup `json:",omitempty"`
		Acknowledged []*StateGroup `json:",omitempty"`
	}
	TimeAndDate                   []int
	FailingAlerts, UnclosedErrors int
}
   356  
// MarshalGroups builds the grouped view of open incidents: it fetches all
// open incidents, filters them with the boolq expression in filter,
// groups them by state tuple and then by common tag sets, and returns the
// result split into needs-ack and acknowledged lists, each sorted.
// Incidents whose alert no longer exists in the rule config are
// force-closed as a side effect.
func (s *Schedule) MarshalGroups(T miniprofiler.Timer, filter string) (*StateGroups, error) {
	var silenced SilenceTester
	T.Step("Silenced", func(miniprofiler.Timer) {
		silenced = s.Silenced()
	})
	var groups map[StateTuple]States
	var err error
	status := make(States)
	t := StateGroups{
		TimeAndDate: s.SystemConf.GetTimeAndDate(),
	}
	t.FailingAlerts, t.UnclosedErrors = s.getErrorCounts()
	// Phase 1: fetch open incidents and keep only those matching the
	// boolq filter. Errors inside the closure are surfaced via err.
	T.Step("Setup", func(miniprofiler.Timer) {
		status2, err2 := s.GetOpenStates()
		if err2 != nil {
			err = err2
			return
		}
		var parsedExpr *boolq.Tree
		parsedExpr, err2 = boolq.Parse(filter)
		if err2 != nil {
			err = err2
			return
		}
		for k, v := range status2 {
			a := s.RuleConf.GetAlert(k.Name())
			if a == nil {
				// The alert was removed from config; close its stale incident.
				slog.Errorf("unknown alert %s. Force closing.", k.Name())
				if err2 = s.ActionByAlertKey("bosun", "closing because alert doesn't exist.", models.ActionForceClose, nil, k); err2 != nil {
					slog.Error(err2)
				}
				continue
			}
			// NOTE: := here shadows err2 for the rest of the loop body.
			is, err2 := MakeIncidentSummary(s.RuleConf, silenced, v)
			if err2 != nil {
				err = err2
				return
			}
			match := false
			match, err2 = boolq.AskParsedExpr(parsedExpr, is)
			if err2 != nil {
				err = err2
				return
			}
			if match {
				status[k] = v
			}
		}
	})
	if err != nil {
		return nil, err
	}
	// Phase 2: group the surviving incidents by state tuple.
	T.Step("GroupStates", func(T miniprofiler.Timer) {
		groups = status.GroupStates(silenced)
	})
	// Phase 3: within each warning/critical/unknown tuple, group by common
	// tag sets and build the StateGroup tree. Other statuses are skipped.
	T.Step("groups", func(T miniprofiler.Timer) {
		for tuple, states := range groups {
			var grouped []*StateGroup
			switch tuple.Status {
			case models.StWarning, models.StCritical, models.StUnknown:
				var sets map[string]models.AlertKeys
				T.Step(fmt.Sprintf("GroupSets (%d): %v", len(states), tuple), func(T miniprofiler.Timer) {
					sets = states.GroupSets(s.SystemConf.GetMinGroupSize())
				})
				for name, group := range sets {
					g := StateGroup{
						Active:        tuple.Active,
						Status:        tuple.Status,
						CurrentStatus: tuple.CurrentStatus,
						Silenced:      tuple.Silenced,
						Subject:       fmt.Sprintf("%s - %s", tuple.Status, name),
					}
					for _, ak := range group {
						st := status[ak]
						g.Children = append(g.Children, &StateGroup{
							Active:   tuple.Active,
							Status:   tuple.Status,
							Silenced: tuple.Silenced,
							AlertKey: ak,
							Alert:    ak.Name(),
							Subject:  string(st.Subject),
							Ago:      marshalTime(st.Last().Time),
							State:    st,
							IsError:  !s.AlertSuccessful(ak.Name()),
						})
					}
					// A singleton group takes its child's subject for display.
					if len(g.Children) == 1 && g.Children[0].Subject != "" {
						g.Subject = g.Children[0].Subject
					}
					grouped = append(grouped, &g)
				}
			default:
				continue
			}
			if tuple.NeedAck {
				t.Groups.NeedAck = append(t.Groups.NeedAck, grouped...)
			} else {
				t.Groups.Acknowledged = append(t.Groups.Acknowledged, grouped...)
			}
		}
	})
	// Phase 4: sort both lists — active first, then by status (descending),
	// alert key, and subject.
	T.Step("sort", func(T miniprofiler.Timer) {
		gsort := func(grp []*StateGroup) func(i, j int) bool {
			return func(i, j int) bool {
				a := grp[i]
				b := grp[j]
				if a.Active && !b.Active {
					return true
				} else if !a.Active && b.Active {
					return false
				}
				if a.Status != b.Status {
					return a.Status > b.Status
				}
				if a.AlertKey != b.AlertKey {
					return a.AlertKey < b.AlertKey
				}
				return a.Subject < b.Subject
			}
		}
		slice.Sort(t.Groups.NeedAck, gsort(t.Groups.NeedAck))
		slice.Sort(t.Groups.Acknowledged, gsort(t.Groups.Acknowledged))
	})
	return &t, nil
}
   482  
   483  func marshalTime(t time.Time) string {
   484  	if t.IsZero() {
   485  		return ""
   486  	}
   487  	b, _ := t.MarshalText()
   488  	return string(b)
   489  }
   490  
// DefaultSched is the package-level schedule used by the top-level
// convenience functions (Load, Run, Close, Reset).
var DefaultSched = &Schedule{}

// Load loads a configuration into the default schedule.
func Load(systemConf conf.SystemConfProvider, ruleConf conf.RuleConfProvider, dataAccess database.DataAccess, annotate backend.Backend, skipLast, quiet bool) error {
	return DefaultSched.Init("alerts", systemConf, ruleConf, dataAccess, annotate, skipLast, quiet)
}
   497  
// Run runs the default schedule. (Schedule.Run is defined elsewhere in
// this package.)
func Run() error {
	return DefaultSched.Run()
}
   502  
// Close closes the default schedule; see (*Schedule).Close.
func Close(reload bool) {
	DefaultSched.Close(reload)
}
   506  
   507  func (s *Schedule) Close(reload bool) {
   508  	s.cancelChecks()
   509  	s.checksRunning.Wait()
   510  	if s.skipLast || reload {
   511  		return
   512  	}
   513  	err := s.Search.BackupLast()
   514  	if err != nil {
   515  		slog.Error(err)
   516  	}
   517  }
   518  
// Reset replaces the package-level DefaultSched with a fresh Schedule.
// NOTE(review): this method ignores its receiver and always resets
// DefaultSched, so calling Reset on a non-default Schedule does not reset
// that schedule — confirm whether this is intentional.
func (s *Schedule) Reset() {
	DefaultSched = &Schedule{}
}

// Reset resets the default schedule.
func Reset() {
	DefaultSched.Reset()
}
   526  
// init registers metadata for the scheduler's operational metrics.
func init() {
	metadata.AddMetricMeta("bosun.statefile.size", metadata.Gauge, metadata.Bytes,
		"The total size of the Bosun state file.")
	metadata.AddMetricMeta("bosun.check.duration", metadata.Gauge, metadata.Second,
		"The number of seconds it took Bosun to check each alert rule.")
	metadata.AddMetricMeta("bosun.check.err", metadata.Gauge, metadata.Error,
		"The running count of the number of errors Bosun has received while trying to evaluate an alert expression.")

	metadata.AddMetricMeta("bosun.actions", metadata.Gauge, metadata.Count,
		"The running count of actions performed by individual users (Closed alert, Acknowledged alert, etc).")
}
   538  
   539  func (s *Schedule) ActionByAlertKey(user, message string, t models.ActionType, at *time.Time, ak models.AlertKey) error {
   540  	st, err := s.DataAccess.State().GetLatestIncident(ak)
   541  	if err != nil {
   542  		return err
   543  	}
   544  	if st == nil {
   545  		return fmt.Errorf("no such alert key: %v", ak)
   546  	}
   547  	_, err = s.action(user, message, t, at, st)
   548  	return err
   549  }
   550  
   551  func (s *Schedule) ActionByIncidentId(user, message string, t models.ActionType, at *time.Time, id int64) (models.AlertKey, error) {
   552  	st, err := s.DataAccess.State().GetIncidentState(id)
   553  	if err != nil {
   554  		return "", err
   555  	}
   556  	if st == nil {
   557  		return "", fmt.Errorf("no incident with id: %v", id)
   558  	}
   559  	return s.action(user, message, t, at, st)
   560  }
   561  
// action applies a user/system action of type t to the incident st,
// mutating and persisting its state. Most actions also clear pending
// notifications for the alert key. It returns the incident's alert key.
// For ActionClose, at (when non-nil) overrides the delayed-close deadline.
func (s *Schedule) action(user, message string, t models.ActionType, at *time.Time, st *models.IncidentState) (models.AlertKey, error) {
	isUnknown := st.LastAbnormalStatus == models.StUnknown
	timestamp := utcNow()
	action := models.Action{
		Message: message,
		Time:    timestamp,
		Type:    t,
		User:    user,
	}

	switch t {
	case models.ActionAcknowledge:
		// Only open, not-yet-acknowledged incidents can be acked.
		if !st.NeedAck {
			return "", fmt.Errorf("alert already acknowledged")
		}
		if !st.Open {
			return "", fmt.Errorf("cannot acknowledge closed alert")
		}
		st.NeedAck = false
		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
			return "", err
		}
	case models.ActionCancelClose:
		found := false
		for i, a := range st.Actions {
			// Find first delayed close that hasn't already been fulfilled or canceled
			if a.Type == models.ActionDelayedClose && !(a.Fullfilled || a.Cancelled) {
				found, st.Actions[i].Cancelled = true, true
				break
			}
		}
		if !found {
			return "", fmt.Errorf("no delayed close for incident %v (%v) found to cancel", st.Id, st.AlertKey)
		}
	case models.ActionClose:
		// closing effectively acks the incident
		st.NeedAck = false
		if st.IsActive() { // Closing an active incident results in delayed close
			var dl time.Time
			if at != nil {
				dl = *at
			} else {
				// Default deadline: two check cycles from now.
				duration, err := s.GetCheckFrequency(st.AlertKey.Name())
				if err != nil {
					return "", err
				}
				dl = timestamp.Add(duration * 2)
			}
			// If a pending delayed close already exists, move its deadline.
			// NOTE(review): an earlier comment said "update the time and
			// return", but this loop does NOT return — a new delayed-close
			// action is still appended below. Confirm this is intended.
			for i, a := range st.Actions {
				if a.Type == models.ActionDelayedClose && !(a.Fullfilled || a.Cancelled) {
					st.Actions[i].Deadline = &dl
					_, err := s.DataAccess.State().UpdateIncidentState(st)
					if err != nil {
						return "", err
					}
				}
			}
			action.Type = models.ActionDelayedClose
			action.Deadline = &dl
		} else {
			st.Open = false
			st.End = &timestamp
		}
		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
			return "", err
		}
	case models.ActionForceClose:
		st.Open = false
		st.End = &timestamp
		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
			return "", err
		}
	case models.ActionForget:
		if !isUnknown {
			return "", fmt.Errorf("can only forget unknowns")
		}
		// NOTE(review): notifications are cleared here and again in the
		// ActionPurge case reached via fallthrough — redundant but harmless.
		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
			return "", err
		}
		fallthrough
	case models.ActionPurge:
		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
			return "", err
		}
		// Purge removes the incident entirely; no action record is appended.
		return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
	case models.ActionNote:
		// pass
	default:
		return "", fmt.Errorf("unknown action type: %v", t)
	}

	// Record the action on the incident and persist it.
	st.Actions = append(st.Actions, action)
	_, err := s.DataAccess.State().UpdateIncidentState(st)
	if err != nil {
		return "", err
	}
	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
		slog.Errorln(err)
	}
	return st.AlertKey, nil
}
   664  
// IncidentStatus is a flattened, serializable summary of a single
// incident — presumably consumed by API/UI callers; confirm with callers.
type IncidentStatus struct {
	IncidentID         int64
	Active             bool
	AlertKey           models.AlertKey
	Status             models.Status
	StatusTime         int64
	Subject            string
	Silenced           bool
	LastAbnormalStatus models.Status
	LastAbnormalTime   models.Epoch
	NeedsAck           bool
}
   677  
   678  func (s *Schedule) AlertSuccessful(name string) bool {
   679  	b, err := s.DataAccess.Errors().IsAlertFailing(name)
   680  	if err != nil {
   681  		slog.Error(err)
   682  		b = true
   683  	}
   684  	return !b
   685  }
   686  
   687  func (s *Schedule) markAlertError(name string, e error) {
   688  	d := s.DataAccess.Errors()
   689  	if err := d.MarkAlertFailure(name, e.Error()); err != nil {
   690  		slog.Error(err)
   691  		return
   692  	}
   693  
   694  }
   695  
   696  func (s *Schedule) markAlertSuccessful(name string) {
   697  	if err := s.DataAccess.Errors().MarkAlertSuccess(name); err != nil {
   698  		slog.Error(err)
   699  	}
   700  }
   701  
   702  func (s *Schedule) ClearErrors(alert string) error {
   703  	if alert == "all" {
   704  		return s.DataAccess.Errors().ClearAll()
   705  	}
   706  	return s.DataAccess.Errors().ClearAlert(alert)
   707  }
   708  
   709  func (s *Schedule) getErrorCounts() (failing, total int) {
   710  	var err error
   711  	failing, total, err = s.DataAccess.Errors().GetFailingAlertCounts()
   712  	if err != nil {
   713  		slog.Error(err)
   714  	}
   715  	return
   716  }
   717  
// GetQuiet reports whether the schedule was initialized with the quiet flag.
func (s *Schedule) GetQuiet() bool {
	return s.quiet
}
   721  
   722  // GetCheckFrequency returns the duration between checks for the named alert. If the alert
   723  // does not exist an error is returned.
   724  func (s *Schedule) GetCheckFrequency(alertName string) (time.Duration, error) {
   725  	alertDef := s.RuleConf.GetAlert(alertName)
   726  	if alertDef == nil {
   727  		return 0, fmt.Errorf("can not get check frequency for alert %v, no such alert defined", alertName)
   728  	}
   729  	runEvery := alertDef.RunEvery
   730  	if runEvery == 0 {
   731  		runEvery = s.SystemConf.GetDefaultRunEvery()
   732  	}
   733  	return time.Duration(time.Duration(runEvery) * s.SystemConf.GetCheckFrequency()), nil
   734  
   735  }
   736  
// GetSilence returns the silence currently matching the given alert key,
// or nil when the key is not silenced.
func (s *Schedule) GetSilence(T miniprofiler.Timer, ak models.AlertKey) *models.Silence {
	var silenced SilenceTester
	T.Step("Silenced", func(miniprofiler.Timer) {
		silenced = s.Silenced()
	})
	return silenced(ak)
}