bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/sched/sched.go (about)

     1  package sched // import "bosun.org/cmd/bosun/sched"
     2  
     3  import (
     4  	"fmt"
     5  	"net/http"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"golang.org/x/net/context"
    11  
    12  	"bosun.org/annotate/backend"
    13  	"bosun.org/cmd/bosun/cache"
    14  	"bosun.org/cmd/bosun/conf"
    15  	"bosun.org/cmd/bosun/database"
    16  	"bosun.org/cmd/bosun/search"
    17  	"bosun.org/collect"
    18  	"bosun.org/metadata"
    19  	"bosun.org/models"
    20  	"bosun.org/opentsdb"
    21  	"bosun.org/slog"
    22  	"github.com/MiniProfiler/go/miniprofiler"
    23  	"github.com/bradfitz/slice"
    24  	"github.com/kylebrandt/boolq"
    25  )
    26  
// DefaultClient is the default http client for requests made from templates.
// It is configured in cmd/bosun/main.go.
var DefaultClient *http.Client
    29  
    30  func utcNow() time.Time {
    31  	return time.Now().UTC()
    32  }
    33  
// Schedule is the central coordinator for bosun: it owns the rule and system
// configuration, the search index, the data access layer, and all in-flight
// alert-check and notification state.
type Schedule struct {
	// mutex guards the schedule. The three fields below record who holds it,
	// when it was acquired, and how long the holder waited, so Unlock can
	// emit the schedule.lock_time / lock_count metrics.
	mutex         sync.Mutex
	mutexHolder   string
	mutexAquired  time.Time // NOTE(review): field name is a historic misspelling of "acquired"; renaming would touch every method.
	mutexWaitTime int64

	RuleConf   conf.RuleConfProvider
	SystemConf conf.SystemConfProvider

	Search *search.Search

	// annotate is the backend used to store incident annotations.
	annotate backend.Backend

	// skipLast disables persisting/restoring the search index's last-seen data.
	skipLast bool
	// quiet suppresses outgoing notifications (see GetQuiet).
	quiet    bool

	//channel signals an alert has added notifications, and notifications should be processed.
	nc chan interface{}
	//notifications to be sent immediately
	pendingNotifications map[*conf.Notification][]*IncidentWithTemplates

	//unknown states that need to be notified about. Collected and sent in batches.
	pendingUnknowns map[notificationGroupKey][]*models.IncidentState

	// lastLogTimes tracks the last log emission per alert key (rate limiting).
	lastLogTimes map[models.AlertKey]time.Time
	LastCheck    time.Time

	// ctx carries the per-run check time and cache (see checkContext).
	ctx *checkContext

	DataAccess database.DataAccess

	// runnerContext is a context to track running alert routines
	runnerContext context.Context
	// cancelChecks is the function to call to cancel all alert routines
	cancelChecks context.CancelFunc
	// checksRunning waits for alert checks to finish before reloading
	// things that take significant time should be cancelled (i.e. expression execution)
	// whereas the runHistory is allowed to complete
	checksRunning sync.WaitGroup
}
    74  
// Init wires the Schedule to its configuration providers, data access layer,
// and annotation backend, and resets all per-run state (pending unknowns, log
// throttling, check context, cancellation plumbing). It is called once at app
// start, and again every time the rule page runs, so it must not spawn
// long-running work that can't be avoided. The Search index is created only
// if one does not already exist, so it survives re-inits.
func (s *Schedule) Init(name string, systemConf conf.SystemConfProvider, ruleConf conf.RuleConfProvider, dataAccess database.DataAccess, annotate backend.Backend, skipLast, quiet bool) error {
	//initialize all variables and collections so they are ready to use.
	//this will be called once at app start, and also every time the rule
	//page runs, so be careful not to spawn long running processes that can't
	//be avoided.
	s.skipLast = skipLast
	s.quiet = quiet
	s.SystemConf = systemConf
	s.RuleConf = ruleConf
	s.annotate = annotate
	s.pendingUnknowns = make(map[notificationGroupKey][]*models.IncidentState)
	s.lastLogTimes = make(map[models.AlertKey]time.Time)
	s.LastCheck = utcNow()
	s.ctx = &checkContext{utcNow(), cache.New(name, 0)}
	s.DataAccess = dataAccess
	// Initialize the context and waitgroup used to gracefully shutdown bosun as well as reload
	s.runnerContext, s.cancelChecks = context.WithCancel(context.Background())
	s.checksRunning = sync.WaitGroup{}

	if s.Search == nil {
		s.Search = search.NewSearch(s.DataAccess, skipLast)
	}
	return nil
}
   100  
// checkContext carries per-check-run state: the time the run started and a
// cache shared by the expressions evaluated during that run.
type checkContext struct {
	runTime    time.Time
	checkCache *cache.Cache
}
   105  
// init registers metadata for the schedule-lock metrics emitted by
// Lock/Unlock.
func init() {
	metadata.AddMetricMeta(
		"bosun.schedule.lock_time", metadata.Counter, metadata.MilliSecond,
		"Length of time spent waiting for or holding the schedule lock.")
	metadata.AddMetricMeta(
		"bosun.schedule.lock_count", metadata.Counter, metadata.Count,
		"Number of times the given caller acquired the lock.")
}
   114  
   115  func (s *Schedule) Lock(method string) {
   116  	start := utcNow()
   117  	s.mutex.Lock()
   118  	s.mutexAquired = utcNow()
   119  	s.mutexHolder = method
   120  	s.mutexWaitTime = int64(s.mutexAquired.Sub(start) / time.Millisecond) // remember this so we don't have to call put until we leave the critical section.
   121  }
   122  
   123  func (s *Schedule) Unlock() {
   124  	holder := s.mutexHolder
   125  	start := s.mutexAquired
   126  	waitTime := s.mutexWaitTime
   127  	s.mutexHolder = ""
   128  	s.mutex.Unlock()
   129  	collect.Add("schedule.lock_time", opentsdb.TagSet{"caller": holder, "op": "wait"}, waitTime)
   130  	collect.Add("schedule.lock_time", opentsdb.TagSet{"caller": holder, "op": "hold"}, int64(time.Since(start)/time.Millisecond))
   131  	collect.Add("schedule.lock_count", opentsdb.TagSet{"caller": holder}, 1)
   132  }
   133  
// GetLockStatus reports who currently holds the schedule lock (empty if
// unheld) and when it was acquired. Read without locking; values may be
// stale by the time the caller inspects them.
func (s *Schedule) GetLockStatus() (holder string, since time.Time) {
	return s.mutexHolder, s.mutexAquired
}
   137  
   138  func (s *Schedule) PutMetadata(k metadata.Metakey, v interface{}) error {
   139  
   140  	isCoreMeta := (k.Name == "desc" || k.Name == "unit" || k.Name == "rate")
   141  	if !isCoreMeta {
   142  		s.DataAccess.Metadata().PutTagMetadata(k.TagSet(), k.Name, fmt.Sprint(v), utcNow())
   143  		return nil
   144  	}
   145  	if k.Metric == "" {
   146  		err := fmt.Errorf("desc, rate, and unit require metric name")
   147  		slog.Error(err)
   148  		return err
   149  	}
   150  	return s.DataAccess.Metadata().PutMetricMetadata(k.Metric, k.Name, fmt.Sprint(v))
   151  }
   152  
// DeleteMetadata removes the named tag-level metadata entry for the given
// tag set.
func (s *Schedule) DeleteMetadata(tags opentsdb.TagSet, name string) error {
	return s.DataAccess.Metadata().DeleteTagMetadata(tags, name)
}
   156  
   157  func (s *Schedule) MetadataMetrics(metric string) (*database.MetricMetadata, error) {
   158  	//denormalized metrics should give metric metadata for their undenormalized counterparts
   159  	if strings.HasPrefix(metric, "__") {
   160  		if idx := strings.Index(metric, "."); idx != -1 {
   161  			metric = metric[idx+1:]
   162  		}
   163  	}
   164  	mm, err := s.DataAccess.Metadata().GetMetricMetadata(metric)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	return mm, nil
   169  }
   170  
   171  func (s *Schedule) GetMetadata(metric string, subset opentsdb.TagSet) ([]metadata.Metasend, error) {
   172  	ms := make([]metadata.Metasend, 0)
   173  	if metric != "" {
   174  		meta, err := s.MetadataMetrics(metric)
   175  		if err != nil {
   176  			return nil, err
   177  		}
   178  		if meta == nil {
   179  			return nil, fmt.Errorf("metadata for metric %v not found", metric)
   180  		}
   181  		if meta.Desc != "" {
   182  			ms = append(ms, metadata.Metasend{
   183  				Metric: metric,
   184  				Name:   "desc",
   185  				Value:  meta.Desc,
   186  			})
   187  		}
   188  		if meta.Unit != "" {
   189  			ms = append(ms, metadata.Metasend{
   190  				Metric: metric,
   191  				Name:   "unit",
   192  				Value:  meta.Unit,
   193  			})
   194  		}
   195  		if meta.Rate != "" {
   196  			ms = append(ms, metadata.Metasend{
   197  				Metric: metric,
   198  				Name:   "rate",
   199  				Value:  meta.Rate,
   200  			})
   201  		}
   202  	} else {
   203  		meta, err := s.DataAccess.Metadata().GetTagMetadata(subset, "")
   204  		if err != nil {
   205  			return nil, err
   206  		}
   207  		for _, m := range meta {
   208  			tm := time.Unix(m.LastTouched, 0)
   209  			ms = append(ms, metadata.Metasend{
   210  				Tags:  m.Tags,
   211  				Name:  m.Name,
   212  				Value: m.Value,
   213  				Time:  &tm,
   214  			})
   215  		}
   216  	}
   217  	return ms, nil
   218  }
   219  
// States maps alert keys to their current incident state.
type States map[models.AlertKey]*models.IncidentState

// StateTuple is the grouping key used by States.GroupStates: incidents that
// agree on all five fields land in the same bucket.
type StateTuple struct {
	NeedAck       bool
	Active        bool
	Status        models.Status
	CurrentStatus models.Status
	Silenced      bool
}
   229  
   230  // GroupStates groups by NeedAck, Active, Status, and Silenced.
   231  func (states States) GroupStates(silenced SilenceTester) map[StateTuple]States {
   232  	r := make(map[StateTuple]States)
   233  	for ak, st := range states {
   234  		sil := silenced(ak) != nil
   235  		t := StateTuple{
   236  			NeedAck:       st.NeedAck,
   237  			Active:        st.IsActive(),
   238  			Status:        st.LastAbnormalStatus,
   239  			CurrentStatus: st.CurrentStatus,
   240  			Silenced:      sil,
   241  		}
   242  		if _, present := r[t]; !present {
   243  			r[t] = make(States)
   244  		}
   245  		r[t][ak] = st
   246  	}
   247  	return r
   248  }
   249  
// GroupSets returns slices of TagSets, grouped by most common ancestor. Those
// with no shared ancestor are grouped by alert name.
//
// The algorithm is greedy: it repeatedly finds the single tag key=value pair
// shared by the most not-yet-grouped states, pulls all states carrying that
// pair into one group, and repeats until the best remaining pair is shared by
// fewer than minGroup states. Leftovers are then grouped by alert name when
// at least minGroup share an alert, and emitted individually otherwise.
func (states States) GroupSets(minGroup int) map[string]models.AlertKeys {
	type Pair struct {
		k, v string
	}
	groups := make(map[string]models.AlertKeys)
	seen := make(map[*models.IncidentState]bool)
	for {
		// Count each tag pair's occurrences across ungrouped states.
		counts := make(map[Pair]int)
		for _, s := range states {
			if seen[s] {
				continue
			}
			for k, v := range s.AlertKey.Group() {
				counts[Pair{k, v}]++
			}
		}
		if len(counts) == 0 {
			break
		}
		// Pick the most common pair (ties broken by map iteration order).
		max := 0
		var pair Pair
		for p, c := range counts {
			if c > max {
				max = c
				pair = p
			}
		}
		// minGroup <= 0 disables tag-based grouping entirely.
		if max < minGroup || minGroup <= 0 {
			break
		}
		// Collect every ungrouped state that carries the winning pair.
		var group models.AlertKeys
		for _, s := range states {
			if seen[s] {
				continue
			}
			if s.AlertKey.Group()[pair.k] != pair.v {
				continue
			}
			seen[s] = true
			group = append(group, s.AlertKey)
		}
		if len(group) > 0 {
			groups[fmt.Sprintf("{%s=%s}", pair.k, pair.v)] = group
		}
	}
	// alerts: fall back to grouping the remainder by alert name.
	groupedByAlert := map[string]models.AlertKeys{}
	for _, s := range states {
		if seen[s] {
			continue
		}
		groupedByAlert[s.Alert] = append(groupedByAlert[s.Alert], s.AlertKey)
	}
	for a, aks := range groupedByAlert {
		if len(aks) >= minGroup {
			group := models.AlertKeys{}
			for _, ak := range aks {
				group = append(group, ak)
			}
			groups[a] = group
		}
	}
	// ungrouped: anything left becomes its own single-key group.
	for _, s := range states {
		if seen[s] || len(groupedByAlert[s.Alert]) >= minGroup {
			continue
		}
		groups[string(s.AlertKey)] = models.AlertKeys{s.AlertKey}
	}
	return groups
}
   323  
   324  func (s *Schedule) GetOpenStates() (States, error) {
   325  	incidents, err := s.DataAccess.State().GetAllOpenIncidents()
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	states := make(States, len(incidents))
   330  	for _, inc := range incidents {
   331  		states[inc.AlertKey] = inc
   332  	}
   333  	return states, nil
   334  }
   335  
// StateGroup is one node in the grouped-incident tree rendered by
// MarshalGroups: top-level entries summarize a group and carry the individual
// incidents in Children.
type StateGroup struct {
	Active        bool `json:",omitempty"`
	Status        models.Status
	CurrentStatus models.Status
	Silenced      bool
	IsError       bool                  `json:",omitempty"`
	Subject       string                `json:",omitempty"`
	Alert         string                `json:",omitempty"`
	AlertKey      models.AlertKey       `json:",omitempty"`
	Ago           string                `json:",omitempty"`
	State         *models.IncidentState `json:",omitempty"`
	Children      []*StateGroup         `json:",omitempty"`
}
   349  
// StateGroups is the top-level payload produced by MarshalGroups: incident
// groups split by acknowledgement state, plus dashboard counters.
type StateGroups struct {
	Groups struct {
		NeedAck      []*StateGroup `json:",omitempty"`
		Acknowledged []*StateGroup `json:",omitempty"`
	}
	TimeAndDate                   []int
	FailingAlerts, UnclosedErrors int
}
   358  
// MarshalGroups builds the grouped-incident view for the dashboard: it loads
// all open incidents, filters them with the boolq filter expression, groups
// them by state tuple and then by common tags (GroupSets), and returns the
// result split into NeedAck/Acknowledged, sorted for display. Incidents whose
// alert no longer exists in the rule config are force-closed as a side effect.
func (s *Schedule) MarshalGroups(T miniprofiler.Timer, filter string) (*StateGroups, error) {
	var silenced SilenceTester
	T.Step("Silenced", func(miniprofiler.Timer) {
		silenced = s.Silenced()
	})
	var groups map[StateTuple]States
	var err error
	status := make(States)
	t := StateGroups{
		TimeAndDate: s.SystemConf.GetTimeAndDate(),
	}
	t.FailingAlerts, t.UnclosedErrors = s.getErrorCounts()
	T.Step("Setup", func(miniprofiler.Timer) {
		// err2 holds step-local errors; they are copied into err so the outer
		// function can see failures that happen inside this closure.
		status2, err2 := s.GetOpenStates()
		if err2 != nil {
			err = err2
			return
		}
		var parsedExpr *boolq.Tree
		parsedExpr, err2 = boolq.Parse(filter)
		if err2 != nil {
			err = err2
			return
		}
		for k, v := range status2 {
			a := s.RuleConf.GetAlert(k.Name())
			if a == nil {
				// Alert was removed from config; close its orphaned incident.
				slog.Errorf("unknown alert %s. Force closing.", k.Name())
				if err2 = s.ActionByAlertKey("bosun", "closing because alert doesn't exist.", models.ActionForceClose, nil, k); err2 != nil {
					slog.Error(err2)
				}
				continue
			}
			is, err2 := MakeIncidentSummary(s.RuleConf, silenced, v)
			if err2 != nil {
				err = err2
				return
			}
			match := false
			match, err2 = boolq.AskParsedExpr(parsedExpr, is)
			if err2 != nil {
				err = err2
				return
			}
			// Only incidents matching the filter expression are kept.
			if match {
				status[k] = v
			}
		}
	})
	if err != nil {
		return nil, err
	}
	T.Step("GroupStates", func(T miniprofiler.Timer) {
		groups = status.GroupStates(silenced)
	})
	T.Step("groups", func(T miniprofiler.Timer) {
		for tuple, states := range groups {
			var grouped []*StateGroup
			switch tuple.Status {
			case models.StWarning, models.StCritical, models.StUnknown:
				var sets map[string]models.AlertKeys
				T.Step(fmt.Sprintf("GroupSets (%d): %v", len(states), tuple), func(T miniprofiler.Timer) {
					sets = states.GroupSets(s.SystemConf.GetMinGroupSize())
				})
				for name, group := range sets {
					g := StateGroup{
						Active:        tuple.Active,
						Status:        tuple.Status,
						CurrentStatus: tuple.CurrentStatus,
						Silenced:      tuple.Silenced,
						Subject:       fmt.Sprintf("%s - %s", tuple.Status, name),
					}
					for _, ak := range group {
						st := status[ak]
						g.Children = append(g.Children, &StateGroup{
							Active:   tuple.Active,
							Status:   tuple.Status,
							Silenced: tuple.Silenced,
							AlertKey: ak,
							Alert:    ak.Name(),
							Subject:  string(st.Subject),
							Ago:      marshalTime(st.Last().Time),
							State:    st,
							IsError:  !s.AlertSuccessful(ak.Name()),
						})
					}
					// A singleton group takes its child's subject for display.
					if len(g.Children) == 1 && g.Children[0].Subject != "" {
						g.Subject = g.Children[0].Subject
					}
					grouped = append(grouped, &g)
				}
			default:
				// Normal/closed tuples are not rendered on the dashboard.
				continue
			}
			if tuple.NeedAck {
				t.Groups.NeedAck = append(t.Groups.NeedAck, grouped...)
			} else {
				t.Groups.Acknowledged = append(t.Groups.Acknowledged, grouped...)
			}
		}
	})
	T.Step("sort", func(T miniprofiler.Timer) {
		// Sort order: active first, then severity descending, then alert key,
		// then subject — applied to both top-level lists.
		gsort := func(grp []*StateGroup) func(i, j int) bool {
			return func(i, j int) bool {
				a := grp[i]
				b := grp[j]
				if a.Active && !b.Active {
					return true
				} else if !a.Active && b.Active {
					return false
				}
				if a.Status != b.Status {
					return a.Status > b.Status
				}
				if a.AlertKey != b.AlertKey {
					return a.AlertKey < b.AlertKey
				}
				return a.Subject < b.Subject
			}
		}
		slice.Sort(t.Groups.NeedAck, gsort(t.Groups.NeedAck))
		slice.Sort(t.Groups.Acknowledged, gsort(t.Groups.Acknowledged))
	})
	return &t, nil
}
   484  
   485  func marshalTime(t time.Time) string {
   486  	if t.IsZero() {
   487  		return ""
   488  	}
   489  	b, _ := t.MarshalText()
   490  	return string(b)
   491  }
   492  
   493  var DefaultSched = &Schedule{}
   494  
// Load loads a configuration into the default schedule, initializing it under
// the cache name "alerts".
func Load(systemConf conf.SystemConfProvider, ruleConf conf.RuleConfProvider, dataAccess database.DataAccess, annotate backend.Backend, skipLast, quiet bool) error {
	return DefaultSched.Init("alerts", systemConf, ruleConf, dataAccess, annotate, skipLast, quiet)
}
   499  
// Run runs the default schedule.
func Run() error {
	return DefaultSched.Run()
}
   504  
// Close shuts down the default schedule; reload indicates whether this is a
// config reload (skipping last-data backup) rather than a full shutdown.
func Close(reload bool) {
	DefaultSched.Close(reload)
}
   508  
   509  func (s *Schedule) Close(reload bool) {
   510  	s.cancelChecks()
   511  	s.checksRunning.Wait()
   512  	if s.skipLast || reload {
   513  		return
   514  	}
   515  	err := s.Search.BackupLast()
   516  	if err != nil {
   517  		slog.Error(err)
   518  	}
   519  }
   520  
// Reset replaces the package-level DefaultSched with a fresh Schedule.
// NOTE(review): the receiver s is ignored — calling Reset on any Schedule
// resets only the package global, never s itself. Confirm callers rely on
// this before changing it.
func (s *Schedule) Reset() {
	DefaultSched = &Schedule{}
}
   524  
// Reset replaces the default schedule with a fresh, uninitialized one.
func Reset() {
	DefaultSched.Reset()
}
   528  
// init registers metadata for the schedule's bookkeeping metrics (state file
// size, per-check duration and errors, and user actions).
func init() {
	metadata.AddMetricMeta("bosun.statefile.size", metadata.Gauge, metadata.Bytes,
		"The total size of the Bosun state file.")
	metadata.AddMetricMeta("bosun.check.duration", metadata.Gauge, metadata.Second,
		"The number of seconds it took Bosun to check each alert rule.")
	metadata.AddMetricMeta("bosun.check.err", metadata.Gauge, metadata.Error,
		"The running count of the number of errors Bosun has received while trying to evaluate an alert expression.")

	metadata.AddMetricMeta("bosun.actions", metadata.Gauge, metadata.Count,
		"The running count of actions performed by individual users (Closed alert, Acknowledged alert, etc).")
}
   540  
   541  func (s *Schedule) ActionByAlertKey(user, message string, t models.ActionType, at *time.Time, ak models.AlertKey) error {
   542  	st, err := s.DataAccess.State().GetLatestIncident(ak)
   543  	if err != nil {
   544  		return err
   545  	}
   546  	if st == nil {
   547  		return fmt.Errorf("no such alert key: %v", ak)
   548  	}
   549  	_, err = s.action(user, message, t, at, st)
   550  	return err
   551  }
   552  
   553  func (s *Schedule) ActionByIncidentId(user, message string, t models.ActionType, at *time.Time, id int64) (models.AlertKey, error) {
   554  	st, err := s.DataAccess.State().GetIncidentState(id)
   555  	if err != nil {
   556  		return "", err
   557  	}
   558  	if st == nil {
   559  		return "", fmt.Errorf("no incident with id: %v", id)
   560  	}
   561  	return s.action(user, message, t, at, st)
   562  }
   563  
   564  func (s *Schedule) action(user, message string, t models.ActionType, at *time.Time, st *models.IncidentState) (models.AlertKey, error) {
   565  	isUnknown := st.LastAbnormalStatus == models.StUnknown
   566  	timestamp := utcNow()
   567  	action := models.Action{
   568  		Message: message,
   569  		Time:    timestamp,
   570  		Type:    t,
   571  		User:    user,
   572  	}
   573  
   574  	switch t {
   575  	case models.ActionAcknowledge:
   576  		if !st.NeedAck {
   577  			return "", fmt.Errorf("alert already acknowledged")
   578  		}
   579  		if !st.Open {
   580  			return "", fmt.Errorf("cannot acknowledge closed alert")
   581  		}
   582  		st.NeedAck = false
   583  		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
   584  			return "", err
   585  		}
   586  	case models.ActionCancelClose:
   587  		found := false
   588  		for i, a := range st.Actions {
   589  			// Find first delayed close that hasn't already been fulfilled or canceled
   590  			if a.Type == models.ActionDelayedClose && !(a.Fullfilled || a.Cancelled) {
   591  				found, st.Actions[i].Cancelled = true, true
   592  				break
   593  			}
   594  		}
   595  		if !found {
   596  			return "", fmt.Errorf("no delayed close for incident %v (%v) found to cancel", st.Id, st.AlertKey)
   597  		}
   598  	case models.ActionClose:
   599  		// closing effectively acks the incident
   600  		st.NeedAck = false
   601  		if st.IsActive() { // Closing an active incident results in delayed close
   602  			var dl time.Time
   603  			if at != nil {
   604  				dl = *at
   605  			} else {
   606  				duration, err := s.GetCheckFrequency(st.AlertKey.Name())
   607  				if err != nil {
   608  					return "", err
   609  				}
   610  				dl = timestamp.Add(duration * 2)
   611  			}
   612  			// See if there is already a pending delayed close, if there is update the time and return
   613  			for i, a := range st.Actions {
   614  				if a.Type == models.ActionDelayedClose && !(a.Fullfilled || a.Cancelled) {
   615  					st.Actions[i].Deadline = &dl
   616  					_, err := s.DataAccess.State().UpdateIncidentState(st)
   617  					if err != nil {
   618  						return "", err
   619  					}
   620  				}
   621  			}
   622  			action.Type = models.ActionDelayedClose
   623  			action.Deadline = &dl
   624  		} else {
   625  			st.Open = false
   626  			st.End = &timestamp
   627  		}
   628  		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
   629  			return "", err
   630  		}
   631  	case models.ActionForceClose:
   632  		st.Open = false
   633  		st.End = &timestamp
   634  		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
   635  			return "", err
   636  		}
   637  	case models.ActionForget:
   638  		if !isUnknown {
   639  			return "", fmt.Errorf("can only forget unknowns")
   640  		}
   641  		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
   642  			return "", err
   643  		}
   644  		fallthrough
   645  	case models.ActionPurge:
   646  		if err := s.DataAccess.Notifications().ClearNotifications(st.AlertKey); err != nil {
   647  			return "", err
   648  		}
   649  		return st.AlertKey, s.DataAccess.State().Forget(st.AlertKey)
   650  	case models.ActionNote:
   651  		// pass
   652  	default:
   653  		return "", fmt.Errorf("unknown action type: %v", t)
   654  	}
   655  
   656  	st.Actions = append(st.Actions, action)
   657  	_, err := s.DataAccess.State().UpdateIncidentState(st)
   658  	if err != nil {
   659  		return "", err
   660  	}
   661  	if err := collect.Add("actions", opentsdb.TagSet{"user": user, "alert": st.AlertKey.Name(), "type": t.String()}, 1); err != nil {
   662  		slog.Errorln(err)
   663  	}
   664  	return st.AlertKey, nil
   665  }
   666  
// IncidentStatus is a flattened summary of one incident, suitable for API
// responses and dashboard listings.
type IncidentStatus struct {
	IncidentID         int64
	Active             bool
	AlertKey           models.AlertKey
	Status             models.Status
	StatusTime         int64
	Subject            string
	Silenced           bool
	LastAbnormalStatus models.Status
	LastAbnormalTime   models.Epoch
	NeedsAck           bool
}
   679  
   680  func (s *Schedule) AlertSuccessful(name string) bool {
   681  	b, err := s.DataAccess.Errors().IsAlertFailing(name)
   682  	if err != nil {
   683  		slog.Error(err)
   684  		b = true
   685  	}
   686  	return !b
   687  }
   688  
   689  func (s *Schedule) markAlertError(name string, e error) {
   690  	d := s.DataAccess.Errors()
   691  	if err := d.MarkAlertFailure(name, e.Error()); err != nil {
   692  		slog.Error(err)
   693  		return
   694  	}
   695  
   696  }
   697  
   698  func (s *Schedule) markAlertSuccessful(name string) {
   699  	if err := s.DataAccess.Errors().MarkAlertSuccess(name); err != nil {
   700  		slog.Error(err)
   701  	}
   702  }
   703  
   704  func (s *Schedule) ClearErrors(alert string) error {
   705  	if alert == "all" {
   706  		return s.DataAccess.Errors().ClearAll()
   707  	}
   708  	return s.DataAccess.Errors().ClearAlert(alert)
   709  }
   710  
   711  func (s *Schedule) getErrorCounts() (failing, total int) {
   712  	var err error
   713  	failing, total, err = s.DataAccess.Errors().GetFailingAlertCounts()
   714  	if err != nil {
   715  		slog.Error(err)
   716  	}
   717  	return
   718  }
   719  
// GetQuiet reports whether the schedule was initialized in quiet mode
// (notifications suppressed).
func (s *Schedule) GetQuiet() bool {
	return s.quiet
}
   723  
   724  // GetCheckFrequency returns the duration between checks for the named alert. If the alert
   725  // does not exist an error is returned.
   726  func (s *Schedule) GetCheckFrequency(alertName string) (time.Duration, error) {
   727  	alertDef := s.RuleConf.GetAlert(alertName)
   728  	if alertDef == nil {
   729  		return 0, fmt.Errorf("can not get check frequency for alert %v, no such alert defined", alertName)
   730  	}
   731  	runEvery := alertDef.RunEvery
   732  	if runEvery == 0 {
   733  		runEvery = s.SystemConf.GetDefaultRunEvery()
   734  	}
   735  	return time.Duration(time.Duration(runEvery) * s.SystemConf.GetCheckFrequency()), nil
   736  
   737  }
   738  
   739  func (s *Schedule) GetSilence(T miniprofiler.Timer, ak models.AlertKey) *models.Silence {
   740  	var silenced SilenceTester
   741  	T.Step("Silenced", func(miniprofiler.Timer) {
   742  		silenced = s.Silenced()
   743  	})
   744  	return silenced(ak)
   745  }