bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/database/error_data.go (about)

     1  package database
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"time"
     7  
     8  	"bosun.org/models"
     9  	"github.com/garyburd/redigo/redis"
    10  )
    11  
    12  /*
    13  
    14  failingAlerts = set of currently failing alerts
    15  alertsWithErrors = set of alerts with any errors
    16  errorEvents = list of (alert) one per individual error event
errors:{name} = list of json objects for coalesced error events (most recent first).
    18  
    19  */
    20  
// ErrorDataAccess is the storage interface for alert error tracking: which
// alerts are currently failing and the history of their error events.
// The method notes below describe the dataAccess implementation in this file.
type ErrorDataAccess interface {
	// MarkAlertSuccess removes name from the set of currently failing alerts.
	MarkAlertSuccess(name string) error
	// MarkAlertFailure records a failure of alert name with message msg,
	// coalescing it with the latest stored event when the alert was already
	// failing with the same message.
	MarkAlertFailure(name string, msg string) error
	// GetFailingAlertCounts returns the number of currently failing alerts
	// and the number of recorded error events, in that order.
	GetFailingAlertCounts() (int, int, error)

	// GetFailingAlerts returns the currently failing alert names as a set.
	GetFailingAlerts() (map[string]bool, error)
	// IsAlertFailing reports whether name is in the set of failing alerts.
	IsAlertFailing(name string) (bool, error)

	// GetFullErrorHistory returns, for every alert that has recorded errors,
	// its list of coalesced error events.
	GetFullErrorHistory() (map[string][]*models.AlertError, error)
	// ClearAlert removes the error state stored for name.
	ClearAlert(name string) error
	// ClearAll removes the error state stored for all alerts.
	ClearAll() error
}
    33  
// Errors exposes d through the ErrorDataAccess interface; the receiver
// itself is returned.
func (d *dataAccess) Errors() ErrorDataAccess {
	return d
}
    37  
// Keys used by the error-tracking functions in this file.
const (
	// failingAlerts is a set holding the names of currently failing alerts.
	failingAlerts    = "failingAlerts"
	// errorEvents is a list that receives one alert name per recorded failure.
	errorEvents      = "errorEvents"
	// alertsWithErrors is a set holding the names of alerts that have any
	// recorded errors.
	alertsWithErrors = "alertsWithErrors"
)
    43  
    44  func (d *dataAccess) MarkAlertSuccess(name string) error {
    45  	conn := d.Get()
    46  	defer conn.Close()
    47  	_, err := conn.Do("SREM", failingAlerts, name)
    48  	return err
    49  }
    50  
    51  func (d *dataAccess) MarkAlertFailure(name string, msg string) error {
    52  	conn := d.Get()
    53  	defer conn.Close()
    54  
    55  	failing, err := d.IsAlertFailing(name)
    56  	if err != nil {
    57  		return err
    58  	}
    59  
    60  	if _, err := conn.Do("SADD", alertsWithErrors, name); err != nil {
    61  		return err
    62  	}
    63  	if _, err := conn.Do("SADD", failingAlerts, name); err != nil {
    64  		return err
    65  	}
    66  	var event *models.AlertError
    67  	if failing {
    68  		event, err = d.getLastErrorEvent(name)
    69  		if err != nil {
    70  			return err
    71  		}
    72  	}
    73  	now := time.Now().UTC().Truncate(time.Second)
    74  	if event == nil || event.Message != msg {
    75  		event = &models.AlertError{
    76  			FirstTime: now,
    77  			LastTime:  now,
    78  			Count:     1,
    79  			Message:   msg,
    80  		}
    81  	} else {
    82  		event.Count++
    83  		event.LastTime = now
    84  		// pop prior record
    85  		_, err = conn.Do("LPOP", errorListKey(name))
    86  		if err != nil {
    87  			return err
    88  		}
    89  	}
    90  	marshalled, err := json.Marshal(event)
    91  	if err != nil {
    92  		return err
    93  	}
    94  	_, err = conn.Do("LPUSH", errorListKey(name), marshalled)
    95  	if err != nil {
    96  		return err
    97  	}
    98  	_, err = conn.Do("LPUSH", errorEvents, name)
    99  	return err
   100  }
   101  
   102  func (d *dataAccess) GetFailingAlertCounts() (int, int, error) {
   103  	conn := d.Get()
   104  	defer conn.Close()
   105  	failing, err := redis.Int(conn.Do("SCARD", failingAlerts))
   106  	if err != nil {
   107  		return 0, 0, err
   108  	}
   109  	events, err := redis.Int(conn.Do("LLEN", errorEvents))
   110  	if err != nil {
   111  		return 0, 0, err
   112  	}
   113  	return failing, events, nil
   114  }
   115  
   116  func (d *dataAccess) GetFailingAlerts() (map[string]bool, error) {
   117  	conn := d.Get()
   118  	defer conn.Close()
   119  	alerts, err := redis.Strings(conn.Do("SMEMBERS", failingAlerts))
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  	r := make(map[string]bool, len(alerts))
   124  	for _, a := range alerts {
   125  		r[a] = true
   126  	}
   127  	return r, nil
   128  }
   129  func (d *dataAccess) IsAlertFailing(name string) (bool, error) {
   130  	conn := d.Get()
   131  	defer conn.Close()
   132  	return redis.Bool(conn.Do("SISMEMBER", failingAlerts, name))
   133  }
   134  
   135  func errorListKey(name string) string {
   136  	return fmt.Sprintf("errors:%s", name)
   137  }
   138  func (d *dataAccess) getLastErrorEvent(name string) (*models.AlertError, error) {
   139  	conn := d.Get()
   140  	defer conn.Close()
   141  	str, err := redis.Bytes(conn.Do("LINDEX", errorListKey(name), "0"))
   142  	if err != nil {
   143  		if err == redis.ErrNil {
   144  			return nil, nil
   145  		}
   146  		return nil, err
   147  	}
   148  	ev := &models.AlertError{}
   149  	if err = json.Unmarshal(str, ev); err != nil {
   150  		return nil, err
   151  	}
   152  	return ev, nil
   153  }
   154  
   155  func (d *dataAccess) GetFullErrorHistory() (map[string][]*models.AlertError, error) {
   156  	conn := d.Get()
   157  	defer conn.Close()
   158  
   159  	alerts, err := redis.Strings(conn.Do("SMEMBERS", alertsWithErrors))
   160  	if err != nil {
   161  		return nil, err
   162  	}
   163  	results := make(map[string][]*models.AlertError, len(alerts))
   164  	for _, a := range alerts {
   165  		rows, err := redis.Strings(conn.Do("LRANGE", errorListKey(a), 0, -1))
   166  		if err != nil {
   167  			return nil, err
   168  		}
   169  		list := make([]*models.AlertError, len(rows))
   170  		for i, row := range rows {
   171  			ae := &models.AlertError{}
   172  			err = json.Unmarshal([]byte(row), ae)
   173  			if err != nil {
   174  				return nil, err
   175  			}
   176  			list[i] = ae
   177  		}
   178  		results[a] = list
   179  	}
   180  	return results, nil
   181  }
   182  
   183  func (d *dataAccess) ClearAlert(name string) error {
   184  	conn := d.Get()
   185  	defer conn.Close()
   186  
   187  	_, err := conn.Do("SREM", alertsWithErrors, name)
   188  	if err != nil {
   189  		return err
   190  	}
   191  	_, err = conn.Do("SREM", failingAlerts, name)
   192  	if err != nil {
   193  		return err
   194  	}
   195  	_, err = conn.Do(d.LCLEAR(), errorListKey(name))
   196  	if err != nil {
   197  		return err
   198  	}
   199  	cmd, args := d.LMCLEAR(errorEvents, name)
   200  	_, err = conn.Do(cmd, args...)
   201  	if err != nil {
   202  		return err
   203  	}
   204  
   205  	return nil
   206  }
   207  
   208  //Things could forseeably get a bit inconsistent if concurrent changes happen in just the wrong way.
   209  //Clear all should do a more thourogh cleanup to fully reset things.
   210  func (d *dataAccess) ClearAll() error {
   211  	conn := d.Get()
   212  	defer conn.Close()
   213  
   214  	alerts, err := redis.Strings(conn.Do("SMEMBERS", alertsWithErrors))
   215  	if err != nil {
   216  		return err
   217  	}
   218  	for _, a := range alerts {
   219  		if _, err := conn.Do(d.LCLEAR(), errorListKey(a)); err != nil {
   220  			return err
   221  		}
   222  	}
   223  	if _, err := conn.Do(d.SCLEAR(), alertsWithErrors); err != nil {
   224  		return err
   225  	}
   226  	if _, err := conn.Do(d.SCLEAR(), failingAlerts); err != nil {
   227  		return err
   228  	}
   229  	if _, err = conn.Do(d.LCLEAR(), errorEvents); err != nil {
   230  		return err
   231  	}
   232  
   233  	return nil
   234  }