bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/database/error_data.go (about) 1 package database 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "time" 7 8 "bosun.org/models" 9 "github.com/garyburd/redigo/redis" 10 ) 11 12 /* 13 14 failingAlerts = set of currently failing alerts 15 alertsWithErrors = set of alerts with any errors 16 errorEvents = list of (alert) one per individual error event 17 error:{name} = list of json objects for coalesced error events (most recent first). 18 19 */ 20 21 type ErrorDataAccess interface { 22 MarkAlertSuccess(name string) error 23 MarkAlertFailure(name string, msg string) error 24 GetFailingAlertCounts() (int, int, error) 25 26 GetFailingAlerts() (map[string]bool, error) 27 IsAlertFailing(name string) (bool, error) 28 29 GetFullErrorHistory() (map[string][]*models.AlertError, error) 30 ClearAlert(name string) error 31 ClearAll() error 32 } 33 34 func (d *dataAccess) Errors() ErrorDataAccess { 35 return d 36 } 37 38 const ( 39 failingAlerts = "failingAlerts" 40 errorEvents = "errorEvents" 41 alertsWithErrors = "alertsWithErrors" 42 ) 43 44 func (d *dataAccess) MarkAlertSuccess(name string) error { 45 conn := d.Get() 46 defer conn.Close() 47 _, err := conn.Do("SREM", failingAlerts, name) 48 return err 49 } 50 51 func (d *dataAccess) MarkAlertFailure(name string, msg string) error { 52 conn := d.Get() 53 defer conn.Close() 54 55 failing, err := d.IsAlertFailing(name) 56 if err != nil { 57 return err 58 } 59 60 if _, err := conn.Do("SADD", alertsWithErrors, name); err != nil { 61 return err 62 } 63 if _, err := conn.Do("SADD", failingAlerts, name); err != nil { 64 return err 65 } 66 var event *models.AlertError 67 if failing { 68 event, err = d.getLastErrorEvent(name) 69 if err != nil { 70 return err 71 } 72 } 73 now := time.Now().UTC().Truncate(time.Second) 74 if event == nil || event.Message != msg { 75 event = &models.AlertError{ 76 FirstTime: now, 77 LastTime: now, 78 Count: 1, 79 Message: msg, 80 } 81 } else { 82 event.Count++ 83 event.LastTime = now 84 // pop prior record 85 _, err = conn.Do("LPOP", errorListKey(name)) 86 if err != nil { 87 return err 88 } 89 } 90 marshalled, err := json.Marshal(event) 91 if err != nil { 92 return err 93 } 94 _, err = conn.Do("LPUSH", errorListKey(name), marshalled) 95 if err != nil { 96 return err 97 } 98 _, err = conn.Do("LPUSH", errorEvents, name) 99 return err 100 } 101 102 func (d *dataAccess) GetFailingAlertCounts() (int, int, error) { 103 conn := d.Get() 104 defer conn.Close() 105 failing, err := redis.Int(conn.Do("SCARD", failingAlerts)) 106 if err != nil { 107 return 0, 0, err 108 } 109 events, err := redis.Int(conn.Do("LLEN", errorEvents)) 110 if err != nil { 111 return 0, 0, err 112 } 113 return failing, events, nil 114 } 115 116 func (d *dataAccess) GetFailingAlerts() (map[string]bool, error) { 117 conn := d.Get() 118 defer conn.Close() 119 alerts, err := redis.Strings(conn.Do("SMEMBERS", failingAlerts)) 120 if err != nil { 121 return nil, err 122 } 123 r := make(map[string]bool, len(alerts)) 124 for _, a := range alerts { 125 r[a] = true 126 } 127 return r, nil 128 } 129 func (d *dataAccess) IsAlertFailing(name string) (bool, error) { 130 conn := d.Get() 131 defer conn.Close() 132 return redis.Bool(conn.Do("SISMEMBER", failingAlerts, name)) 133 } 134 135 func errorListKey(name string) string { 136 return fmt.Sprintf("errors:%s", name) 137 } 138 func (d *dataAccess) getLastErrorEvent(name string) (*models.AlertError, error) { 139 conn := d.Get() 140 defer conn.Close() 141 str, err := redis.Bytes(conn.Do("LINDEX", errorListKey(name), "0")) 142 if err != nil { 143 if err == redis.ErrNil { 144 return nil, nil 145 } 146 return nil, err 147 } 148 ev := &models.AlertError{} 149 if err = json.Unmarshal(str, ev); err != nil { 150 return nil, err 151 } 152 return ev, nil 153 } 154 155 func (d *dataAccess) GetFullErrorHistory() (map[string][]*models.AlertError, error) { 156 conn := d.Get() 157 defer conn.Close() 158 159 alerts, err := redis.Strings(conn.Do("SMEMBERS", alertsWithErrors)) 160 if err != nil { 161 return nil, err 162 } 163 results := make(map[string][]*models.AlertError, len(alerts)) 164 for _, a := range alerts { 165 rows, err := redis.Strings(conn.Do("LRANGE", errorListKey(a), 0, -1)) 166 if err != nil { 167 return nil, err 168 } 169 list := make([]*models.AlertError, len(rows)) 170 for i, row := range rows { 171 ae := &models.AlertError{} 172 err = json.Unmarshal([]byte(row), ae) 173 if err != nil { 174 return nil, err 175 } 176 list[i] = ae 177 } 178 results[a] = list 179 } 180 return results, nil 181 } 182 183 func (d *dataAccess) ClearAlert(name string) error { 184 conn := d.Get() 185 defer conn.Close() 186 187 _, err := conn.Do("SREM", alertsWithErrors, name) 188 if err != nil { 189 return err 190 } 191 _, err = conn.Do("SREM", failingAlerts, name) 192 if err != nil { 193 return err 194 } 195 _, err = conn.Do(d.LCLEAR(), errorListKey(name)) 196 if err != nil { 197 return err 198 } 199 cmd, args := d.LMCLEAR(errorEvents, name) 200 _, err = conn.Do(cmd, args...) 201 if err != nil { 202 return err 203 } 204 205 return nil 206 } 207 208 //Things could forseeably get a bit inconsistent if concurrent changes happen in just the wrong way. 209 //Clear all should do a more thourogh cleanup to fully reset things. 210 func (d *dataAccess) ClearAll() error { 211 conn := d.Get() 212 defer conn.Close() 213 214 alerts, err := redis.Strings(conn.Do("SMEMBERS", alertsWithErrors)) 215 if err != nil { 216 return err 217 } 218 for _, a := range alerts { 219 if _, err := conn.Do(d.LCLEAR(), errorListKey(a)); err != nil { 220 return err 221 } 222 } 223 if _, err := conn.Do(d.SCLEAR(), alertsWithErrors); err != nil { 224 return err 225 } 226 if _, err := conn.Do(d.SCLEAR(), failingAlerts); err != nil { 227 return err 228 } 229 if _, err = conn.Do(d.LCLEAR(), errorEvents); err != nil { 230 return err 231 } 232 233 return nil 234 }