github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/scout/cmd/watch.go (about) 1 // Copyright (c) 2020-2024, R.I. Pienaar and the Choria Project contributors 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 5 package scoutcmd 6 7 import ( 8 "bytes" 9 "context" 10 "encoding/json" 11 "errors" 12 "fmt" 13 "io" 14 "slices" 15 "strconv" 16 "strings" 17 "sync" 18 "time" 19 20 "github.com/awesome-gocui/gocui" 21 "github.com/choria-io/go-choria/inter" 22 cloudevents "github.com/cloudevents/sdk-go/v2" 23 "github.com/fatih/color" 24 "github.com/nats-io/jsm.go" 25 "github.com/nats-io/nats.go" 26 "github.com/sirupsen/logrus" 27 28 "github.com/choria-io/go-choria/aagent/machine" 29 "github.com/choria-io/go-choria/aagent/watchers/nagioswatcher" 30 "github.com/choria-io/go-choria/scout/stream" 31 ) 32 33 type WatchCommand struct { 34 identity string 35 check string 36 ignoreMachineTransitions []string 37 perf bool 38 noOK bool 39 longestCheck int 40 longestId int 41 statePattern string 42 history time.Duration 43 nc inter.Connector 44 45 transEph *stream.Ephemeral 46 stateEph *stream.Ephemeral 47 48 status map[string]map[string]string 49 seen map[string]time.Time 50 vwBuffers map[string][]string 51 52 log *logrus.Entry 53 sync.Mutex 54 } 55 56 func NewWatchCommand(idf string, checkf string, ignoreMachineTransitions []string, perf bool, noOK bool, history time.Duration, nc inter.Connector, log *logrus.Entry) (*WatchCommand, error) { 57 w := &WatchCommand{ 58 identity: idf, 59 check: checkf, 60 ignoreMachineTransitions: ignoreMachineTransitions, 61 perf: perf, 62 noOK: noOK, 63 history: history, 64 nc: nc, 65 log: log, 66 status: make(map[string]map[string]string), 67 seen: make(map[string]time.Time), 68 vwBuffers: make(map[string][]string), 69 } 70 71 return w, nil 72 } 73 74 func (w *WatchCommand) Run(ctx context.Context, wg *sync.WaitGroup) (err error) { 75 defer wg.Done() 76 77 lctx, cancel := context.WithCancel(ctx) 78 defer cancel() 79 80 if w.history > time.Hour { 81 return fmt.Errorf("maximum history that can be fetched is 1 hour") 82 } 83 84 gui, err := w.setupWindows() 85 if err != nil { 86 return err 87 } 88 defer gui.Close() 89 90 transitions := make(chan *nats.Msg, 1000) 91 states := make(chan *nats.Msg, 1000) 92 93 go func() { 94 var m *nats.Msg 95 96 for { 97 select { 98 case m = <-transitions: 99 w.handleTransition(m, gui) 100 case m = <-states: 101 w.handleState(m, gui) 102 case <-ctx.Done(): 103 return 104 } 105 106 // no history means no jetstream 107 if m.Reply == "" { 108 continue 109 } 110 111 m.Ack() 112 } 113 }() 114 115 if w.history > 0 { 116 err = w.subscribeJetStream(lctx, transitions, states) 117 } else { 118 err = w.subscribeDirect(transitions, states) 119 } 120 if err != nil { 121 return err 122 } 123 124 err = gui.MainLoop() 125 if err != gocui.ErrQuit { 126 return err 127 } 128 129 cancel() 130 w.nc.Close() 131 132 return nil 133 } 134 135 func (w *WatchCommand) dataFromCloudEventJSON(j []byte) ([]byte, error) { 136 event := cloudevents.NewEvent("1.0") 137 err := event.UnmarshalJSON(j) 138 if err != nil { 139 return nil, err 140 } 141 142 return event.Data(), nil 143 } 144 145 func (w *WatchCommand) handleTransition(m *nats.Msg, gui *gocui.Gui) { 146 if m == nil { 147 return 148 } 149 150 data, err := w.dataFromCloudEventJSON(m.Data) 151 if err != nil { 152 w.log.Errorf("could not parse cloud event: %s", err) 153 return 154 } 155 156 transition := &machine.TransitionNotification{} 157 err = json.Unmarshal(data, transition) 158 if err != nil { 159 w.log.Errorf("Could not decode received transition message: %s: %s", string(data), err) 160 return 161 } 162 163 if slices.Contains(w.ignoreMachineTransitions, transition.Machine) { 164 return 165 } 166 if w.identity != "" && !strings.Contains(transition.Identity, w.identity) { 167 return 168 } 169 if w.check != "" && !strings.Contains(transition.Machine, w.check) { 170 return 171 } 172 173 w.transEph.SetResumeSequence(m) 174 175 w.Lock() 176 defer w.Unlock() 177 178 w.updateView(gui, "Transitions", true, func(o io.Writer, _ *gocui.View) { 179 fmt.Fprintf(o, "%s %-20s %s => %s %s\n", 180 time.Unix(transition.Timestamp, 0).Format("15:04:05"), 181 transition.Identity, 182 w.colorizeState(transition.FromState), 183 w.colorizeState(transition.ToState), 184 transition.Machine) 185 }) 186 } 187 188 func (w *WatchCommand) colorizeState(state string) string { 189 switch state { 190 case "OK": 191 return color.GreenString("OK ") 192 case "WARNING": 193 return color.YellowString("WARN") 194 case "CRITICAL": 195 return color.RedString("CRIT") 196 case "UNKNOWN": 197 return color.HiWhiteString("UNKN") 198 default: 199 if state == "" { 200 return color.CyanString("INVA") 201 } 202 return color.CyanString(state) 203 } 204 } 205 206 func (w *WatchCommand) handleState(m *nats.Msg, gui *gocui.Gui) { 207 if m == nil { 208 return 209 } 210 211 data, err := w.dataFromCloudEventJSON(m.Data) 212 if err != nil { 213 w.log.Errorf("could not parse cloud event: %s", err) 214 return 215 } 216 217 var state nagioswatcher.StateNotification 218 err = json.Unmarshal(data, &state) 219 if err != nil { 220 w.log.Error(err) 221 return 222 } 223 224 if w.identity != "" && !strings.Contains(state.Identity, w.identity) { 225 return 226 } 227 if w.check != "" && !strings.Contains(state.Machine, w.check) { 228 return 229 } 230 output := strings.Split(state.Output, "|") 231 w.stateEph.SetResumeSequence(m) 232 233 w.Lock() 234 defer w.Unlock() 235 236 changed := w.updateStatus(gui, &state) 237 238 if !changed && w.noOK && state.StatusCode == 0 { 239 return 240 } 241 242 update := false 243 if w.longestCheck < len(state.Machine) { 244 w.longestCheck = len(state.Machine) 245 update = true 246 } 247 248 if w.longestId < len(state.Identity) { 249 w.longestId = len(state.Identity) 250 update = true 251 } 252 253 if w.statePattern == "" || update { 254 w.statePattern = "%s %s %" + strconv.Itoa(w.longestId) + "s %" + strconv.Itoa(w.longestCheck) + "s: " 255 } 256 257 w.updateView(gui, "Checks", true, func(o io.Writer, _ *gocui.View) { 258 pre := fmt.Sprintf(w.statePattern, time.Unix(state.Timestamp, 0).Format("15:04:05"), w.colorizeState(state.Status), state.Identity, state.Machine) 259 line := pre + output[0] 260 fmt.Fprintln(o, line) 261 262 if w.perf { 263 for _, p := range state.PerfData { 264 fmt.Fprintf(o, "%-"+strconv.Itoa(len(pre)-10)+"s %s = %v %s\n", "", p.Label, p.Value, p.Unit) 265 } 266 } 267 }) 268 } 269 270 func (w *WatchCommand) updateStatus(gui *gocui.Gui, state *nagioswatcher.StateNotification) bool { 271 _, has := w.status[state.Identity] 272 if !has { 273 w.status[state.Identity] = map[string]string{ 274 state.Machine: "UNKNOWN", 275 } 276 } 277 278 previous := w.status[state.Identity][state.Machine] 279 w.status[state.Identity][state.Machine] = state.Status 280 w.seen[state.Identity] = time.Now() 281 282 cnt := 0 283 ok, warn, crit, unknown := 0, 0, 0, 0 284 for id, node := range w.status { 285 if time.Since(w.seen[id]) > 10*time.Minute { 286 delete(w.seen, id) 287 delete(w.status, id) 288 continue 289 } 290 291 cnt++ 292 for _, val := range node { 293 switch val { 294 case "OK": 295 ok++ 296 case "CRITICAL": 297 crit++ 298 case "WARNING": 299 warn++ 300 case "UNKNOWN": 301 unknown++ 302 } 303 } 304 } 305 306 w.updateView(gui, "Status", false, func(o io.Writer, vw *gocui.View) { 307 vw.Clear() 308 309 if crit > 0 { 310 vw.FgColor = gocui.ColorRed 311 } else if warn > 0 { 312 vw.FgColor = gocui.ColorYellow 313 } else if unknown > 0 { 314 vw.FgColor = gocui.ColorDefault 315 } else if ok > 0 { 316 vw.FgColor = gocui.ColorGreen 317 } 318 319 fmt.Fprintf(o, "\t%s: IDENTITIES: %d OK: %d WARNING: %d CRITICAL: %d UNKNOWN: %d", time.Unix(state.Timestamp, 0).Format("15:04:05"), cnt, ok, warn, crit, unknown) 320 }) 321 322 return previous != state.Status 323 } 324 325 func (w *WatchCommand) updateView(gui *gocui.Gui, view string, buffered bool, t func(io.Writer, *gocui.View)) { 326 gui.Update(func(g *gocui.Gui) error { 327 vw, err := g.View(view) 328 if err != nil { 329 return nil 330 } 331 332 if !buffered { 333 t(vw, vw) 334 return nil 335 } 336 337 var buf bytes.Buffer 338 t(&buf, vw) 339 340 vb, ok := w.vwBuffers[view] 341 if !ok { 342 w.vwBuffers[view] = []string{} 343 } 344 345 if len(vb) > 300 { 346 old := w.vwBuffers[view] 347 w.vwBuffers[view] = []string{} 348 w.vwBuffers[view] = old[150:] 349 vw.Clear() 350 for _, line := range w.vwBuffers[view] { 351 fmt.Fprint(vw, line) 352 } 353 } 354 355 line := buf.String() 356 w.vwBuffers[view] = append(w.vwBuffers[view], line) 357 fmt.Fprint(vw, line) 358 359 return nil 360 }) 361 } 362 363 func (w *WatchCommand) setupWindows() (gui *gocui.Gui, err error) { 364 g, err := gocui.NewGui(gocui.Output256, false) 365 if err != nil { 366 return nil, err 367 } 368 369 offset := 0 370 layout := func(g *gocui.Gui) error { 371 maxX, maxY := g.Size() 372 midY := (maxY / 5) * 4 373 374 // dont make transitions too small 375 if midY+offset < 4 { 376 w.Lock() 377 offset = (midY * -1) + 3 378 w.Unlock() 379 } 380 381 // dont make status too small 382 if midY+offset > maxY-9 { 383 w.Lock() 384 offset = maxY - 9 - midY 385 w.Unlock() 386 } 387 388 t, err := g.SetView("Checks", 0, 0, maxX-1, midY+offset, 0) 389 if err != nil { 390 if !errors.Is(err, gocui.ErrUnknownView) { 391 panic(err) 392 } 393 t.Autoscroll = true 394 t.Overwrite = true 395 t.Title = " Checks " 396 t.Frame = true 397 } 398 399 c, err := g.SetView("Transitions", 0, midY+offset+1, maxX-1, maxY-5, 0) 400 if err != nil { 401 if !errors.Is(err, gocui.ErrUnknownView) { 402 panic(err) 403 } 404 c.Autoscroll = true 405 c.Overwrite = true 406 c.Title = " Transitions " 407 c.Frame = true 408 } 409 410 s, err := g.SetView("Status", 0, maxY-4, maxX-1, maxY-2, 0) 411 if err != nil { 412 if !errors.Is(err, gocui.ErrUnknownView) { 413 panic(err) 414 } 415 s.Frame = true 416 s.Title = " Observed Status " 417 fmt.Fprintf(s, "Waiting for updates...") 418 } 419 420 h, err := g.SetView("Help", 0, maxY-2, maxX-1, maxY, 0) 421 if err != nil { 422 if !errors.Is(err, gocui.ErrUnknownView) { 423 panic(err) 424 } 425 h.Frame = false 426 idf := "" 427 cf := "" 428 if w.identity != "" { 429 idf = fmt.Sprintf(" identity %q", w.identity) 430 } 431 if w.check != "" { 432 cf = fmt.Sprintf(" check %q", w.check) 433 } 434 435 if idf != "" || cf != "" { 436 fmt.Fprintf(h, "Choria Scout Event Viewer: showing%s%s. Arrows resize, ^R reset view, ^L clear, ^C to exit", idf, cf) 437 } else { 438 fmt.Fprintf(h, "Choria Scout Event Viewer showing all events. Arrows resize, ^R reset view, ^L clear, ^C to exit") 439 } 440 } 441 442 return nil 443 } 444 445 g.SetManagerFunc(layout) 446 err = g.SetKeybinding("", gocui.KeyArrowDown, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { 447 w.Lock() 448 offset++ 449 w.Unlock() 450 return nil 451 }) 452 if err != nil { 453 return nil, err 454 } 455 456 err = g.SetKeybinding("", gocui.KeyArrowUp, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { 457 w.Lock() 458 offset-- 459 w.Unlock() 460 return nil 461 }) 462 if err != nil { 463 return nil, err 464 } 465 466 err = g.SetKeybinding("", gocui.KeyCtrlR, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { 467 w.Lock() 468 offset = 0 469 w.Unlock() 470 return nil 471 }) 472 if err != nil { 473 return nil, err 474 } 475 476 err = g.SetKeybinding("", gocui.KeyCtrlC, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { return gocui.ErrQuit }) 477 if err != nil { 478 g.Close() 479 return nil, err 480 } 481 482 err = g.SetKeybinding("", gocui.KeyEsc, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { return gocui.ErrQuit }) 483 if err != nil { 484 g.Close() 485 return nil, err 486 } 487 488 err = g.SetKeybinding("", gocui.KeyCtrlL, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { 489 vw, err := g.View("Transitions") 490 if err == nil { 491 vw.Clear() 492 } 493 vw, err = g.View("Checks") 494 if err == nil { 495 vw.Clear() 496 } 497 return nil 498 }) 499 if err != nil { 500 return nil, err 501 } 502 503 return g, nil 504 } 505 506 func (w *WatchCommand) subscribeJetStream(ctx context.Context, transitions chan *nats.Msg, states chan *nats.Msg) error { 507 mgr, err := jsm.New(w.nc.Nats()) 508 if err != nil { 509 return err 510 } 511 512 str, err := mgr.LoadStream("CHORIA_MACHINE") 513 if err != nil { 514 return err 515 } 516 517 w.transEph, err = stream.NewEphemeral(ctx, w.nc.Nats(), str, time.Minute, transitions, w.log, jsm.FilterStreamBySubject("choria.machine.transition"), jsm.StartAtTimeDelta(w.history), jsm.AcknowledgeExplicit(), jsm.MaxAckPending(50), jsm.MaxDeliveryAttempts(1)) 518 if err != nil { 519 return fmt.Errorf("could not subscribe to Choria Streaming stream CHORIA_MACHINE: %s", err) 520 } 521 522 w.stateEph, err = stream.NewEphemeral(ctx, w.nc.Nats(), str, time.Minute, states, w.log, jsm.FilterStreamBySubject("choria.machine.watcher.nagios.state"), jsm.StartAtTimeDelta(w.history), jsm.AcknowledgeExplicit(), jsm.MaxAckPending(50), jsm.MaxDeliveryAttempts(1)) 523 if err != nil { 524 return fmt.Errorf("could not subscribe to Choria Streaming stream CHORIA_MACHINE: %s", err) 525 } 526 527 return nil 528 } 529 530 func (w *WatchCommand) subscribeDirect(transitions chan *nats.Msg, states chan *nats.Msg) error { 531 nc := w.nc.Nats() 532 _, err := nc.ChanSubscribe("choria.machine.transition", transitions) 533 if err != nil { 534 return fmt.Errorf("could not subscribe to transitions: %s", err) 535 } 536 537 _, err = nc.ChanSubscribe("choria.machine.watcher.nagios.state", states) 538 if err != nil { 539 return fmt.Errorf("could not subscribe to states: %s", err) 540 } 541 542 return nil 543 }