github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/scout/cmd/watch.go (about)

     1  // Copyright (c) 2020-2024, R.I. Pienaar and the Choria Project contributors
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package scoutcmd
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"encoding/json"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"slices"
    15  	"strconv"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/awesome-gocui/gocui"
    21  	"github.com/choria-io/go-choria/inter"
    22  	cloudevents "github.com/cloudevents/sdk-go/v2"
    23  	"github.com/fatih/color"
    24  	"github.com/nats-io/jsm.go"
    25  	"github.com/nats-io/nats.go"
    26  	"github.com/sirupsen/logrus"
    27  
    28  	"github.com/choria-io/go-choria/aagent/machine"
    29  	"github.com/choria-io/go-choria/aagent/watchers/nagioswatcher"
    30  	"github.com/choria-io/go-choria/scout/stream"
    31  )
    32  
    33  type WatchCommand struct {
    34  	identity                 string
    35  	check                    string
    36  	ignoreMachineTransitions []string
    37  	perf                     bool
    38  	noOK                     bool
    39  	longestCheck             int
    40  	longestId                int
    41  	statePattern             string
    42  	history                  time.Duration
    43  	nc                       inter.Connector
    44  
    45  	transEph *stream.Ephemeral
    46  	stateEph *stream.Ephemeral
    47  
    48  	status    map[string]map[string]string
    49  	seen      map[string]time.Time
    50  	vwBuffers map[string][]string
    51  
    52  	log *logrus.Entry
    53  	sync.Mutex
    54  }
    55  
    56  func NewWatchCommand(idf string, checkf string, ignoreMachineTransitions []string, perf bool, noOK bool, history time.Duration, nc inter.Connector, log *logrus.Entry) (*WatchCommand, error) {
    57  	w := &WatchCommand{
    58  		identity:                 idf,
    59  		check:                    checkf,
    60  		ignoreMachineTransitions: ignoreMachineTransitions,
    61  		perf:                     perf,
    62  		noOK:                     noOK,
    63  		history:                  history,
    64  		nc:                       nc,
    65  		log:                      log,
    66  		status:                   make(map[string]map[string]string),
    67  		seen:                     make(map[string]time.Time),
    68  		vwBuffers:                make(map[string][]string),
    69  	}
    70  
    71  	return w, nil
    72  }
    73  
    74  func (w *WatchCommand) Run(ctx context.Context, wg *sync.WaitGroup) (err error) {
    75  	defer wg.Done()
    76  
    77  	lctx, cancel := context.WithCancel(ctx)
    78  	defer cancel()
    79  
    80  	if w.history > time.Hour {
    81  		return fmt.Errorf("maximum history that can be fetched is 1 hour")
    82  	}
    83  
    84  	gui, err := w.setupWindows()
    85  	if err != nil {
    86  		return err
    87  	}
    88  	defer gui.Close()
    89  
    90  	transitions := make(chan *nats.Msg, 1000)
    91  	states := make(chan *nats.Msg, 1000)
    92  
    93  	go func() {
    94  		var m *nats.Msg
    95  
    96  		for {
    97  			select {
    98  			case m = <-transitions:
    99  				w.handleTransition(m, gui)
   100  			case m = <-states:
   101  				w.handleState(m, gui)
   102  			case <-ctx.Done():
   103  				return
   104  			}
   105  
   106  			// no history means no jetstream
   107  			if m.Reply == "" {
   108  				continue
   109  			}
   110  
   111  			m.Ack()
   112  		}
   113  	}()
   114  
   115  	if w.history > 0 {
   116  		err = w.subscribeJetStream(lctx, transitions, states)
   117  	} else {
   118  		err = w.subscribeDirect(transitions, states)
   119  	}
   120  	if err != nil {
   121  		return err
   122  	}
   123  
   124  	err = gui.MainLoop()
   125  	if err != gocui.ErrQuit {
   126  		return err
   127  	}
   128  
   129  	cancel()
   130  	w.nc.Close()
   131  
   132  	return nil
   133  }
   134  
   135  func (w *WatchCommand) dataFromCloudEventJSON(j []byte) ([]byte, error) {
   136  	event := cloudevents.NewEvent("1.0")
   137  	err := event.UnmarshalJSON(j)
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  
   142  	return event.Data(), nil
   143  }
   144  
   145  func (w *WatchCommand) handleTransition(m *nats.Msg, gui *gocui.Gui) {
   146  	if m == nil {
   147  		return
   148  	}
   149  
   150  	data, err := w.dataFromCloudEventJSON(m.Data)
   151  	if err != nil {
   152  		w.log.Errorf("could not parse cloud event: %s", err)
   153  		return
   154  	}
   155  
   156  	transition := &machine.TransitionNotification{}
   157  	err = json.Unmarshal(data, transition)
   158  	if err != nil {
   159  		w.log.Errorf("Could not decode received transition message: %s: %s", string(data), err)
   160  		return
   161  	}
   162  
   163  	if slices.Contains(w.ignoreMachineTransitions, transition.Machine) {
   164  		return
   165  	}
   166  	if w.identity != "" && !strings.Contains(transition.Identity, w.identity) {
   167  		return
   168  	}
   169  	if w.check != "" && !strings.Contains(transition.Machine, w.check) {
   170  		return
   171  	}
   172  
   173  	w.transEph.SetResumeSequence(m)
   174  
   175  	w.Lock()
   176  	defer w.Unlock()
   177  
   178  	w.updateView(gui, "Transitions", true, func(o io.Writer, _ *gocui.View) {
   179  		fmt.Fprintf(o, "%s %-20s %s => %s %s\n",
   180  			time.Unix(transition.Timestamp, 0).Format("15:04:05"),
   181  			transition.Identity,
   182  			w.colorizeState(transition.FromState),
   183  			w.colorizeState(transition.ToState),
   184  			transition.Machine)
   185  	})
   186  }
   187  
   188  func (w *WatchCommand) colorizeState(state string) string {
   189  	switch state {
   190  	case "OK":
   191  		return color.GreenString("OK  ")
   192  	case "WARNING":
   193  		return color.YellowString("WARN")
   194  	case "CRITICAL":
   195  		return color.RedString("CRIT")
   196  	case "UNKNOWN":
   197  		return color.HiWhiteString("UNKN")
   198  	default:
   199  		if state == "" {
   200  			return color.CyanString("INVA")
   201  		}
   202  		return color.CyanString(state)
   203  	}
   204  }
   205  
   206  func (w *WatchCommand) handleState(m *nats.Msg, gui *gocui.Gui) {
   207  	if m == nil {
   208  		return
   209  	}
   210  
   211  	data, err := w.dataFromCloudEventJSON(m.Data)
   212  	if err != nil {
   213  		w.log.Errorf("could not parse cloud event: %s", err)
   214  		return
   215  	}
   216  
   217  	var state nagioswatcher.StateNotification
   218  	err = json.Unmarshal(data, &state)
   219  	if err != nil {
   220  		w.log.Error(err)
   221  		return
   222  	}
   223  
   224  	if w.identity != "" && !strings.Contains(state.Identity, w.identity) {
   225  		return
   226  	}
   227  	if w.check != "" && !strings.Contains(state.Machine, w.check) {
   228  		return
   229  	}
   230  	output := strings.Split(state.Output, "|")
   231  	w.stateEph.SetResumeSequence(m)
   232  
   233  	w.Lock()
   234  	defer w.Unlock()
   235  
   236  	changed := w.updateStatus(gui, &state)
   237  
   238  	if !changed && w.noOK && state.StatusCode == 0 {
   239  		return
   240  	}
   241  
   242  	update := false
   243  	if w.longestCheck < len(state.Machine) {
   244  		w.longestCheck = len(state.Machine)
   245  		update = true
   246  	}
   247  
   248  	if w.longestId < len(state.Identity) {
   249  		w.longestId = len(state.Identity)
   250  		update = true
   251  	}
   252  
   253  	if w.statePattern == "" || update {
   254  		w.statePattern = "%s %s %" + strconv.Itoa(w.longestId) + "s %" + strconv.Itoa(w.longestCheck) + "s: "
   255  	}
   256  
   257  	w.updateView(gui, "Checks", true, func(o io.Writer, _ *gocui.View) {
   258  		pre := fmt.Sprintf(w.statePattern, time.Unix(state.Timestamp, 0).Format("15:04:05"), w.colorizeState(state.Status), state.Identity, state.Machine)
   259  		line := pre + output[0]
   260  		fmt.Fprintln(o, line)
   261  
   262  		if w.perf {
   263  			for _, p := range state.PerfData {
   264  				fmt.Fprintf(o, "%-"+strconv.Itoa(len(pre)-10)+"s %s = %v %s\n", "", p.Label, p.Value, p.Unit)
   265  			}
   266  		}
   267  	})
   268  }
   269  
   270  func (w *WatchCommand) updateStatus(gui *gocui.Gui, state *nagioswatcher.StateNotification) bool {
   271  	_, has := w.status[state.Identity]
   272  	if !has {
   273  		w.status[state.Identity] = map[string]string{
   274  			state.Machine: "UNKNOWN",
   275  		}
   276  	}
   277  
   278  	previous := w.status[state.Identity][state.Machine]
   279  	w.status[state.Identity][state.Machine] = state.Status
   280  	w.seen[state.Identity] = time.Now()
   281  
   282  	cnt := 0
   283  	ok, warn, crit, unknown := 0, 0, 0, 0
   284  	for id, node := range w.status {
   285  		if time.Since(w.seen[id]) > 10*time.Minute {
   286  			delete(w.seen, id)
   287  			delete(w.status, id)
   288  			continue
   289  		}
   290  
   291  		cnt++
   292  		for _, val := range node {
   293  			switch val {
   294  			case "OK":
   295  				ok++
   296  			case "CRITICAL":
   297  				crit++
   298  			case "WARNING":
   299  				warn++
   300  			case "UNKNOWN":
   301  				unknown++
   302  			}
   303  		}
   304  	}
   305  
   306  	w.updateView(gui, "Status", false, func(o io.Writer, vw *gocui.View) {
   307  		vw.Clear()
   308  
   309  		if crit > 0 {
   310  			vw.FgColor = gocui.ColorRed
   311  		} else if warn > 0 {
   312  			vw.FgColor = gocui.ColorYellow
   313  		} else if unknown > 0 {
   314  			vw.FgColor = gocui.ColorDefault
   315  		} else if ok > 0 {
   316  			vw.FgColor = gocui.ColorGreen
   317  		}
   318  
   319  		fmt.Fprintf(o, "\t%s: IDENTITIES: %d OK: %d WARNING: %d CRITICAL: %d UNKNOWN: %d", time.Unix(state.Timestamp, 0).Format("15:04:05"), cnt, ok, warn, crit, unknown)
   320  	})
   321  
   322  	return previous != state.Status
   323  }
   324  
   325  func (w *WatchCommand) updateView(gui *gocui.Gui, view string, buffered bool, t func(io.Writer, *gocui.View)) {
   326  	gui.Update(func(g *gocui.Gui) error {
   327  		vw, err := g.View(view)
   328  		if err != nil {
   329  			return nil
   330  		}
   331  
   332  		if !buffered {
   333  			t(vw, vw)
   334  			return nil
   335  		}
   336  
   337  		var buf bytes.Buffer
   338  		t(&buf, vw)
   339  
   340  		vb, ok := w.vwBuffers[view]
   341  		if !ok {
   342  			w.vwBuffers[view] = []string{}
   343  		}
   344  
   345  		if len(vb) > 300 {
   346  			old := w.vwBuffers[view]
   347  			w.vwBuffers[view] = []string{}
   348  			w.vwBuffers[view] = old[150:]
   349  			vw.Clear()
   350  			for _, line := range w.vwBuffers[view] {
   351  				fmt.Fprint(vw, line)
   352  			}
   353  		}
   354  
   355  		line := buf.String()
   356  		w.vwBuffers[view] = append(w.vwBuffers[view], line)
   357  		fmt.Fprint(vw, line)
   358  
   359  		return nil
   360  	})
   361  }
   362  
   363  func (w *WatchCommand) setupWindows() (gui *gocui.Gui, err error) {
   364  	g, err := gocui.NewGui(gocui.Output256, false)
   365  	if err != nil {
   366  		return nil, err
   367  	}
   368  
   369  	offset := 0
   370  	layout := func(g *gocui.Gui) error {
   371  		maxX, maxY := g.Size()
   372  		midY := (maxY / 5) * 4
   373  
   374  		// dont make transitions too small
   375  		if midY+offset < 4 {
   376  			w.Lock()
   377  			offset = (midY * -1) + 3
   378  			w.Unlock()
   379  		}
   380  
   381  		// dont make status too small
   382  		if midY+offset > maxY-9 {
   383  			w.Lock()
   384  			offset = maxY - 9 - midY
   385  			w.Unlock()
   386  		}
   387  
   388  		t, err := g.SetView("Checks", 0, 0, maxX-1, midY+offset, 0)
   389  		if err != nil {
   390  			if !errors.Is(err, gocui.ErrUnknownView) {
   391  				panic(err)
   392  			}
   393  			t.Autoscroll = true
   394  			t.Overwrite = true
   395  			t.Title = " Checks "
   396  			t.Frame = true
   397  		}
   398  
   399  		c, err := g.SetView("Transitions", 0, midY+offset+1, maxX-1, maxY-5, 0)
   400  		if err != nil {
   401  			if !errors.Is(err, gocui.ErrUnknownView) {
   402  				panic(err)
   403  			}
   404  			c.Autoscroll = true
   405  			c.Overwrite = true
   406  			c.Title = " Transitions "
   407  			c.Frame = true
   408  		}
   409  
   410  		s, err := g.SetView("Status", 0, maxY-4, maxX-1, maxY-2, 0)
   411  		if err != nil {
   412  			if !errors.Is(err, gocui.ErrUnknownView) {
   413  				panic(err)
   414  			}
   415  			s.Frame = true
   416  			s.Title = " Observed Status "
   417  			fmt.Fprintf(s, "Waiting for updates...")
   418  		}
   419  
   420  		h, err := g.SetView("Help", 0, maxY-2, maxX-1, maxY, 0)
   421  		if err != nil {
   422  			if !errors.Is(err, gocui.ErrUnknownView) {
   423  				panic(err)
   424  			}
   425  			h.Frame = false
   426  			idf := ""
   427  			cf := ""
   428  			if w.identity != "" {
   429  				idf = fmt.Sprintf(" identity %q", w.identity)
   430  			}
   431  			if w.check != "" {
   432  				cf = fmt.Sprintf(" check %q", w.check)
   433  			}
   434  
   435  			if idf != "" || cf != "" {
   436  				fmt.Fprintf(h, "Choria Scout Event Viewer: showing%s%s. Arrows resize, ^R reset view, ^L clear, ^C to exit", idf, cf)
   437  			} else {
   438  				fmt.Fprintf(h, "Choria Scout Event Viewer showing all events. Arrows resize, ^R reset view, ^L clear, ^C to exit")
   439  			}
   440  		}
   441  
   442  		return nil
   443  	}
   444  
   445  	g.SetManagerFunc(layout)
   446  	err = g.SetKeybinding("", gocui.KeyArrowDown, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error {
   447  		w.Lock()
   448  		offset++
   449  		w.Unlock()
   450  		return nil
   451  	})
   452  	if err != nil {
   453  		return nil, err
   454  	}
   455  
   456  	err = g.SetKeybinding("", gocui.KeyArrowUp, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error {
   457  		w.Lock()
   458  		offset--
   459  		w.Unlock()
   460  		return nil
   461  	})
   462  	if err != nil {
   463  		return nil, err
   464  	}
   465  
   466  	err = g.SetKeybinding("", gocui.KeyCtrlR, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error {
   467  		w.Lock()
   468  		offset = 0
   469  		w.Unlock()
   470  		return nil
   471  	})
   472  	if err != nil {
   473  		return nil, err
   474  	}
   475  
   476  	err = g.SetKeybinding("", gocui.KeyCtrlC, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { return gocui.ErrQuit })
   477  	if err != nil {
   478  		g.Close()
   479  		return nil, err
   480  	}
   481  
   482  	err = g.SetKeybinding("", gocui.KeyEsc, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error { return gocui.ErrQuit })
   483  	if err != nil {
   484  		g.Close()
   485  		return nil, err
   486  	}
   487  
   488  	err = g.SetKeybinding("", gocui.KeyCtrlL, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) error {
   489  		vw, err := g.View("Transitions")
   490  		if err == nil {
   491  			vw.Clear()
   492  		}
   493  		vw, err = g.View("Checks")
   494  		if err == nil {
   495  			vw.Clear()
   496  		}
   497  		return nil
   498  	})
   499  	if err != nil {
   500  		return nil, err
   501  	}
   502  
   503  	return g, nil
   504  }
   505  
   506  func (w *WatchCommand) subscribeJetStream(ctx context.Context, transitions chan *nats.Msg, states chan *nats.Msg) error {
   507  	mgr, err := jsm.New(w.nc.Nats())
   508  	if err != nil {
   509  		return err
   510  	}
   511  
   512  	str, err := mgr.LoadStream("CHORIA_MACHINE")
   513  	if err != nil {
   514  		return err
   515  	}
   516  
   517  	w.transEph, err = stream.NewEphemeral(ctx, w.nc.Nats(), str, time.Minute, transitions, w.log, jsm.FilterStreamBySubject("choria.machine.transition"), jsm.StartAtTimeDelta(w.history), jsm.AcknowledgeExplicit(), jsm.MaxAckPending(50), jsm.MaxDeliveryAttempts(1))
   518  	if err != nil {
   519  		return fmt.Errorf("could not subscribe to Choria Streaming stream CHORIA_MACHINE: %s", err)
   520  	}
   521  
   522  	w.stateEph, err = stream.NewEphemeral(ctx, w.nc.Nats(), str, time.Minute, states, w.log, jsm.FilterStreamBySubject("choria.machine.watcher.nagios.state"), jsm.StartAtTimeDelta(w.history), jsm.AcknowledgeExplicit(), jsm.MaxAckPending(50), jsm.MaxDeliveryAttempts(1))
   523  	if err != nil {
   524  		return fmt.Errorf("could not subscribe to Choria Streaming stream CHORIA_MACHINE: %s", err)
   525  	}
   526  
   527  	return nil
   528  }
   529  
   530  func (w *WatchCommand) subscribeDirect(transitions chan *nats.Msg, states chan *nats.Msg) error {
   531  	nc := w.nc.Nats()
   532  	_, err := nc.ChanSubscribe("choria.machine.transition", transitions)
   533  	if err != nil {
   534  		return fmt.Errorf("could not subscribe to transitions: %s", err)
   535  	}
   536  
   537  	_, err = nc.ChanSubscribe("choria.machine.watcher.nagios.state", states)
   538  	if err != nil {
   539  		return fmt.Errorf("could not subscribe to states: %s", err)
   540  	}
   541  
   542  	return nil
   543  }