github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/execwatcher/exec.go (about)

     1  // Copyright (c) 2019-2024, R.I. Pienaar and the Choria Project contributors
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package execwatcher
     6  
     7  import (
     8  	"context"
     9  	"encoding/json"
    10  	"fmt"
    11  	"math/rand"
    12  	"os"
    13  	"os/exec"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/choria-io/go-choria/aagent/model"
    18  	"github.com/choria-io/go-choria/aagent/util"
    19  	"github.com/choria-io/go-choria/aagent/watchers/event"
    20  	"github.com/choria-io/go-choria/aagent/watchers/watcher"
    21  	iu "github.com/choria-io/go-choria/internal/util"
    22  	"github.com/google/shlex"
    23  )
    24  
// State is the outcome of a single execution attempt made by the exec watcher.
type State int

const (
	// Unknown is the zero value of State; handleCheck performs no transition for it.
	Unknown State = iota
	// Skipped is returned by watch when ShouldWatch reports that no check should run.
	Skipped
	// Error indicates the command could not be prepared, started or completed
	// successfully; handleCheck fires the failure transition for it.
	Error
	// Success indicates the command ran without error; handleCheck fires the
	// success transition for it.
	Success

	// wtype and version identify this watcher implementation; wtype is handed to
	// watcher.NewWatcher and both are handed to event.New when building notifications.
	wtype   = "exec"
	version = "v1"
)
    36  
// stateNames maps each State to the lower-case label used in debug logging
// and in the PreviousOutcome field of state notifications.
var stateNames = map[State]string{
	Unknown: "unknown",
	Skipped: "skipped",
	Error:   "error",
	Success: "success",
}
    43  
// Properties holds the user supplied configuration of an exec watcher. It is
// populated from the raw property map by setProperties and checked by validate.
type Properties struct {
	// Command is the command line to run; it is template processed and then
	// split into words with shlex before execution. Required.
	Command string
	// Environment lists extra entries appended to the command environment,
	// each one template processed first.
	Environment []string
	// Governor, when non-empty, names a Governor that must be entered before
	// the command runs.
	Governor string
	// GovernorTimeout is passed to EnterGovernor; validate sets it to 5 minutes
	// when a Governor is configured and no timeout was given.
	GovernorTimeout time.Duration `mapstructure:"governor_timeout"`
	// OutputAsData makes the watcher parse command output as a JSON object of
	// string values and store each pair in the machine data. Cannot be
	// combined with Disown.
	OutputAsData bool `mapstructure:"parse_as_data"`
	// SuppressSuccessAnnounce skips the state notification on successful runs.
	SuppressSuccessAnnounce bool `mapstructure:"suppress_success_announce"`
	// GatherInitialState schedules the first interval run after a random
	// delay of under 30 seconds instead of waiting a full interval.
	GatherInitialState bool `mapstructure:"gather_initial_state"`
	// Disown starts the command without binding it to the timeout context and
	// without capturing its output.
	Disown bool `mapstructure:"disown"`
	// Timeout bounds the execution of non-disowned commands; validate defaults
	// it to one second when unset.
	Timeout time.Duration
}
    55  
// Watcher is the exec watcher: it runs a configured command on state changes
// and, optionally, on a fixed interval, then triggers success or failure
// transitions based on the outcome.
type Watcher struct {
	// Watcher is the shared watcher base created by watcher.NewWatcher.
	*watcher.Watcher

	name    string
	machine model.Machine
	// previous is the outcome of the most recent check; guarded by mu.
	previous State
	// interval is the period between scheduled runs; zero disables the
	// interval based runs.
	interval time.Duration
	// previousRunTime is how long the most recent execution took; guarded by mu.
	previousRunTime time.Duration
	properties      *Properties

	// lastWatch is consulted by performWatch to rate limit non-forced runs.
	// NOTE(review): it is never assigned in this file — confirm whether it
	// should be updated after each run.
	lastWatch time.Time

	// wmu serializes performWatch so only one check runs at a time.
	wmu *sync.Mutex
	// mu guards previous and previousRunTime.
	mu *sync.Mutex
}
    71  
    72  func New(machine model.Machine, name string, states []string, failEvent string, successEvent string, interval string, ai time.Duration, rawprop map[string]any) (any, error) {
    73  	var err error
    74  
    75  	exec := &Watcher{
    76  		machine: machine,
    77  		name:    name,
    78  		mu:      &sync.Mutex{},
    79  		wmu:     &sync.Mutex{},
    80  		properties: &Properties{
    81  			Environment: []string{},
    82  		},
    83  	}
    84  
    85  	exec.Watcher, err = watcher.NewWatcher(name, wtype, ai, states, machine, failEvent, successEvent)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	err = exec.setProperties(rawprop)
    91  	if err != nil {
    92  		return nil, fmt.Errorf("could not set properties: %v", err)
    93  	}
    94  
    95  	if interval != "" {
    96  		exec.interval, err = iu.ParseDuration(interval)
    97  		if err != nil {
    98  			return nil, fmt.Errorf("invalid interval: %v", err)
    99  		}
   100  
   101  		if exec.interval < 500*time.Millisecond {
   102  			return nil, fmt.Errorf("interval %v is too small", exec.interval)
   103  		}
   104  	}
   105  
   106  	return exec, nil
   107  }
   108  
   109  func (w *Watcher) validate() error {
   110  	if w.properties.Command == "" {
   111  		return fmt.Errorf("command is required")
   112  	}
   113  
   114  	if w.properties.Timeout == 0 {
   115  		w.properties.Timeout = time.Second
   116  	}
   117  
   118  	if w.properties.Governor != "" && w.properties.GovernorTimeout == 0 {
   119  		w.Infof("Setting Governor timeout to 5 minutes while unset")
   120  		w.properties.GovernorTimeout = 5 * time.Minute
   121  	}
   122  
   123  	if w.properties.Disown && w.properties.OutputAsData {
   124  		return fmt.Errorf("cannot parse output as data while disowning child processes")
   125  	}
   126  
   127  	return nil
   128  }
   129  
   130  func (w *Watcher) setProperties(props map[string]any) error {
   131  	if w.properties == nil {
   132  		w.properties = &Properties{Environment: []string{}}
   133  	}
   134  
   135  	err := util.ParseMapStructure(props, w.properties)
   136  	if err != nil {
   137  		return err
   138  	}
   139  
   140  	return w.validate()
   141  }
   142  
   143  func (w *Watcher) Run(ctx context.Context, wg *sync.WaitGroup) {
   144  	defer wg.Done()
   145  
   146  	w.Infof("exec watcher for %s starting", w.properties.Command)
   147  
   148  	if w.interval != 0 {
   149  		wg.Add(1)
   150  		go w.intervalWatcher(ctx, wg)
   151  	}
   152  
   153  	for {
   154  		select {
   155  		case <-w.Watcher.StateChangeC():
   156  			w.performWatch(ctx, true)
   157  
   158  		case <-ctx.Done():
   159  			w.Infof("Stopping on context interrupt")
   160  			w.CancelGovernor()
   161  			return
   162  		}
   163  	}
   164  }
   165  
   166  func (w *Watcher) intervalWatcher(ctx context.Context, wg *sync.WaitGroup) {
   167  	defer wg.Done()
   168  
   169  	tick := time.NewTicker(w.interval)
   170  	if w.properties.GatherInitialState {
   171  		splay := time.Duration(rand.Intn(30)) * time.Second
   172  		w.Infof("Performing initial execution after %v", splay)
   173  		if splay < 1 {
   174  			splay = 1
   175  		}
   176  
   177  		tick.Reset(splay)
   178  	}
   179  
   180  	for {
   181  		select {
   182  		case <-tick.C:
   183  			w.performWatch(ctx, false)
   184  			tick.Reset(w.interval)
   185  
   186  		case <-ctx.Done():
   187  			tick.Stop()
   188  			return
   189  		}
   190  	}
   191  }
   192  
   193  func (w *Watcher) performWatch(ctx context.Context, force bool) {
   194  	w.wmu.Lock()
   195  	defer w.wmu.Unlock()
   196  
   197  	if !force && time.Since(w.lastWatch) < w.interval {
   198  		return
   199  	}
   200  
   201  	err := w.handleCheck(w.watch(ctx))
   202  	if err != nil {
   203  		w.Errorf("could not handle watcher event: %s", err)
   204  	}
   205  }
   206  
   207  func (w *Watcher) handleCheck(s State, err error) error {
   208  	w.Debugf("handling check for %s %s %v", w.properties.Command, stateNames[s], err)
   209  
   210  	w.mu.Lock()
   211  	w.previous = s
   212  	w.mu.Unlock()
   213  
   214  	switch s {
   215  	case Error:
   216  		if err != nil {
   217  			w.Errorf("Check failed: %s", err)
   218  		}
   219  
   220  		w.NotifyWatcherState(w.CurrentState())
   221  		return w.FailureTransition()
   222  
   223  	case Success:
   224  		if !w.properties.SuppressSuccessAnnounce {
   225  			w.NotifyWatcherState(w.CurrentState())
   226  		}
   227  
   228  		return w.SuccessTransition()
   229  	}
   230  
   231  	return nil
   232  }
   233  
   234  func (w *Watcher) CurrentState() any {
   235  	w.mu.Lock()
   236  	defer w.mu.Unlock()
   237  
   238  	s := &StateNotification{
   239  		Event:           event.New(w.name, wtype, version, w.machine),
   240  		Command:         w.properties.Command,
   241  		PreviousOutcome: stateNames[w.previous],
   242  		PreviousRunTime: w.previousRunTime.Nanoseconds(),
   243  	}
   244  
   245  	return s
   246  }
   247  
   248  func (w *Watcher) watch(ctx context.Context) (state State, err error) {
   249  	if !w.ShouldWatch() {
   250  		return Skipped, nil
   251  	}
   252  
   253  	if w.properties.Governor != "" {
   254  		fin, err := w.EnterGovernor(ctx, w.properties.Governor, w.properties.GovernorTimeout)
   255  		if err != nil {
   256  			w.Errorf("Cannot enter Governor %s: %s", w.properties.Governor, err)
   257  			return Error, err
   258  		}
   259  		defer fin()
   260  	}
   261  
   262  	start := time.Now()
   263  	defer func() {
   264  		w.mu.Lock()
   265  		w.previousRunTime = time.Since(start)
   266  		w.mu.Unlock()
   267  	}()
   268  
   269  	w.Infof("Running %s", w.properties.Command)
   270  
   271  	timeoutCtx, cancel := context.WithTimeout(ctx, w.properties.Timeout)
   272  	defer cancel()
   273  
   274  	parsedCommand, err := w.ProcessTemplate(w.properties.Command)
   275  	if err != nil {
   276  		return Error, fmt.Errorf("could not process command template: %s", err)
   277  	}
   278  
   279  	splitcmd, err := shlex.Split(parsedCommand)
   280  	if err != nil {
   281  		w.Errorf("Exec watcher %s failed: %s", w.properties.Command, err)
   282  		return Error, err
   283  	}
   284  
   285  	if len(splitcmd) == 0 {
   286  		w.Errorf("Invalid command %q", w.properties.Command)
   287  		return Error, err
   288  	}
   289  
   290  	var args []string
   291  	if len(splitcmd) > 1 {
   292  		args = splitcmd[1:]
   293  	}
   294  
   295  	df, err := w.DataCopyFile()
   296  	if err != nil {
   297  		w.Errorf("Could not get a copy of the data into a temporary file, skipping execution: %s", err)
   298  		return Error, err
   299  	}
   300  	defer os.Remove(df)
   301  
   302  	ff, err := w.FactsFile()
   303  	if err != nil {
   304  		w.Errorf("Could not expose machine facts, skipping execution: %s", err)
   305  		return Error, err
   306  	}
   307  	defer os.Remove(ff)
   308  
   309  	var cmd *exec.Cmd
   310  	if w.properties.Disown {
   311  		cmd = exec.Command(splitcmd[0], args...)
   312  	} else {
   313  		cmd = exec.CommandContext(timeoutCtx, splitcmd[0], args...)
   314  	}
   315  	cmd.Dir = w.machine.Directory()
   316  
   317  	cmd.Env = append(cmd.Env, fmt.Sprintf("MACHINE_WATCHER_NAME=%s", w.name))
   318  	cmd.Env = append(cmd.Env, fmt.Sprintf("MACHINE_NAME=%s", w.machine.Name()))
   319  	cmd.Env = append(cmd.Env, fmt.Sprintf("PATH=%s%s%s", os.Getenv("PATH"), string(os.PathListSeparator), w.machine.Directory()))
   320  	cmd.Env = append(cmd.Env, fmt.Sprintf("WATCHER_DATA=%s", df))
   321  	cmd.Env = append(cmd.Env, fmt.Sprintf("WATCHER_FACTS=%s", ff))
   322  
   323  	for _, e := range w.properties.Environment {
   324  		es, err := w.ProcessTemplate(e)
   325  		if err != nil {
   326  			return Error, fmt.Errorf("could not process environment template: %s", err)
   327  		}
   328  		cmd.Env = append(cmd.Env, es)
   329  	}
   330  
   331  	var output []byte
   332  	if w.properties.Disown {
   333  		w.Debugf("Running command disowned from parent")
   334  		err = cmd.Start()
   335  		if err != nil {
   336  			return 0, err
   337  		}
   338  
   339  		errc := make(chan error)
   340  		go func() {
   341  			errc <- cmd.Wait()
   342  		}()
   343  
   344  		select {
   345  		case err = <-errc:
   346  		case <-ctx.Done():
   347  			err = ctx.Err()
   348  		}
   349  	} else {
   350  		output, err = cmd.CombinedOutput()
   351  	}
   352  	if err != nil {
   353  		w.Errorf("Exec watcher %s failed: %s", w.properties.Command, err)
   354  		return Error, err
   355  	}
   356  
   357  	w.Debugf("Output from %s: %s", w.properties.Command, output)
   358  
   359  	if w.properties.OutputAsData {
   360  		err = w.setOutputAsData(output)
   361  		if err != nil {
   362  			w.Errorf("Could not save output data: %s", err)
   363  			return Error, err
   364  		}
   365  	}
   366  
   367  	return Success, nil
   368  }
   369  
   370  func (w *Watcher) setOutputAsData(output []byte) error {
   371  	dat := map[string]string{}
   372  	err := json.Unmarshal(output, &dat)
   373  	if err != nil {
   374  		return err
   375  	}
   376  
   377  	for k, v := range dat {
   378  		err = w.machine.DataPut(k, v)
   379  		if err != nil {
   380  			return err
   381  		}
   382  	}
   383  
   384  	return nil
   385  }