github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/execwatcher/exec.go (about) 1 // Copyright (c) 2019-2024, R.I. Pienaar and the Choria Project contributors 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 5 package execwatcher 6 7 import ( 8 "context" 9 "encoding/json" 10 "fmt" 11 "math/rand" 12 "os" 13 "os/exec" 14 "sync" 15 "time" 16 17 "github.com/choria-io/go-choria/aagent/model" 18 "github.com/choria-io/go-choria/aagent/util" 19 "github.com/choria-io/go-choria/aagent/watchers/event" 20 "github.com/choria-io/go-choria/aagent/watchers/watcher" 21 iu "github.com/choria-io/go-choria/internal/util" 22 "github.com/google/shlex" 23 ) 24 25 type State int 26 27 const ( 28 Unknown State = iota 29 Skipped 30 Error 31 Success 32 33 wtype = "exec" 34 version = "v1" 35 ) 36 37 var stateNames = map[State]string{ 38 Unknown: "unknown", 39 Skipped: "skipped", 40 Error: "error", 41 Success: "success", 42 } 43 44 type Properties struct { 45 Command string 46 Environment []string 47 Governor string 48 GovernorTimeout time.Duration `mapstructure:"governor_timeout"` 49 OutputAsData bool `mapstructure:"parse_as_data"` 50 SuppressSuccessAnnounce bool `mapstructure:"suppress_success_announce"` 51 GatherInitialState bool `mapstructure:"gather_initial_state"` 52 Disown bool `mapstructure:"disown"` 53 Timeout time.Duration 54 } 55 56 type Watcher struct { 57 *watcher.Watcher 58 59 name string 60 machine model.Machine 61 previous State 62 interval time.Duration 63 previousRunTime time.Duration 64 properties *Properties 65 66 lastWatch time.Time 67 68 wmu *sync.Mutex 69 mu *sync.Mutex 70 } 71 72 func New(machine model.Machine, name string, states []string, failEvent string, successEvent string, interval string, ai time.Duration, rawprop map[string]any) (any, error) { 73 var err error 74 75 exec := &Watcher{ 76 machine: machine, 77 name: name, 78 mu: &sync.Mutex{}, 79 wmu: &sync.Mutex{}, 80 properties: &Properties{ 81 Environment: []string{}, 82 }, 83 } 84 85 exec.Watcher, err = watcher.NewWatcher(name, wtype, ai, states, machine, failEvent, successEvent) 86 if err != nil { 87 return nil, err 88 } 89 90 err = exec.setProperties(rawprop) 91 if err != nil { 92 return nil, fmt.Errorf("could not set properties: %v", err) 93 } 94 95 if interval != "" { 96 exec.interval, err = iu.ParseDuration(interval) 97 if err != nil { 98 return nil, fmt.Errorf("invalid interval: %v", err) 99 } 100 101 if exec.interval < 500*time.Millisecond { 102 return nil, fmt.Errorf("interval %v is too small", exec.interval) 103 } 104 } 105 106 return exec, nil 107 } 108 109 func (w *Watcher) validate() error { 110 if w.properties.Command == "" { 111 return fmt.Errorf("command is required") 112 } 113 114 if w.properties.Timeout == 0 { 115 w.properties.Timeout = time.Second 116 } 117 118 if w.properties.Governor != "" && w.properties.GovernorTimeout == 0 { 119 w.Infof("Setting Governor timeout to 5 minutes while unset") 120 w.properties.GovernorTimeout = 5 * time.Minute 121 } 122 123 if w.properties.Disown && w.properties.OutputAsData { 124 return fmt.Errorf("cannot parse output as data while disowning child processes") 125 } 126 127 return nil 128 } 129 130 func (w *Watcher) setProperties(props map[string]any) error { 131 if w.properties == nil { 132 w.properties = &Properties{Environment: []string{}} 133 } 134 135 err := util.ParseMapStructure(props, w.properties) 136 if err != nil { 137 return err 138 } 139 140 return w.validate() 141 } 142 143 func (w *Watcher) Run(ctx context.Context, wg *sync.WaitGroup) { 144 defer wg.Done() 145 146 w.Infof("exec watcher for %s starting", w.properties.Command) 147 148 if w.interval != 0 { 149 wg.Add(1) 150 go w.intervalWatcher(ctx, wg) 151 } 152 153 for { 154 select { 155 case <-w.Watcher.StateChangeC(): 156 w.performWatch(ctx, true) 157 158 case <-ctx.Done(): 159 w.Infof("Stopping on context interrupt") 160 w.CancelGovernor() 161 return 162 } 163 } 164 } 165 166 func (w *Watcher) intervalWatcher(ctx context.Context, wg *sync.WaitGroup) { 167 defer wg.Done() 168 169 tick := time.NewTicker(w.interval) 170 if w.properties.GatherInitialState { 171 splay := time.Duration(rand.Intn(30)) * time.Second 172 w.Infof("Performing initial execution after %v", splay) 173 if splay < 1 { 174 splay = 1 175 } 176 177 tick.Reset(splay) 178 } 179 180 for { 181 select { 182 case <-tick.C: 183 w.performWatch(ctx, false) 184 tick.Reset(w.interval) 185 186 case <-ctx.Done(): 187 tick.Stop() 188 return 189 } 190 } 191 } 192 193 func (w *Watcher) performWatch(ctx context.Context, force bool) { 194 w.wmu.Lock() 195 defer w.wmu.Unlock() 196 197 if !force && time.Since(w.lastWatch) < w.interval { 198 return 199 } 200 201 err := w.handleCheck(w.watch(ctx)) 202 if err != nil { 203 w.Errorf("could not handle watcher event: %s", err) 204 } 205 } 206 207 func (w *Watcher) handleCheck(s State, err error) error { 208 w.Debugf("handling check for %s %s %v", w.properties.Command, stateNames[s], err) 209 210 w.mu.Lock() 211 w.previous = s 212 w.mu.Unlock() 213 214 switch s { 215 case Error: 216 if err != nil { 217 w.Errorf("Check failed: %s", err) 218 } 219 220 w.NotifyWatcherState(w.CurrentState()) 221 return w.FailureTransition() 222 223 case Success: 224 if !w.properties.SuppressSuccessAnnounce { 225 w.NotifyWatcherState(w.CurrentState()) 226 } 227 228 return w.SuccessTransition() 229 } 230 231 return nil 232 } 233 234 func (w *Watcher) CurrentState() any { 235 w.mu.Lock() 236 defer w.mu.Unlock() 237 238 s := &StateNotification{ 239 Event: event.New(w.name, wtype, version, w.machine), 240 Command: w.properties.Command, 241 PreviousOutcome: stateNames[w.previous], 242 PreviousRunTime: w.previousRunTime.Nanoseconds(), 243 } 244 245 return s 246 } 247 248 func (w *Watcher) watch(ctx context.Context) (state State, err error) { 249 if !w.ShouldWatch() { 250 return Skipped, nil 251 } 252 253 if w.properties.Governor != "" { 254 fin, err := w.EnterGovernor(ctx, w.properties.Governor, w.properties.GovernorTimeout) 255 if err != nil { 256 w.Errorf("Cannot enter Governor %s: %s", w.properties.Governor, err) 257 return Error, err 258 } 259 defer fin() 260 } 261 262 start := time.Now() 263 defer func() { 264 w.mu.Lock() 265 w.previousRunTime = time.Since(start) 266 w.mu.Unlock() 267 }() 268 269 w.Infof("Running %s", w.properties.Command) 270 271 timeoutCtx, cancel := context.WithTimeout(ctx, w.properties.Timeout) 272 defer cancel() 273 274 parsedCommand, err := w.ProcessTemplate(w.properties.Command) 275 if err != nil { 276 return Error, fmt.Errorf("could not process command template: %s", err) 277 } 278 279 splitcmd, err := shlex.Split(parsedCommand) 280 if err != nil { 281 w.Errorf("Exec watcher %s failed: %s", w.properties.Command, err) 282 return Error, err 283 } 284 285 if len(splitcmd) == 0 { 286 w.Errorf("Invalid command %q", w.properties.Command) 287 return Error, err 288 } 289 290 var args []string 291 if len(splitcmd) > 1 { 292 args = splitcmd[1:] 293 } 294 295 df, err := w.DataCopyFile() 296 if err != nil { 297 w.Errorf("Could not get a copy of the data into a temporary file, skipping execution: %s", err) 298 return Error, err 299 } 300 defer os.Remove(df) 301 302 ff, err := w.FactsFile() 303 if err != nil { 304 w.Errorf("Could not expose machine facts, skipping execution: %s", err) 305 return Error, err 306 } 307 defer os.Remove(ff) 308 309 var cmd *exec.Cmd 310 if w.properties.Disown { 311 cmd = exec.Command(splitcmd[0], args...) 312 } else { 313 cmd = exec.CommandContext(timeoutCtx, splitcmd[0], args...) 314 } 315 cmd.Dir = w.machine.Directory() 316 317 cmd.Env = append(cmd.Env, fmt.Sprintf("MACHINE_WATCHER_NAME=%s", w.name)) 318 cmd.Env = append(cmd.Env, fmt.Sprintf("MACHINE_NAME=%s", w.machine.Name())) 319 cmd.Env = append(cmd.Env, fmt.Sprintf("PATH=%s%s%s", os.Getenv("PATH"), string(os.PathListSeparator), w.machine.Directory())) 320 cmd.Env = append(cmd.Env, fmt.Sprintf("WATCHER_DATA=%s", df)) 321 cmd.Env = append(cmd.Env, fmt.Sprintf("WATCHER_FACTS=%s", ff)) 322 323 for _, e := range w.properties.Environment { 324 es, err := w.ProcessTemplate(e) 325 if err != nil { 326 return Error, fmt.Errorf("could not process environment template: %s", err) 327 } 328 cmd.Env = append(cmd.Env, es) 329 } 330 331 var output []byte 332 if w.properties.Disown { 333 w.Debugf("Running command disowned from parent") 334 err = cmd.Start() 335 if err != nil { 336 return 0, err 337 } 338 339 errc := make(chan error) 340 go func() { 341 errc <- cmd.Wait() 342 }() 343 344 select { 345 case err = <-errc: 346 case <-ctx.Done(): 347 err = ctx.Err() 348 } 349 } else { 350 output, err = cmd.CombinedOutput() 351 } 352 if err != nil { 353 w.Errorf("Exec watcher %s failed: %s", w.properties.Command, err) 354 return Error, err 355 } 356 357 w.Debugf("Output from %s: %s", w.properties.Command, output) 358 359 if w.properties.OutputAsData { 360 err = w.setOutputAsData(output) 361 if err != nil { 362 w.Errorf("Could not save output data: %s", err) 363 return Error, err 364 } 365 } 366 367 return Success, nil 368 } 369 370 func (w *Watcher) setOutputAsData(output []byte) error { 371 dat := map[string]string{} 372 err := json.Unmarshal(output, &dat) 373 if err != nil { 374 return err 375 } 376 377 for k, v := range dat { 378 err = w.machine.DataPut(k, v) 379 if err != nil { 380 return err 381 } 382 } 383 384 return nil 385 }