github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/pluginswatcher/plugins.go (about)

     1  // Copyright (c) 2021-2023, R.I. Pienaar and the Choria Project contributors
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package pluginswatcher
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"encoding/base64"
    11  	"encoding/hex"
    12  	"encoding/json"
    13  	"fmt"
    14  	"os"
    15  	"path/filepath"
    16  	"strings"
    17  	"sync"
    18  	"text/template"
    19  	"time"
    20  
    21  	"github.com/choria-io/go-choria/aagent/model"
    22  	"github.com/choria-io/go-choria/aagent/util"
    23  	"github.com/choria-io/go-choria/aagent/watchers/event"
    24  	"github.com/choria-io/go-choria/aagent/watchers/watcher"
    25  	iu "github.com/choria-io/go-choria/internal/util"
    26  	"github.com/mitchellh/mapstructure"
    27  )
    28  
// State is the outcome of a single plugins watch run, see the Unknown,
// Skipped, Error, Updated and Unchanged constants.
type State int
    30  
var (
	// PublicKey allows a public key to be compiled in to the binary during CI while using a standard
	// compiled in machine.yaml, effectively this is equivalent to setting the public_key property.
	//
	// When non-empty it replaces the public_key property while properties are being set.
	PublicKey = ""
)
    36  
const (
	// Unknown is the zero value of State, held before any watch has been handled.
	Unknown State = iota
	// Skipped is reported when the embedded watcher's ShouldWatch() declines the run.
	Skipped
	// Error is reported when the desired state could not be loaded or a fatal
	// error occurred while purging or replacing machines.
	Error
	// Updated is reported when at least one plugin was purged or deployed.
	Updated
	// Unchanged is reported when the run completed without purging or deploying anything.
	Unchanged

	// wtype is the watcher type name and version is the version passed to
	// event.New when building state notifications.
	wtype   = "plugins"
	version = "v1"
)
    47  
// stateNames maps each State to the lower-case name used in debug logs and in
// the PreviousOutcome field of state notifications.
var stateNames = map[State]string{
	Unknown:   "unknown",
	Skipped:   "skipped",
	Error:     "error",
	Updated:   "updated",
	Unchanged: "unchanged",
}
    55  
// ManagedPlugin describes one plugin from the plugins specification. It is
// decoded from JSON by desiredState and handed to the machine template when the
// manager machine for the plugin is rendered.
type ManagedPlugin struct {
	// Name identifies the plugin, it is required and is also used to build directory names.
	Name string `json:"name" yaml:"name"`
	// NamePrefix is not read from the specification, desiredState sets it to the manager machine prefix.
	NamePrefix string `json:"-" yaml:"-"`
	// Source is required and is logged as the location the plugin is deployed from.
	Source string `json:"source" yaml:"source"`
	// Username and Password are optional; presumably credentials for Source - confirm against the machine template.
	Username string `json:"username,omitempty" yaml:"username"`
	Password string `json:"password,omitempty" yaml:"password"`
	// ContentChecksumsChecksum is the required verify_checksum value.
	ContentChecksumsChecksum string `json:"verify_checksum" yaml:"verify_checksum" mapstructure:"verify_checksum"`
	// ArchiveChecksum is the required checksum value.
	ArchiveChecksum string `json:"checksum" yaml:"checksum" mapstructure:"checksum"`
	// Matcher is the optional match value, evaluated via isNodeMatch to decide if the plugin applies to this node.
	Matcher string `json:"match,omitempty" yaml:"match" mapstructure:"match"`
	// Governor is the optional governor value.
	Governor string `json:"governor,omitempty" yaml:"governor" mapstructure:"governor"`

	// Interval and Target are not read from the specification, desiredState sets them
	// from the plugin manage interval and the managed plugins directory.
	Interval string `json:"-"`
	Target   string `json:"-"`
}
    70  
// Properties holds the watcher configuration decoded from the raw watcher
// property map.
type Properties struct {
	// DataItem is the data item key to get ManagedPlugin from, typically sourced from Key-Value store
	DataItem string `mapstructure:"data_item"`
	// PurgeUnknown will remove plugins not declared in DataItem
	PurgeUnknown bool `mapstructure:"purge_unknown"`
	// PluginManageInterval is the interval that created management machines will use to manage their archives,
	// validation defaults it to 2 minutes when unset
	PluginManageInterval time.Duration `mapstructure:"plugin_manage_interval"`
	// PublicKey is the optional hex encoded ed25519 public key used to sign the specification, when set
	// the specification received will be validated and any invalid specification will be discarded
	PublicKey string `mapstructure:"public_key"`
	// Directory sets the directory where plugins are being deployed into, when empty the parent
	// directory of this machine's own directory is used
	Directory string `mapstructure:"plugins_directory"`
	// ManagerMachinePrefix the prefix used in constructing names for the management machines,
	// defaults to "mm" and may not contain an underscore
	ManagerMachinePrefix string `mapstructure:"manager_machine_prefix"`
}
    86  
// Watcher is the plugins watcher. It embeds the generic watcher and keeps the
// result of the most recent run for reporting through CurrentState.
type Watcher struct {
	*watcher.Watcher

	name    string
	machine model.Machine
	// previous, previousRunTime and previousManaged describe the most recent
	// run and are accessed while holding mu.
	previous State
	// interval is the period between periodic checks, zero disables them.
	interval        time.Duration
	previousRunTime time.Duration
	previousManaged []*ManagedPlugin
	properties      *Properties

	// lastWatch is compared against interval in performWatch.
	lastWatch time.Time

	// wmu serialises performWatch so only one watch runs at a time.
	wmu *sync.Mutex
	// mu guards the previous* fields.
	mu *sync.Mutex
}
   103  
   104  func New(machine model.Machine, name string, states []string, failEvent string, successEvent string, interval string, ai time.Duration, rawprop map[string]any) (any, error) {
   105  	var err error
   106  
   107  	plugins := &Watcher{
   108  		name:       name,
   109  		machine:    machine,
   110  		properties: &Properties{},
   111  		lastWatch:  time.Time{},
   112  		wmu:        &sync.Mutex{},
   113  		mu:         &sync.Mutex{},
   114  	}
   115  
   116  	plugins.Watcher, err = watcher.NewWatcher(name, wtype, ai, states, machine, failEvent, successEvent)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  
   121  	err = plugins.setProperties(rawprop)
   122  	if err != nil {
   123  		return nil, fmt.Errorf("could not set properties: %v", err)
   124  	}
   125  
   126  	if interval != "" {
   127  		plugins.interval, err = iu.ParseDuration(interval)
   128  		if err != nil {
   129  			return nil, fmt.Errorf("invalid interval: %v", err)
   130  		}
   131  
   132  		if plugins.interval < 2*time.Second {
   133  			return nil, fmt.Errorf("interval %v is too small", plugins.interval)
   134  		}
   135  	}
   136  
   137  	// Loads the public key from plugin.choria.machine.signing_key when set, overriding the value set here
   138  	if pk := machine.SignerKey(); pk != "" {
   139  		plugins.properties.PublicKey = pk
   140  	}
   141  
   142  	return plugins, nil
   143  }
   144  
   145  func (w *Watcher) Run(ctx context.Context, wg *sync.WaitGroup) {
   146  	defer wg.Done()
   147  
   148  	w.Infof("plugins watcher %s starting", w.name)
   149  
   150  	if w.interval != 0 {
   151  		wg.Add(1)
   152  		go w.intervalWatcher(ctx, wg)
   153  	}
   154  
   155  	w.performWatch(ctx, false)
   156  
   157  	for {
   158  		select {
   159  		case <-w.Watcher.StateChangeC():
   160  			w.performWatch(ctx, true)
   161  
   162  		case <-ctx.Done():
   163  			w.Infof("Stopping on context interrupt")
   164  			w.CancelGovernor()
   165  			return
   166  		}
   167  	}
   168  }
   169  
// watch performs one reconciliation of the manager machines on disk against
// the desired plugin list.
//
// It returns Skipped when the embedded watcher's ShouldWatch() declines,
// Error with the cause when the desired state cannot be loaded, purging fails
// or a stale manager machine cannot be removed, Updated when anything was
// purged or deployed and Unchanged otherwise. Failures that affect a single
// plugin (matching, rendering, hashing, creating or writing files) are logged
// and that plugin is skipped without failing the run.
func (w *Watcher) watch(ctx context.Context) (state State, err error) {
	if !w.ShouldWatch() {
		return Skipped, nil
	}

	// Record how long this run took, including early error returns.
	start := time.Now()
	defer func() {
		w.mu.Lock()
		w.previousRunTime = time.Since(start)
		w.mu.Unlock()
	}()

	desired, err := w.desiredState()
	if err != nil {
		return Error, err
	}

	// The full desired list is remembered for CurrentState, including plugins
	// that turn out not to match this node.
	w.mu.Lock()
	w.previousManaged = desired
	w.mu.Unlock()

	purged := false
	updated := false

	if w.properties.PurgeUnknown {
		purged, err = w.purgeUnknownPlugins(ctx, desired)
		if err != nil {
			return Error, err
		}
	}

	for _, m := range desired {
		if m == nil || m.Name == "" {
			continue
		}

		// Plugins that do not match this node, or whose match cannot be
		// evaluated, are not deployed.
		match, err := w.isNodeMatch(m)
		if err != nil {
			w.Debugf("Could not match machine %s to node: %s", m.Name, err)
			continue
		}
		if !match {
			continue
		}

		targetDir := w.targetDirForManagerMachine(m.Name)
		target := filepath.Join(targetDir, "machine.yaml")
		spec, err := w.renderMachine(m)
		if err != nil {
			w.Errorf("Failed to render machine %s: %v", m.Name, err)
			continue
		}

		// An existing machine.yaml is kept when its checksum matches the
		// freshly rendered spec, otherwise the whole manager machine directory
		// is removed and redeployed below.
		if iu.FileExist(target) {
			specHash, err := iu.Sha256HashBytes(spec)
			if err != nil {
				w.Errorf("Could not determine hash for spec for %s: %s", m.Name, err)
				continue
			}

			ok, _, err := iu.FileHasSha256Sum(target, specHash)
			if err != nil {
				w.Errorf("Could not compare spec with target %s: %s", target, err)
				continue
			}

			if ok {
				w.Debugf("Machine in %s has the correct content, continuing", target)
				continue
			}

			w.Warnf("Machine in %s has incorrect content, updating", target)

			// Unlike the other per-plugin failures this one aborts the whole run.
			err = os.RemoveAll(targetDir)
			if err != nil {
				w.Errorf("Could not remove unmatched machine in %s: %s", targetDir, err)
				return Error, err
			}
		}

		w.Warnf("Deploying plugin %s from %s into %s", m.Name, m.Source, m.Target)

		err = os.MkdirAll(targetDir, 0700)
		if err != nil {
			w.Errorf("Could not create directory for %s: %s", m.Name, err)
			continue
		}

		err = os.WriteFile(target, spec, 0600)
		if err != nil {
			w.Errorf("Could not write machine spec for %s: %s", m.Name, err)
			// Best effort cleanup so a partially written machine is not left behind.
			os.RemoveAll(targetDir)
			continue
		}

		updated = true
	}

	if purged || updated {
		return Updated, nil
	}

	return Unchanged, nil
}
   274  
   275  func (w *Watcher) handleCheck(s State, err error) error {
   276  	w.Debugf("handling state for %s %v", stateNames[s], err)
   277  
   278  	w.mu.Lock()
   279  	w.previous = s
   280  	w.mu.Unlock()
   281  
   282  	switch s {
   283  	case Error:
   284  		if err != nil {
   285  			w.Errorf("Managing plugins failed: %s", err)
   286  		}
   287  
   288  		w.NotifyWatcherState(w.CurrentState())
   289  		return w.FailureTransition()
   290  
   291  	case Updated:
   292  		w.NotifyWatcherState(w.CurrentState())
   293  		return w.SuccessTransition()
   294  
   295  	}
   296  
   297  	return nil
   298  }
   299  
   300  func (w *Watcher) renderMachine(m *ManagedPlugin) ([]byte, error) {
   301  	buf := bytes.NewBuffer([]byte{})
   302  	t := template.New("machine")
   303  
   304  	p, err := t.Parse(string(mdat))
   305  	if err != nil {
   306  		return nil, err
   307  	}
   308  
   309  	err = p.Execute(buf, m)
   310  	if err != nil {
   311  		return nil, err
   312  	}
   313  
   314  	return buf.Bytes(), nil
   315  }
   316  
   317  func (w *Watcher) targetDirForManagedPlugins() string {
   318  	if w.properties.Directory != "" {
   319  		return w.properties.Directory
   320  	}
   321  
   322  	return filepath.Dir(w.machine.Directory())
   323  }
   324  
   325  func (w *Watcher) targetDirForManagerMachine(m string) string {
   326  	return filepath.Join(filepath.Dir(w.machine.Directory()), fmt.Sprintf("%s_%s", w.properties.ManagerMachinePrefix, m))
   327  }
   328  
   329  func (w *Watcher) targetDirForManagedPlugin(m string) string {
   330  	return filepath.Join(w.targetDirForManagedPlugins(), m)
   331  }
   332  
   333  func (w *Watcher) purgeUnknownPlugins(ctx context.Context, desired []*ManagedPlugin) (bool, error) {
   334  	current, err := w.currentPlugins()
   335  	if err != nil {
   336  		return false, err
   337  	}
   338  
   339  	w.Debugf("Purging unknown plugins from current list %v", current)
   340  
   341  	purged := false
   342  	for _, m := range current {
   343  		keep := false
   344  		for _, d := range desired {
   345  			if d == nil || d.Name == "" {
   346  				continue
   347  			}
   348  
   349  			if m == d.Name {
   350  				if ok, _ := w.isNodeMatch(d); ok {
   351  					keep = true
   352  					break
   353  				}
   354  			}
   355  		}
   356  
   357  		if !keep {
   358  			w.Warnf("Removing existing managed machine %s that is not in new desired set", m)
   359  			target := w.targetDirForManagerMachine(m)
   360  			err = os.RemoveAll(target)
   361  			if err != nil {
   362  				w.Errorf("Could not remove %s: %s", target, err)
   363  				continue
   364  			}
   365  
   366  			w.Debugf("Sleeping for 2 seconds to allow manager to exit")
   367  			err = iu.InterruptibleSleep(ctx, 2*time.Second)
   368  			if err != nil {
   369  				return false, err
   370  			}
   371  
   372  			target = w.targetDirForManagedPlugin(m)
   373  			err = os.RemoveAll(target)
   374  			if err != nil {
   375  				w.Errorf("Could not remove %s: %s", target, err)
   376  				continue
   377  			}
   378  
   379  			purged = true
   380  		}
   381  	}
   382  
   383  	return purged, nil
   384  }
   385  
   386  func (w *Watcher) currentPlugins() ([]string, error) {
   387  	dirs, err := os.ReadDir(w.targetDirForManagedPlugins())
   388  	if err != nil {
   389  		return nil, err
   390  	}
   391  
   392  	var found []string
   393  
   394  	for _, e := range dirs {
   395  		if !e.IsDir() {
   396  			continue
   397  		}
   398  
   399  		parts := strings.SplitN(e.Name(), "_", 2)
   400  		if len(parts) != 2 {
   401  			continue
   402  		}
   403  
   404  		if parts[0] == w.properties.ManagerMachinePrefix {
   405  			found = append(found, parts[1])
   406  		}
   407  	}
   408  
   409  	return found, nil
   410  }
   411  
   412  func (w *Watcher) loadAndValidateData() ([]byte, error) {
   413  	dat, ok := w.machine.DataGet(w.properties.DataItem)
   414  	if !ok {
   415  		return nil, fmt.Errorf("data item %s not present", w.properties.DataItem)
   416  	}
   417  
   418  	spec := &Specification{}
   419  	decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
   420  		DecodeHook:       mapstructure.ComposeDecodeHookFunc(mapstructure.StringToTimeDurationHookFunc()),
   421  		Result:           &spec,
   422  		WeaklyTypedInput: true,
   423  	})
   424  	if err != nil {
   425  		return nil, err
   426  	}
   427  
   428  	err = decoder.Decode(dat)
   429  	if err != nil {
   430  		return nil, err
   431  	}
   432  
   433  	if w.properties.PublicKey != "" {
   434  		if len(spec.Signature) == 0 {
   435  			w.Errorf("No signature found in specification, removing data")
   436  			w.machine.DataDelete(w.properties.DataItem)
   437  			return nil, fmt.Errorf("invalid data_item")
   438  		}
   439  
   440  		pk, err := hex.DecodeString(w.properties.PublicKey)
   441  		if err != nil {
   442  			w.Errorf("invalid public key %s: %s", w.properties.PublicKey, err)
   443  			return nil, fmt.Errorf("invalid data_item")
   444  		}
   445  
   446  		verified, _ := spec.VerifySignature(pk)
   447  		if !verified {
   448  			w.Errorf("Signature in data_item %s did not verify using configured public key '%s', removing data", w.properties.DataItem, w.properties.PublicKey)
   449  			w.machine.DataDelete(w.properties.DataItem)
   450  			return nil, fmt.Errorf("invalid data_item")
   451  		}
   452  	}
   453  
   454  	pb, err := base64.StdEncoding.DecodeString(spec.Plugins)
   455  	if err != nil {
   456  		return nil, err
   457  	}
   458  
   459  	return pb, nil
   460  }
   461  
   462  func (w *Watcher) desiredState() ([]*ManagedPlugin, error) {
   463  	data, err := w.loadAndValidateData()
   464  	if err != nil {
   465  		return nil, err
   466  	}
   467  
   468  	var desired []*ManagedPlugin
   469  
   470  	err = json.Unmarshal(data, &desired)
   471  	if err != nil {
   472  		return nil, fmt.Errorf("invalid plugins specification: %s", err)
   473  	}
   474  
   475  	for _, m := range desired {
   476  		m.NamePrefix = w.properties.ManagerMachinePrefix
   477  		m.Interval = w.properties.PluginManageInterval.String()
   478  		m.Target = w.targetDirForManagedPlugins()
   479  
   480  		if m.Name == "" {
   481  			return nil, fmt.Errorf("name is required")
   482  		}
   483  
   484  		if m.Source == "" {
   485  			return nil, fmt.Errorf("source is required for %s", m.Name)
   486  		}
   487  
   488  		if m.ArchiveChecksum == "" {
   489  			return nil, fmt.Errorf("checksum is required for %s", m.Name)
   490  		}
   491  
   492  		if m.Target == "" {
   493  			return nil, fmt.Errorf("could not determine target for managed plugin for %s", m.Name)
   494  		}
   495  
   496  		if m.ContentChecksumsChecksum == "" {
   497  			return nil, fmt.Errorf("verify_checksum is required for %s", m.Name)
   498  		}
   499  	}
   500  
   501  	return desired, nil
   502  }
   503  
   504  func (w *Watcher) performWatch(ctx context.Context, force bool) {
   505  	w.wmu.Lock()
   506  	defer w.wmu.Unlock()
   507  
   508  	if !force && time.Since(w.lastWatch) < w.interval {
   509  		return
   510  	}
   511  
   512  	err := w.handleCheck(w.watch(ctx))
   513  	if err != nil {
   514  		w.Errorf("could not handle watcher event: %s", err)
   515  	}
   516  }
   517  
   518  func (w *Watcher) intervalWatcher(ctx context.Context, wg *sync.WaitGroup) {
   519  	defer wg.Done()
   520  
   521  	tick := time.NewTicker(w.interval)
   522  
   523  	for {
   524  		select {
   525  		case <-tick.C:
   526  			w.performWatch(ctx, false)
   527  
   528  		case <-ctx.Done():
   529  			tick.Stop()
   530  			return
   531  		}
   532  	}
   533  }
   534  
   535  func (w *Watcher) setProperties(props map[string]any) error {
   536  	if w.properties == nil {
   537  		w.properties = &Properties{}
   538  	}
   539  
   540  	err := util.ParseMapStructure(props, w.properties)
   541  	if err != nil {
   542  		return err
   543  	}
   544  
   545  	if PublicKey != "" {
   546  		w.properties.PublicKey = PublicKey
   547  	}
   548  
   549  	if w.properties.ManagerMachinePrefix == "" {
   550  		w.properties.ManagerMachinePrefix = "mm"
   551  	}
   552  
   553  	return w.validate()
   554  }
   555  
   556  func (w *Watcher) validate() error {
   557  	if w.properties.DataItem == "" {
   558  		return fmt.Errorf("data_item is required")
   559  	}
   560  	if w.machine.Directory() == "" && w.properties.Directory == "" {
   561  		return fmt.Errorf("machine store is not configured")
   562  	}
   563  
   564  	if strings.Contains(w.properties.ManagerMachinePrefix, "_") {
   565  		return fmt.Errorf("manager_machine_prefix may not contain underscore")
   566  	}
   567  
   568  	if w.properties.PluginManageInterval == 0 {
   569  		w.properties.PluginManageInterval = 2 * time.Minute
   570  	}
   571  
   572  	return nil
   573  }
   574  
   575  func (w *Watcher) CurrentState() any {
   576  	w.mu.Lock()
   577  	defer w.mu.Unlock()
   578  
   579  	s := &StateNotification{
   580  		Event:                  event.New(w.name, wtype, version, w.machine),
   581  		PreviousManagedPlugins: []string{},
   582  		PreviousOutcome:        stateNames[w.previous],
   583  		PreviousRunTime:        w.previousRunTime.Nanoseconds(),
   584  	}
   585  
   586  	for _, m := range w.previousManaged {
   587  		s.PreviousManagedPlugins = append(s.PreviousManagedPlugins, m.Name)
   588  	}
   589  
   590  	return s
   591  }