github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/providers/agent/mcorpc/external/provider.go (about)

     1  // Copyright (c) 2020-2021, R.I. Pienaar and the Choria Project contributors
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package external
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"io/fs"
    11  	"os"
    12  	"path/filepath"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/choria-io/go-choria/backoff"
    18  	"github.com/sirupsen/logrus"
    19  
    20  	"github.com/choria-io/go-choria/config"
    21  	"github.com/choria-io/go-choria/inter"
    22  	"github.com/choria-io/go-choria/internal/util"
    23  	"github.com/choria-io/go-choria/providers/agent/mcorpc/ddl/agent"
    24  	"github.com/choria-io/go-choria/server"
    25  )
    26  
    27  var (
    28  	// agents we do not ever wish to load from external agents
    29  	denyList = []string{"rpcutil", "choria_util", "choria_provision", "choria_registry", "discovery", "scout"}
    30  	// we only consider ddl files modified longer than this ago for reconciliation
    31  	fileChangeGrace = 5 * time.Second
    32  )
    33  
    34  // Provider is a Choria Agent Provider that supports calling agents external to the
    35  // choria process written in any language
    36  type Provider struct {
    37  	cfg    *config.Config
    38  	log    *logrus.Entry
    39  	agents []*agent.DDL
    40  	paths  map[string]string
    41  	mu     sync.Mutex
    42  }
    43  
    44  // Initialize configures the agent provider
    45  func (p *Provider) Initialize(cfg *config.Config, log *logrus.Entry) {
    46  	p.cfg = cfg
    47  	p.log = log.WithFields(logrus.Fields{"provider": "external"})
    48  	p.paths = map[string]string{}
    49  }
    50  
    51  // RegisterAgents registers known ruby agents using a shim agent and starts a background reconciliation loop to add/remove/update agents without restarts
    52  func (p *Provider) RegisterAgents(ctx context.Context, mgr server.AgentManager, connector inter.AgentConnector, log *logrus.Entry) error {
    53  	go p.watchAgents(ctx, mgr, connector)
    54  
    55  	return nil
    56  }
    57  
    58  func (p *Provider) upgradeExistingAgents(foundAgents []*agent.DDL, mgr server.AgentManager) error {
    59  	for i, currentDDL := range p.agents {
    60  		candidateDDL := findInAgentList(foundAgents, func(a *agent.DDL) bool {
    61  			if a.Metadata.Name != currentDDL.Metadata.Name {
    62  				return false
    63  			}
    64  
    65  			// we check the ddl location so that moving a agent to a different place, even when versions match will also reload it
    66  			if a.Metadata.Version == currentDDL.Metadata.Version && a.SourceLocation == currentDDL.SourceLocation {
    67  				return false
    68  			}
    69  
    70  			return p.shouldProcessModifiedDDL(a.SourceLocation)
    71  		})
    72  
    73  		if candidateDDL == nil {
    74  			continue
    75  		}
    76  
    77  		newAgent, err := p.newExternalAgent(candidateDDL, mgr)
    78  		if err != nil {
    79  			p.log.Errorf("Could not create upgraded external agent %v: %v", candidateDDL.Metadata.Name, err)
    80  			continue
    81  		}
    82  
    83  		err = mgr.ReplaceAgent(candidateDDL.Metadata.Name, newAgent)
    84  		if err != nil {
    85  			p.log.Errorf("Could not replace upgraded external agent %v: %v", candidateDDL.Metadata.Name, err)
    86  			continue
    87  		}
    88  
    89  		p.agents[i] = candidateDDL
    90  		p.paths[candidateDDL.Metadata.Name] = candidateDDL.SourceLocation
    91  	}
    92  
    93  	return nil
    94  }
    95  
    96  func (p *Provider) removeOrphanAgents(foundAgents []*agent.DDL, mgr server.AgentManager, connector inter.AgentConnector) error {
    97  	var remove []int
    98  
    99  	for i, known := range p.agents {
   100  		found := findInAgentList(foundAgents, func(a *agent.DDL) bool {
   101  			return a.Metadata.Name == known.Metadata.Name
   102  		})
   103  
   104  		if found == nil {
   105  			p.log.Infof("Removing agent %s after the DDL %s was removed", known.Metadata.Name, known.SourceLocation)
   106  			err := mgr.UnregisterAgent(known.Metadata.Name, connector)
   107  			if err != nil {
   108  				p.log.Errorf("Could not unregister agent %v: %v", known.Metadata.Name, err)
   109  				continue
   110  			}
   111  
   112  			delete(p.paths, known.Metadata.Name)
   113  			remove = append(remove, i)
   114  		}
   115  	}
   116  
   117  	for _, i := range remove {
   118  		p.agents = append(p.agents[:i], p.agents[i+1:]...)
   119  	}
   120  
   121  	return nil
   122  }
   123  
   124  func (p *Provider) registerNewAgents(ctx context.Context, foundAgents []*agent.DDL, mgr server.AgentManager, connector inter.AgentConnector) error {
   125  	for _, candidateDDL := range foundAgents {
   126  		found := findInAgentList(p.agents, func(a *agent.DDL) bool {
   127  			return candidateDDL.Metadata.Name == a.Metadata.Name
   128  		})
   129  
   130  		if found == nil && p.shouldProcessModifiedDDL(candidateDDL.SourceLocation) {
   131  			p.log.Debugf("Registering new agent %v version %v from %s", candidateDDL.Metadata.Name, candidateDDL.Metadata.Version, candidateDDL.SourceLocation)
   132  			agent, err := p.newExternalAgent(candidateDDL, mgr)
   133  			if err != nil {
   134  				p.log.Errorf("Could not register external agent %s: %s", agent.Name(), err)
   135  				continue
   136  			}
   137  
   138  			err = mgr.RegisterAgent(ctx, agent.Name(), agent, connector)
   139  			if err != nil {
   140  				p.log.Errorf("Could not register external agent %s: %s", agent.Name(), err)
   141  				continue
   142  			}
   143  
   144  			p.agents = append(p.agents, candidateDDL)
   145  			p.paths[candidateDDL.Metadata.Name] = candidateDDL.SourceLocation
   146  		}
   147  	}
   148  
   149  	return nil
   150  }
   151  
   152  func (p *Provider) shouldProcessModifiedDDL(path string) bool {
   153  	if path == "" {
   154  		return false
   155  	}
   156  
   157  	stat, err := os.Stat(path)
   158  	if err != nil {
   159  		p.log.Errorf("Could not determine age of DDL file %v: %v", path, err)
   160  		return false
   161  	}
   162  
   163  	since := time.Since(stat.ModTime())
   164  	if since < fileChangeGrace {
   165  		p.log.Debugf("Skipping updated DDL file %v that is %v old", path, since)
   166  		return false
   167  	}
   168  
   169  	return true
   170  }
   171  
   172  func (p *Provider) reconcileAgents(ctx context.Context, mgr server.AgentManager, connector inter.AgentConnector) error {
   173  	p.mu.Lock()
   174  	defer p.mu.Unlock()
   175  
   176  	p.log.Debugf("Reconciling external agents from disk with running agents")
   177  
   178  	var foundAgents []*agent.DDL
   179  	p.eachAgent(func(candidateDDL *agent.DDL) {
   180  		if candidateDDL.SourceLocation == "" {
   181  			return
   182  		}
   183  
   184  		foundAgents = append(foundAgents, candidateDDL)
   185  	})
   186  
   187  	p.log.Debugf("Found %d external agents on disk", len(foundAgents))
   188  
   189  	err := p.registerNewAgents(ctx, foundAgents, mgr, connector)
   190  	if err != nil {
   191  		p.log.Warnf("Could not register new agents: %v", err)
   192  	}
   193  
   194  	err = p.upgradeExistingAgents(foundAgents, mgr)
   195  	if err != nil {
   196  		p.log.Warnf("Could not upgrade existing agents: %v", err)
   197  	}
   198  
   199  	err = p.removeOrphanAgents(foundAgents, mgr, connector)
   200  	if err != nil {
   201  		p.log.Warnf("Could not remove orphaned agents: %v", err)
   202  	}
   203  
   204  	return nil
   205  }
   206  
   207  func (p *Provider) watchAgents(ctx context.Context, mgr server.AgentManager, connector inter.AgentConnector) {
   208  	err := p.reconcileAgents(ctx, mgr, connector)
   209  	if err != nil {
   210  		p.log.Errorf("Initial agent reconcile failed: %v", err)
   211  	}
   212  
   213  	count := 1
   214  	ticker := time.NewTicker(backoff.TwentySec.Duration(count))
   215  
   216  	for {
   217  		select {
   218  		case <-ticker.C:
   219  			err := p.reconcileAgents(ctx, mgr, connector)
   220  			if err != nil {
   221  				p.log.Errorf("Reconciling agents failed: %v", err)
   222  			}
   223  
   224  			count++
   225  			ticker.Reset(backoff.TwentySec.Duration(count))
   226  
   227  		case <-ctx.Done():
   228  			return
   229  		}
   230  	}
   231  }
   232  
   233  // Agents provides a list of loaded agent DDLs
   234  func (p *Provider) Agents() []*agent.DDL {
   235  	p.mu.Lock()
   236  	defer p.mu.Unlock()
   237  
   238  	dst := make([]*agent.DDL, len(p.agents))
   239  	copy(dst, p.agents)
   240  
   241  	return dst
   242  }
   243  
   244  // Version reports the version for this provider
   245  func (p *Provider) Version() string {
   246  	return fmt.Sprintf("%s version %s", p.PluginName(), p.PluginVersion())
   247  }
   248  
   249  func (p *Provider) agentDDL(a string) (*agent.DDL, bool) {
   250  	p.mu.Lock()
   251  	defer p.mu.Unlock()
   252  
   253  	for _, agent := range p.agents {
   254  		if agent.Metadata.Name == a {
   255  			return agent, true
   256  		}
   257  	}
   258  
   259  	return nil, false
   260  }
   261  
   262  // walks the plugin.choria.agent_provider.mcorpc.libdir directories looking for agents.
   263  //
   264  // we support $dir/agent.json and $dir/agent/agent.json
   265  func (p *Provider) eachAgent(cb func(ddl *agent.DDL)) {
   266  	for _, libDir := range p.cfg.Choria.RubyLibdir {
   267  		agentsDir := filepath.Join(libDir, "mcollective", "agent")
   268  
   269  		p.log.Debugf("Attempting to load External agents from %s", agentsDir)
   270  
   271  		err := filepath.WalkDir(agentsDir, func(path string, info fs.DirEntry, err error) error {
   272  			if err != nil || path == agentsDir {
   273  				return err
   274  			}
   275  
   276  			// if early on we decide to skip dir, this will hold that and used everywhere we return on error
   277  			var retErr error
   278  
   279  			// either x.json or x in the case of a directory holding a ddl
   280  			fname := info.Name()
   281  
   282  			if fname == "tmp" {
   283  				return retErr
   284  			}
   285  
   286  			// full path, which in the case of a directory holding a ddl will be adjusted to the nested one
   287  			ddlPath := path
   288  
   289  			if info.IsDir() {
   290  				// We dont want to keep walking into directory so we check if the
   291  				// ddl matching fname exist then just use that, but we avoid
   292  				// traversing nested directories
   293  				ddlPath = filepath.Join(path, fmt.Sprintf("%s.json", fname))
   294  				retErr = fs.SkipDir
   295  			}
   296  
   297  			if !util.FileExist(ddlPath) {
   298  				return retErr
   299  			}
   300  
   301  			ext := filepath.Ext(ddlPath)
   302  			name := strings.TrimSuffix(fname, ext)
   303  
   304  			if ext != ".json" {
   305  				return retErr
   306  			}
   307  
   308  			p.log.Debugf("Attempting to load %s as an agent DDL", ddlPath)
   309  			ddl, err := agent.New(ddlPath)
   310  			if err != nil {
   311  				p.log.Errorf("Could not load agent DDL %s: %s", ddlPath, err)
   312  				return retErr
   313  			}
   314  
   315  			if ddl.Metadata.Provider != "external" {
   316  				return nil
   317  			}
   318  
   319  			if !shouldLoadAgent(name) {
   320  				p.log.Warnf("External agents are not allowed to supply an agent called '%s', skipping", name)
   321  				return retErr
   322  			}
   323  
   324  			cb(ddl)
   325  
   326  			return retErr
   327  		})
   328  
   329  		if err != nil {
   330  			p.log.Errorf("Could not find agents in %s: %s", agentsDir, err)
   331  		}
   332  	}
   333  }
   334  
   335  func findInAgentList(agents []*agent.DDL, cb func(*agent.DDL) bool) *agent.DDL {
   336  	for _, d := range agents {
   337  		if cb(d) {
   338  			return d
   339  		}
   340  	}
   341  
   342  	return nil
   343  }
   344  
   345  func shouldLoadAgent(name string) bool {
   346  	for _, a := range denyList {
   347  		if a == name {
   348  			return false
   349  		}
   350  	}
   351  
   352  	return true
   353  }