github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/providers/agent/mcorpc/external/provider.go (about) 1 // Copyright (c) 2020-2021, R.I. Pienaar and the Choria Project contributors 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 5 package external 6 7 import ( 8 "context" 9 "fmt" 10 "io/fs" 11 "os" 12 "path/filepath" 13 "strings" 14 "sync" 15 "time" 16 17 "github.com/choria-io/go-choria/backoff" 18 "github.com/sirupsen/logrus" 19 20 "github.com/choria-io/go-choria/config" 21 "github.com/choria-io/go-choria/inter" 22 "github.com/choria-io/go-choria/internal/util" 23 "github.com/choria-io/go-choria/providers/agent/mcorpc/ddl/agent" 24 "github.com/choria-io/go-choria/server" 25 ) 26 27 var ( 28 // agents we do not ever wish to load from external agents 29 denyList = []string{"rpcutil", "choria_util", "choria_provision", "choria_registry", "discovery", "scout"} 30 // we only consider ddl files modified longer than this ago for reconciliation 31 fileChangeGrace = 5 * time.Second 32 ) 33 34 // Provider is a Choria Agent Provider that supports calling agents external to the 35 // choria process written in any language 36 type Provider struct { 37 cfg *config.Config 38 log *logrus.Entry 39 agents []*agent.DDL 40 paths map[string]string 41 mu sync.Mutex 42 } 43 44 // Initialize configures the agent provider 45 func (p *Provider) Initialize(cfg *config.Config, log *logrus.Entry) { 46 p.cfg = cfg 47 p.log = log.WithFields(logrus.Fields{"provider": "external"}) 48 p.paths = map[string]string{} 49 } 50 51 // RegisterAgents registers known ruby agents using a shim agent and starts a background reconciliation loop to add/remove/update agents without restarts 52 func (p *Provider) RegisterAgents(ctx context.Context, mgr server.AgentManager, connector inter.AgentConnector, log *logrus.Entry) error { 53 go p.watchAgents(ctx, mgr, connector) 54 55 return nil 56 } 57 58 func (p *Provider) upgradeExistingAgents(foundAgents []*agent.DDL, mgr server.AgentManager) error { 59 for i, currentDDL := range p.agents { 60 candidateDDL := findInAgentList(foundAgents, func(a *agent.DDL) bool { 61 if a.Metadata.Name != currentDDL.Metadata.Name { 62 return false 63 } 64 65 // we check the ddl location so that moving a agent to a different place, even when versions match will also reload it 66 if a.Metadata.Version == currentDDL.Metadata.Version && a.SourceLocation == currentDDL.SourceLocation { 67 return false 68 } 69 70 return p.shouldProcessModifiedDDL(a.SourceLocation) 71 }) 72 73 if candidateDDL == nil { 74 continue 75 } 76 77 newAgent, err := p.newExternalAgent(candidateDDL, mgr) 78 if err != nil { 79 p.log.Errorf("Could not create upgraded external agent %v: %v", candidateDDL.Metadata.Name, err) 80 continue 81 } 82 83 err = mgr.ReplaceAgent(candidateDDL.Metadata.Name, newAgent) 84 if err != nil { 85 p.log.Errorf("Could not replace upgraded external agent %v: %v", candidateDDL.Metadata.Name, err) 86 continue 87 } 88 89 p.agents[i] = candidateDDL 90 p.paths[candidateDDL.Metadata.Name] = candidateDDL.SourceLocation 91 } 92 93 return nil 94 } 95 96 func (p *Provider) removeOrphanAgents(foundAgents []*agent.DDL, mgr server.AgentManager, connector inter.AgentConnector) error { 97 var remove []int 98 99 for i, known := range p.agents { 100 found := findInAgentList(foundAgents, func(a *agent.DDL) bool { 101 return a.Metadata.Name == known.Metadata.Name 102 }) 103 104 if found == nil { 105 p.log.Infof("Removing agent %s after the DDL %s was removed", known.Metadata.Name, known.SourceLocation) 106 err := mgr.UnregisterAgent(known.Metadata.Name, connector) 107 if err != nil { 108 p.log.Errorf("Could not unregister agent %v: %v", known.Metadata.Name, err) 109 continue 110 } 111 112 delete(p.paths, known.Metadata.Name) 113 remove = append(remove, i) 114 } 115 } 116 117 for _, i := range remove { 118 p.agents = append(p.agents[:i], p.agents[i+1:]...) 119 } 120 121 return nil 122 } 123 124 func (p *Provider) registerNewAgents(ctx context.Context, foundAgents []*agent.DDL, mgr server.AgentManager, connector inter.AgentConnector) error { 125 for _, candidateDDL := range foundAgents { 126 found := findInAgentList(p.agents, func(a *agent.DDL) bool { 127 return candidateDDL.Metadata.Name == a.Metadata.Name 128 }) 129 130 if found == nil && p.shouldProcessModifiedDDL(candidateDDL.SourceLocation) { 131 p.log.Debugf("Registering new agent %v version %v from %s", candidateDDL.Metadata.Name, candidateDDL.Metadata.Version, candidateDDL.SourceLocation) 132 agent, err := p.newExternalAgent(candidateDDL, mgr) 133 if err != nil { 134 p.log.Errorf("Could not register external agent %s: %s", agent.Name(), err) 135 continue 136 } 137 138 err = mgr.RegisterAgent(ctx, agent.Name(), agent, connector) 139 if err != nil { 140 p.log.Errorf("Could not register external agent %s: %s", agent.Name(), err) 141 continue 142 } 143 144 p.agents = append(p.agents, candidateDDL) 145 p.paths[candidateDDL.Metadata.Name] = candidateDDL.SourceLocation 146 } 147 } 148 149 return nil 150 } 151 152 func (p *Provider) shouldProcessModifiedDDL(path string) bool { 153 if path == "" { 154 return false 155 } 156 157 stat, err := os.Stat(path) 158 if err != nil { 159 p.log.Errorf("Could not determine age of DDL file %v: %v", path, err) 160 return false 161 } 162 163 since := time.Since(stat.ModTime()) 164 if since < fileChangeGrace { 165 p.log.Debugf("Skipping updated DDL file %v that is %v old", path, since) 166 return false 167 } 168 169 return true 170 } 171 172 func (p *Provider) reconcileAgents(ctx context.Context, mgr server.AgentManager, connector inter.AgentConnector) error { 173 p.mu.Lock() 174 defer p.mu.Unlock() 175 176 p.log.Debugf("Reconciling external agents from disk with running agents") 177 178 var foundAgents []*agent.DDL 179 p.eachAgent(func(candidateDDL *agent.DDL) { 180 if candidateDDL.SourceLocation == "" { 181 return 182 } 183 184 foundAgents = append(foundAgents, candidateDDL) 185 }) 186 187 p.log.Debugf("Found %d external agents on disk", len(foundAgents)) 188 189 err := p.registerNewAgents(ctx, foundAgents, mgr, connector) 190 if err != nil { 191 p.log.Warnf("Could not register new agents: %v", err) 192 } 193 194 err = p.upgradeExistingAgents(foundAgents, mgr) 195 if err != nil { 196 p.log.Warnf("Could not upgrade existing agents: %v", err) 197 } 198 199 err = p.removeOrphanAgents(foundAgents, mgr, connector) 200 if err != nil { 201 p.log.Warnf("Could not remove orphaned agents: %v", err) 202 } 203 204 return nil 205 } 206 207 func (p *Provider) watchAgents(ctx context.Context, mgr server.AgentManager, connector inter.AgentConnector) { 208 err := p.reconcileAgents(ctx, mgr, connector) 209 if err != nil { 210 p.log.Errorf("Initial agent reconcile failed: %v", err) 211 } 212 213 count := 1 214 ticker := time.NewTicker(backoff.TwentySec.Duration(count)) 215 216 for { 217 select { 218 case <-ticker.C: 219 err := p.reconcileAgents(ctx, mgr, connector) 220 if err != nil { 221 p.log.Errorf("Reconciling agents failed: %v", err) 222 } 223 224 count++ 225 ticker.Reset(backoff.TwentySec.Duration(count)) 226 227 case <-ctx.Done(): 228 return 229 } 230 } 231 } 232 233 // Agents provides a list of loaded agent DDLs 234 func (p *Provider) Agents() []*agent.DDL { 235 p.mu.Lock() 236 defer p.mu.Unlock() 237 238 dst := make([]*agent.DDL, len(p.agents)) 239 copy(dst, p.agents) 240 241 return dst 242 } 243 244 // Version reports the version for this provider 245 func (p *Provider) Version() string { 246 return fmt.Sprintf("%s version %s", p.PluginName(), p.PluginVersion()) 247 } 248 249 func (p *Provider) agentDDL(a string) (*agent.DDL, bool) { 250 p.mu.Lock() 251 defer p.mu.Unlock() 252 253 for _, agent := range p.agents { 254 if agent.Metadata.Name == a { 255 return agent, true 256 } 257 } 258 259 return nil, false 260 } 261 262 // walks the plugin.choria.agent_provider.mcorpc.libdir directories looking for agents. 263 // 264 // we support $dir/agent.json and $dir/agent/agent.json 265 func (p *Provider) eachAgent(cb func(ddl *agent.DDL)) { 266 for _, libDir := range p.cfg.Choria.RubyLibdir { 267 agentsDir := filepath.Join(libDir, "mcollective", "agent") 268 269 p.log.Debugf("Attempting to load External agents from %s", agentsDir) 270 271 err := filepath.WalkDir(agentsDir, func(path string, info fs.DirEntry, err error) error { 272 if err != nil || path == agentsDir { 273 return err 274 } 275 276 // if early on we decide to skip dir, this will hold that and used everywhere we return on error 277 var retErr error 278 279 // either x.json or x in the case of a directory holding a ddl 280 fname := info.Name() 281 282 if fname == "tmp" { 283 return retErr 284 } 285 286 // full path, which in the case of a directory holding a ddl will be adjusted to the nested one 287 ddlPath := path 288 289 if info.IsDir() { 290 // We dont want to keep walking into directory so we check if the 291 // ddl matching fname exist then just use that, but we avoid 292 // traversing nested directories 293 ddlPath = filepath.Join(path, fmt.Sprintf("%s.json", fname)) 294 retErr = fs.SkipDir 295 } 296 297 if !util.FileExist(ddlPath) { 298 return retErr 299 } 300 301 ext := filepath.Ext(ddlPath) 302 name := strings.TrimSuffix(fname, ext) 303 304 if ext != ".json" { 305 return retErr 306 } 307 308 p.log.Debugf("Attempting to load %s as an agent DDL", ddlPath) 309 ddl, err := agent.New(ddlPath) 310 if err != nil { 311 p.log.Errorf("Could not load agent DDL %s: %s", ddlPath, err) 312 return retErr 313 } 314 315 if ddl.Metadata.Provider != "external" { 316 return nil 317 } 318 319 if !shouldLoadAgent(name) { 320 p.log.Warnf("External agents are not allowed to supply an agent called '%s', skipping", name) 321 return retErr 322 } 323 324 cb(ddl) 325 326 return retErr 327 }) 328 329 if err != nil { 330 p.log.Errorf("Could not find agents in %s: %s", agentsDir, err) 331 } 332 } 333 } 334 335 func findInAgentList(agents []*agent.DDL, cb func(*agent.DDL) bool) *agent.DDL { 336 for _, d := range agents { 337 if cb(d) { 338 return d 339 } 340 } 341 342 return nil 343 } 344 345 func shouldLoadAgent(name string) bool { 346 for _, a := range denyList { 347 if a == name { 348 return false 349 } 350 } 351 352 return true 353 }