// SPDX-License-Identifier: GPL-3.0-or-later
// Origin: github.com/netdata/go.d.plugin@v0.58.1/agent/agent.go

package agent

import (
	"context"
	"io"
	"log/slog"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"github.com/netdata/go.d.plugin/agent/confgroup"
	"github.com/netdata/go.d.plugin/agent/discovery"
	"github.com/netdata/go.d.plugin/agent/filelock"
	"github.com/netdata/go.d.plugin/agent/filestatus"
	"github.com/netdata/go.d.plugin/agent/functions"
	"github.com/netdata/go.d.plugin/agent/jobmgr"
	"github.com/netdata/go.d.plugin/agent/module"
	"github.com/netdata/go.d.plugin/agent/netdataapi"
	"github.com/netdata/go.d.plugin/agent/safewriter"
	"github.com/netdata/go.d.plugin/agent/vnodes"
	"github.com/netdata/go.d.plugin/logger"
	"github.com/netdata/go.d.plugin/pkg/multipath"

	"github.com/mattn/go-isatty"
)

// isTerminal reports whether stdout is attached to a terminal. It is evaluated
// once, at package initialization. The code in this file uses it to exit the
// process instead of sending DISABLE, to skip the keep-alive loop and to skip
// job state persistence.
var isTerminal = isatty.IsTerminal(os.Stdout.Fd())

// Config is an Agent configuration.
type Config struct {
	// Name is the plugin name; the Agent passes it to the jobs manager.
	Name string
	// The four path settings below are copied onto the Agent as is.
	// NOTE(review): their consumers are not in this part of the package;
	// confirm the exact semantics there.
	ConfDir           []string
	ModulesConfDir    []string
	ModulesSDConfPath []string
	VnodesConfDir     []string
	// StateFile is the path of the job state file. The Agent uses it only when
	// stdout is not a terminal and the path is not empty.
	StateFile string
	// LockDir is the directory given to the file lock; an empty value leaves
	// file locking off.
	LockDir string
	// ModuleRegistry is the module registry for the Agent.
	ModuleRegistry module.Registry
	// RunModule and MinUpdateEvery are copied onto the Agent as is.
	// NOTE(review): presumably a module filter and a lower bound for the
	// collection interval; they are consumed elsewhere, confirm there.
	RunModule      string
	MinUpdateEvery int
}

// Agent represents orchestrator.
type Agent struct {
	*logger.Logger

	// Name is handed to the jobs manager as the plugin name.
	Name string
	// Path settings, see Config.
	ConfDir           multipath.MultiPath
	ModulesConfDir    multipath.MultiPath
	ModulesSDConfPath []string
	VnodesConfDir     multipath.MultiPath
	// StateFile enables job state loading and saving when it is not empty and
	// stdout is not a terminal.
	StateFile string
	// LockDir enables job file locking when it is not empty.
	LockDir string
	// See Config for RunModule, MinUpdateEvery and ModuleRegistry.
	RunModule      string
	MinUpdateEvery int
	ModuleRegistry module.Registry
	// Out is the writer handed to the jobs manager.
	Out io.Writer

	// api is used to send the DISABLE command and the keep-alive empty lines.
	api *netdataapi.API
}

// New creates a new Agent.
67 func New(cfg Config) *Agent { 68 return &Agent{ 69 Logger: logger.New().With( 70 slog.String("component", "agent"), 71 ), 72 Name: cfg.Name, 73 ConfDir: cfg.ConfDir, 74 ModulesConfDir: cfg.ModulesConfDir, 75 ModulesSDConfPath: cfg.ModulesSDConfPath, 76 VnodesConfDir: cfg.VnodesConfDir, 77 StateFile: cfg.StateFile, 78 LockDir: cfg.LockDir, 79 RunModule: cfg.RunModule, 80 MinUpdateEvery: cfg.MinUpdateEvery, 81 ModuleRegistry: module.DefaultRegistry, 82 Out: safewriter.Stdout, 83 api: netdataapi.New(safewriter.Stdout), 84 } 85 } 86 87 // Run starts the Agent. 88 func (a *Agent) Run() { 89 go a.keepAlive() 90 serve(a) 91 } 92 93 func serve(a *Agent) { 94 ch := make(chan os.Signal, 1) 95 signal.Notify(ch, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM) 96 var wg sync.WaitGroup 97 98 var exit bool 99 var reload bool 100 101 for { 102 ctx, cancel := context.WithCancel(context.Background()) 103 ctx = context.WithValue(ctx, "reload", reload) 104 105 wg.Add(1) 106 go func() { defer wg.Done(); a.run(ctx) }() 107 108 switch sig := <-ch; sig { 109 case syscall.SIGHUP: 110 a.Infof("received %s signal (%d). Restarting running instance", sig, sig) 111 default: 112 a.Infof("received %s signal (%d). Terminating...", sig, sig) 113 module.DontObsoleteCharts() 114 exit = true 115 } 116 117 cancel() 118 119 func() { 120 timeout := time.Second * 10 121 t := time.NewTimer(timeout) 122 defer t.Stop() 123 done := make(chan struct{}) 124 125 go func() { wg.Wait(); close(done) }() 126 127 select { 128 case <-t.C: 129 a.Errorf("stopping all goroutines timed out after %s. 
Exiting...", timeout) 130 os.Exit(0) 131 case <-done: 132 } 133 }() 134 135 if exit { 136 os.Exit(0) 137 } 138 139 reload = true 140 time.Sleep(time.Second) 141 } 142 } 143 144 func (a *Agent) run(ctx context.Context) { 145 a.Info("instance is started") 146 defer func() { a.Info("instance is stopped") }() 147 148 cfg := a.loadPluginConfig() 149 a.Infof("using config: %s", cfg.String()) 150 151 if !cfg.Enabled { 152 a.Info("plugin is disabled in the configuration file, exiting...") 153 if isTerminal { 154 os.Exit(0) 155 } 156 _ = a.api.DISABLE() 157 return 158 } 159 160 enabledModules := a.loadEnabledModules(cfg) 161 if len(enabledModules) == 0 { 162 a.Info("no modules to run") 163 if isTerminal { 164 os.Exit(0) 165 } 166 _ = a.api.DISABLE() 167 return 168 } 169 170 discCfg := a.buildDiscoveryConf(enabledModules) 171 172 discoveryManager, err := discovery.NewManager(discCfg) 173 if err != nil { 174 a.Error(err) 175 if isTerminal { 176 os.Exit(0) 177 } 178 return 179 } 180 181 functionsManager := functions.NewManager() 182 183 jobsManager := jobmgr.NewManager() 184 jobsManager.PluginName = a.Name 185 jobsManager.Out = a.Out 186 jobsManager.Modules = enabledModules 187 188 // TODO: API will be changed in https://github.com/netdata/netdata/pull/16702 189 //if logger.Level.Enabled(slog.LevelDebug) { 190 // dyncfgDiscovery, _ := dyncfg.NewDiscovery(dyncfg.Config{ 191 // Plugin: a.Name, 192 // API: netdataapi.New(a.Out), 193 // Modules: enabledModules, 194 // ModuleConfigDefaults: discCfg.Registry, 195 // Functions: functionsManager, 196 // }) 197 // 198 // discoveryManager.Add(dyncfgDiscovery) 199 // 200 // jobsManager.Dyncfg = dyncfgDiscovery 201 //} 202 203 if reg := a.setupVnodeRegistry(); reg == nil || reg.Len() == 0 { 204 vnodes.Disabled = true 205 } else { 206 jobsManager.Vnodes = reg 207 } 208 209 if a.LockDir != "" { 210 jobsManager.FileLock = filelock.New(a.LockDir) 211 } 212 213 var statusSaveManager *filestatus.Manager 214 if !isTerminal && a.StateFile != "" { 
215 statusSaveManager = filestatus.NewManager(a.StateFile) 216 jobsManager.StatusSaver = statusSaveManager 217 if store, err := filestatus.LoadStore(a.StateFile); err != nil { 218 a.Warningf("couldn't load state file: %v", err) 219 } else { 220 jobsManager.StatusStore = store 221 } 222 } 223 224 in := make(chan []*confgroup.Group) 225 var wg sync.WaitGroup 226 227 wg.Add(1) 228 go func() { defer wg.Done(); functionsManager.Run(ctx) }() 229 230 wg.Add(1) 231 go func() { defer wg.Done(); jobsManager.Run(ctx, in) }() 232 233 wg.Add(1) 234 go func() { defer wg.Done(); discoveryManager.Run(ctx, in) }() 235 236 if statusSaveManager != nil { 237 wg.Add(1) 238 go func() { defer wg.Done(); statusSaveManager.Run(ctx) }() 239 } 240 241 wg.Wait() 242 <-ctx.Done() 243 } 244 245 func (a *Agent) keepAlive() { 246 if isTerminal { 247 return 248 } 249 250 tk := time.NewTicker(time.Second) 251 defer tk.Stop() 252 253 var n int 254 for range tk.C { 255 if err := a.api.EMPTYLINE(); err != nil { 256 a.Infof("keepAlive: %v", err) 257 n++ 258 } else { 259 n = 0 260 } 261 if n == 3 { 262 a.Info("too many keepAlive errors. Terminating...") 263 os.Exit(0) 264 } 265 } 266 }