github.com/netdata/go.d.plugin@v0.58.1/agent/agent.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package agent
     4  
     5  import (
     6  	"context"
     7  	"io"
     8  	"log/slog"
     9  	"os"
    10  	"os/signal"
    11  	"sync"
    12  	"syscall"
    13  	"time"
    14  
    15  	"github.com/netdata/go.d.plugin/agent/confgroup"
    16  	"github.com/netdata/go.d.plugin/agent/discovery"
    17  	"github.com/netdata/go.d.plugin/agent/filelock"
    18  	"github.com/netdata/go.d.plugin/agent/filestatus"
    19  	"github.com/netdata/go.d.plugin/agent/functions"
    20  	"github.com/netdata/go.d.plugin/agent/jobmgr"
    21  	"github.com/netdata/go.d.plugin/agent/module"
    22  	"github.com/netdata/go.d.plugin/agent/netdataapi"
    23  	"github.com/netdata/go.d.plugin/agent/safewriter"
    24  	"github.com/netdata/go.d.plugin/agent/vnodes"
    25  	"github.com/netdata/go.d.plugin/logger"
    26  	"github.com/netdata/go.d.plugin/pkg/multipath"
    27  
    28  	"github.com/mattn/go-isatty"
    29  )
    30  
    31  var isTerminal = isatty.IsTerminal(os.Stdout.Fd())
    32  
    33  // Config is an Agent configuration.
    34  type Config struct {
    35  	Name              string
    36  	ConfDir           []string
    37  	ModulesConfDir    []string
    38  	ModulesSDConfPath []string
    39  	VnodesConfDir     []string
    40  	StateFile         string
    41  	LockDir           string
    42  	ModuleRegistry    module.Registry
    43  	RunModule         string
    44  	MinUpdateEvery    int
    45  }
    46  
    47  // Agent represents orchestrator.
    48  type Agent struct {
    49  	*logger.Logger
    50  
    51  	Name              string
    52  	ConfDir           multipath.MultiPath
    53  	ModulesConfDir    multipath.MultiPath
    54  	ModulesSDConfPath []string
    55  	VnodesConfDir     multipath.MultiPath
    56  	StateFile         string
    57  	LockDir           string
    58  	RunModule         string
    59  	MinUpdateEvery    int
    60  	ModuleRegistry    module.Registry
    61  	Out               io.Writer
    62  
    63  	api *netdataapi.API
    64  }
    65  
    66  // New creates a new Agent.
    67  func New(cfg Config) *Agent {
    68  	return &Agent{
    69  		Logger: logger.New().With(
    70  			slog.String("component", "agent"),
    71  		),
    72  		Name:              cfg.Name,
    73  		ConfDir:           cfg.ConfDir,
    74  		ModulesConfDir:    cfg.ModulesConfDir,
    75  		ModulesSDConfPath: cfg.ModulesSDConfPath,
    76  		VnodesConfDir:     cfg.VnodesConfDir,
    77  		StateFile:         cfg.StateFile,
    78  		LockDir:           cfg.LockDir,
    79  		RunModule:         cfg.RunModule,
    80  		MinUpdateEvery:    cfg.MinUpdateEvery,
    81  		ModuleRegistry:    module.DefaultRegistry,
    82  		Out:               safewriter.Stdout,
    83  		api:               netdataapi.New(safewriter.Stdout),
    84  	}
    85  }
    86  
    87  // Run starts the Agent.
    88  func (a *Agent) Run() {
    89  	go a.keepAlive()
    90  	serve(a)
    91  }
    92  
    93  func serve(a *Agent) {
    94  	ch := make(chan os.Signal, 1)
    95  	signal.Notify(ch, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
    96  	var wg sync.WaitGroup
    97  
    98  	var exit bool
    99  	var reload bool
   100  
   101  	for {
   102  		ctx, cancel := context.WithCancel(context.Background())
   103  		ctx = context.WithValue(ctx, "reload", reload)
   104  
   105  		wg.Add(1)
   106  		go func() { defer wg.Done(); a.run(ctx) }()
   107  
   108  		switch sig := <-ch; sig {
   109  		case syscall.SIGHUP:
   110  			a.Infof("received %s signal (%d). Restarting running instance", sig, sig)
   111  		default:
   112  			a.Infof("received %s signal (%d). Terminating...", sig, sig)
   113  			module.DontObsoleteCharts()
   114  			exit = true
   115  		}
   116  
   117  		cancel()
   118  
   119  		func() {
   120  			timeout := time.Second * 10
   121  			t := time.NewTimer(timeout)
   122  			defer t.Stop()
   123  			done := make(chan struct{})
   124  
   125  			go func() { wg.Wait(); close(done) }()
   126  
   127  			select {
   128  			case <-t.C:
   129  				a.Errorf("stopping all goroutines timed out after %s. Exiting...", timeout)
   130  				os.Exit(0)
   131  			case <-done:
   132  			}
   133  		}()
   134  
   135  		if exit {
   136  			os.Exit(0)
   137  		}
   138  
   139  		reload = true
   140  		time.Sleep(time.Second)
   141  	}
   142  }
   143  
   144  func (a *Agent) run(ctx context.Context) {
   145  	a.Info("instance is started")
   146  	defer func() { a.Info("instance is stopped") }()
   147  
   148  	cfg := a.loadPluginConfig()
   149  	a.Infof("using config: %s", cfg.String())
   150  
   151  	if !cfg.Enabled {
   152  		a.Info("plugin is disabled in the configuration file, exiting...")
   153  		if isTerminal {
   154  			os.Exit(0)
   155  		}
   156  		_ = a.api.DISABLE()
   157  		return
   158  	}
   159  
   160  	enabledModules := a.loadEnabledModules(cfg)
   161  	if len(enabledModules) == 0 {
   162  		a.Info("no modules to run")
   163  		if isTerminal {
   164  			os.Exit(0)
   165  		}
   166  		_ = a.api.DISABLE()
   167  		return
   168  	}
   169  
   170  	discCfg := a.buildDiscoveryConf(enabledModules)
   171  
   172  	discoveryManager, err := discovery.NewManager(discCfg)
   173  	if err != nil {
   174  		a.Error(err)
   175  		if isTerminal {
   176  			os.Exit(0)
   177  		}
   178  		return
   179  	}
   180  
   181  	functionsManager := functions.NewManager()
   182  
   183  	jobsManager := jobmgr.NewManager()
   184  	jobsManager.PluginName = a.Name
   185  	jobsManager.Out = a.Out
   186  	jobsManager.Modules = enabledModules
   187  
   188  	// TODO: API will be changed in https://github.com/netdata/netdata/pull/16702
   189  	//if logger.Level.Enabled(slog.LevelDebug) {
   190  	//	dyncfgDiscovery, _ := dyncfg.NewDiscovery(dyncfg.Config{
   191  	//		Plugin:               a.Name,
   192  	//		API:                  netdataapi.New(a.Out),
   193  	//		Modules:              enabledModules,
   194  	//		ModuleConfigDefaults: discCfg.Registry,
   195  	//		Functions:            functionsManager,
   196  	//	})
   197  	//
   198  	//	discoveryManager.Add(dyncfgDiscovery)
   199  	//
   200  	//	jobsManager.Dyncfg = dyncfgDiscovery
   201  	//}
   202  
   203  	if reg := a.setupVnodeRegistry(); reg == nil || reg.Len() == 0 {
   204  		vnodes.Disabled = true
   205  	} else {
   206  		jobsManager.Vnodes = reg
   207  	}
   208  
   209  	if a.LockDir != "" {
   210  		jobsManager.FileLock = filelock.New(a.LockDir)
   211  	}
   212  
   213  	var statusSaveManager *filestatus.Manager
   214  	if !isTerminal && a.StateFile != "" {
   215  		statusSaveManager = filestatus.NewManager(a.StateFile)
   216  		jobsManager.StatusSaver = statusSaveManager
   217  		if store, err := filestatus.LoadStore(a.StateFile); err != nil {
   218  			a.Warningf("couldn't load state file: %v", err)
   219  		} else {
   220  			jobsManager.StatusStore = store
   221  		}
   222  	}
   223  
   224  	in := make(chan []*confgroup.Group)
   225  	var wg sync.WaitGroup
   226  
   227  	wg.Add(1)
   228  	go func() { defer wg.Done(); functionsManager.Run(ctx) }()
   229  
   230  	wg.Add(1)
   231  	go func() { defer wg.Done(); jobsManager.Run(ctx, in) }()
   232  
   233  	wg.Add(1)
   234  	go func() { defer wg.Done(); discoveryManager.Run(ctx, in) }()
   235  
   236  	if statusSaveManager != nil {
   237  		wg.Add(1)
   238  		go func() { defer wg.Done(); statusSaveManager.Run(ctx) }()
   239  	}
   240  
   241  	wg.Wait()
   242  	<-ctx.Done()
   243  }
   244  
   245  func (a *Agent) keepAlive() {
   246  	if isTerminal {
   247  		return
   248  	}
   249  
   250  	tk := time.NewTicker(time.Second)
   251  	defer tk.Stop()
   252  
   253  	var n int
   254  	for range tk.C {
   255  		if err := a.api.EMPTYLINE(); err != nil {
   256  			a.Infof("keepAlive: %v", err)
   257  			n++
   258  		} else {
   259  			n = 0
   260  		}
   261  		if n == 3 {
   262  			a.Info("too many keepAlive errors. Terminating...")
   263  			os.Exit(0)
   264  		}
   265  	}
   266  }