github.com/influxdata/telegraf@v1.30.3/cmd/telegraf/telegraf.go (about)

     1  package main
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"os"
     9  	"os/signal"
    10  	"strings"
    11  	"syscall"
    12  	"time"
    13  
    14  	"github.com/coreos/go-systemd/v22/daemon"
    15  	"github.com/fatih/color"
    16  	"github.com/influxdata/tail/watch"
    17  	"gopkg.in/tomb.v1"
    18  
    19  	"github.com/influxdata/telegraf"
    20  	"github.com/influxdata/telegraf/agent"
    21  	"github.com/influxdata/telegraf/config"
    22  	"github.com/influxdata/telegraf/internal"
    23  	"github.com/influxdata/telegraf/logger"
    24  	"github.com/influxdata/telegraf/plugins/aggregators"
    25  	"github.com/influxdata/telegraf/plugins/inputs"
    26  	"github.com/influxdata/telegraf/plugins/outputs"
    27  	"github.com/influxdata/telegraf/plugins/parsers"
    28  	"github.com/influxdata/telegraf/plugins/processors"
    29  	"github.com/influxdata/telegraf/plugins/secretstores"
    30  )
    31  
// stop is received from by the supervisor goroutine in reloadLoop; a value
// (or a close) on it cancels the currently running agent. It is never
// assigned in this file, and a receive on a nil channel blocks forever, so
// unless it is initialized the corresponding select case simply never fires.
// NOTE(review): presumably set by platform-specific code elsewhere in the
// package — confirm.
var stop chan struct{}
    33  
// GlobalFlags collects the command-line options consumed by Telegraf in this
// file. The field comments describe how each value is used here.
type GlobalFlags struct {
	// config lists explicit configuration files; they are loaded before any
	// files discovered through configDir.
	config         []string
	// configDir lists directories that are walked for configuration files.
	configDir      []string
	// testWait is a wait time in seconds handed to the agent's Once/Test runs;
	// a non-zero value also switches the agent into test mode.
	testWait       int
	// watchConfig enables config-file watching when non-empty: "poll" selects
	// the polling watcher, any other value the inotify watcher.
	watchConfig    string
	// pidFile is the path the process ID is written to before a regular run;
	// empty disables the pidfile.
	pidFile        string
	// plugindDir, when non-empty, allows running with a configuration that
	// defines no inputs.
	plugindDir     string
	// password, when non-empty, is installed as the global config password.
	password       string
	// oldEnvBehavior is copied to config.OldEnvVarReplacement.
	oldEnvBehavior bool
	// test runs the agent in test mode, in which outputs are not required.
	test           bool
	// debug forces debug logging in addition to the agent's own setting.
	debug          bool
	// once runs the agent through its Once path instead of the regular loop.
	once           bool
	// quiet forces quiet logging in addition to the agent's own setting.
	quiet          bool
	// unprotected disables secret protection and skips the locked-memory check.
	unprotected    bool
}
    49  
// WindowFlags collects a second group of command-line options that is stored
// on Telegraf by Init. None of its fields are read in this file.
// NOTE(review): the names suggest Windows service settings (service command,
// names, restart policy, console mode) — confirm against the service code.
type WindowFlags struct {
	service             string
	serviceName         string
	serviceDisplayName  string
	serviceRestartDelay string
	serviceAutoRestart  bool
	console             bool
}
    58  
// App is the interface through which the application is driven; *Telegraf
// provides Init and the secret-store methods in this file.
type App interface {
	// Init hands over the pprof error channel, the plugin filters and the
	// parsed flag groups before any other method is called.
	Init(<-chan error, Filters, GlobalFlags, WindowFlags)
	// Run starts the application and returns its terminal error, if any.
	Run() error

	// Secret store commands

	// ListSecretStores returns the IDs of the configured secret stores.
	ListSecretStores() ([]string, error)
	// GetSecretStore returns the secret store registered under the given ID.
	GetSecretStore(string) (telegraf.SecretStore, error)
}
    67  
// Telegraf holds the runtime state of the application: the settings handed
// over by Init plus the list of configuration files resolved while loading.
type Telegraf struct {
	// pprofErr delivers a failure of the pprof server; receiving from it
	// cancels the running agent (see reloadLoop).
	pprofErr <-chan error

	// Plugin filters copied onto every freshly created configuration.
	inputFilters       []string
	outputFilters      []string
	// configFiles is the resolved list of configuration files, refreshed by
	// every call to loadConfiguration and used to decide what to watch.
	configFiles        []string
	secretstoreFilters []string

	// Embedded flag groups; their fields are accessed directly on Telegraf.
	GlobalFlags
	WindowFlags
}
    79  
    80  func (t *Telegraf) Init(pprofErr <-chan error, f Filters, g GlobalFlags, w WindowFlags) {
    81  	t.pprofErr = pprofErr
    82  	t.inputFilters = f.input
    83  	t.outputFilters = f.output
    84  	t.secretstoreFilters = f.secretstore
    85  	t.GlobalFlags = g
    86  	t.WindowFlags = w
    87  
    88  	// Disable secret protection before performing any other operation
    89  	if g.unprotected {
    90  		log.Println("W! Running without secret protection!")
    91  		config.DisableSecretProtection()
    92  	}
    93  
    94  	// Set global password
    95  	if g.password != "" {
    96  		config.Password = config.NewSecret([]byte(g.password))
    97  	}
    98  
    99  	// Set environment replacement behavior
   100  	config.OldEnvVarReplacement = g.oldEnvBehavior
   101  }
   102  
   103  func (t *Telegraf) ListSecretStores() ([]string, error) {
   104  	c, err := t.loadConfiguration()
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  
   109  	ids := make([]string, 0, len(c.SecretStores))
   110  	for k := range c.SecretStores {
   111  		ids = append(ids, k)
   112  	}
   113  	return ids, nil
   114  }
   115  
   116  func (t *Telegraf) GetSecretStore(id string) (telegraf.SecretStore, error) {
   117  	t.quiet = true
   118  	c, err := t.loadConfiguration()
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  
   123  	store, found := c.SecretStores[id]
   124  	if !found {
   125  		return nil, errors.New("unknown secret store")
   126  	}
   127  
   128  	return store, nil
   129  }
   130  
   131  func (t *Telegraf) reloadLoop() error {
   132  	reloadConfig := false
   133  	cfg, err := t.loadConfiguration()
   134  	if err != nil {
   135  		return err
   136  	}
   137  
   138  	reload := make(chan bool, 1)
   139  	reload <- true
   140  	for <-reload {
   141  		reload <- false
   142  		ctx, cancel := context.WithCancel(context.Background())
   143  
   144  		signals := make(chan os.Signal, 1)
   145  		signal.Notify(signals, os.Interrupt, syscall.SIGHUP,
   146  			syscall.SIGTERM, syscall.SIGINT)
   147  		if t.watchConfig != "" {
   148  			for _, fConfig := range t.configFiles {
   149  				if _, err := os.Stat(fConfig); err == nil {
   150  					go t.watchLocalConfig(signals, fConfig)
   151  				} else {
   152  					log.Printf("W! Cannot watch config %s: %s", fConfig, err)
   153  				}
   154  			}
   155  		}
   156  		go func() {
   157  			select {
   158  			case sig := <-signals:
   159  				if sig == syscall.SIGHUP {
   160  					log.Println("I! Reloading Telegraf config")
   161  					<-reload
   162  					reload <- true
   163  				}
   164  				cancel()
   165  			case err := <-t.pprofErr:
   166  				log.Printf("E! pprof server failed: %v", err)
   167  				cancel()
   168  			case <-stop:
   169  				cancel()
   170  			}
   171  		}()
   172  
   173  		err := t.runAgent(ctx, cfg, reloadConfig)
   174  		if err != nil && !errors.Is(err, context.Canceled) {
   175  			return fmt.Errorf("[telegraf] Error running agent: %w", err)
   176  		}
   177  		reloadConfig = true
   178  	}
   179  
   180  	return nil
   181  }
   182  
   183  func (t *Telegraf) watchLocalConfig(signals chan os.Signal, fConfig string) {
   184  	var mytomb tomb.Tomb
   185  	var watcher watch.FileWatcher
   186  	if t.watchConfig == "poll" {
   187  		watcher = watch.NewPollingFileWatcher(fConfig)
   188  	} else {
   189  		watcher = watch.NewInotifyFileWatcher(fConfig)
   190  	}
   191  	changes, err := watcher.ChangeEvents(&mytomb, 0)
   192  	if err != nil {
   193  		log.Printf("E! Error watching config: %s\n", err)
   194  		return
   195  	}
   196  	log.Println("I! Config watcher started")
   197  	select {
   198  	case <-changes.Modified:
   199  		log.Println("I! Config file modified")
   200  	case <-changes.Deleted:
   201  		// deleted can mean moved. wait a bit a check existence
   202  		<-time.After(time.Second)
   203  		if _, err := os.Stat(fConfig); err == nil {
   204  			log.Println("I! Config file overwritten")
   205  		} else {
   206  			log.Println("W! Config file deleted")
   207  			if err := watcher.BlockUntilExists(&mytomb); err != nil {
   208  				log.Printf("E! Cannot watch for config: %s\n", err.Error())
   209  				return
   210  			}
   211  			log.Println("I! Config file appeared")
   212  		}
   213  	case <-changes.Truncated:
   214  		log.Println("I! Config file truncated")
   215  	case <-mytomb.Dying():
   216  		log.Println("I! Config watcher ended")
   217  		return
   218  	}
   219  	mytomb.Done()
   220  	signals <- syscall.SIGHUP
   221  }
   222  
   223  func (t *Telegraf) loadConfiguration() (*config.Config, error) {
   224  	// If no other options are specified, load the config file and run.
   225  	c := config.NewConfig()
   226  	c.Agent.Quiet = t.quiet
   227  	c.OutputFilters = t.outputFilters
   228  	c.InputFilters = t.inputFilters
   229  	c.SecretStoreFilters = t.secretstoreFilters
   230  
   231  	var configFiles []string
   232  
   233  	configFiles = append(configFiles, t.config...)
   234  	for _, fConfigDirectory := range t.configDir {
   235  		files, err := config.WalkDirectory(fConfigDirectory)
   236  		if err != nil {
   237  			return c, err
   238  		}
   239  		configFiles = append(configFiles, files...)
   240  	}
   241  
   242  	// load default config paths if none are found
   243  	if len(configFiles) == 0 {
   244  		defaultFiles, err := config.GetDefaultConfigPath()
   245  		if err != nil {
   246  			return nil, fmt.Errorf("unable to load default config paths: %w", err)
   247  		}
   248  		configFiles = append(configFiles, defaultFiles...)
   249  	}
   250  
   251  	t.configFiles = configFiles
   252  	if err := c.LoadAll(configFiles...); err != nil {
   253  		return c, err
   254  	}
   255  	return c, nil
   256  }
   257  
   258  func (t *Telegraf) runAgent(ctx context.Context, c *config.Config, reloadConfig bool) error {
   259  	var err error
   260  	if reloadConfig {
   261  		if c, err = t.loadConfiguration(); err != nil {
   262  			return err
   263  		}
   264  	}
   265  
   266  	if !(t.test || t.testWait != 0) && len(c.Outputs) == 0 {
   267  		return errors.New("no outputs found, did you provide a valid config file?")
   268  	}
   269  	if t.plugindDir == "" && len(c.Inputs) == 0 {
   270  		return errors.New("no inputs found, did you provide a valid config file?")
   271  	}
   272  
   273  	if int64(c.Agent.Interval) <= 0 {
   274  		return fmt.Errorf("agent interval must be positive, found %v", c.Agent.Interval)
   275  	}
   276  
   277  	if int64(c.Agent.FlushInterval) <= 0 {
   278  		return fmt.Errorf("agent flush_interval must be positive; found %v", c.Agent.Interval)
   279  	}
   280  
   281  	// Setup logging as configured.
   282  	telegraf.Debug = c.Agent.Debug || t.debug
   283  	logConfig := logger.LogConfig{
   284  		Debug:               telegraf.Debug,
   285  		Quiet:               c.Agent.Quiet || t.quiet,
   286  		LogTarget:           c.Agent.LogTarget,
   287  		Logfile:             c.Agent.Logfile,
   288  		RotationInterval:    c.Agent.LogfileRotationInterval,
   289  		RotationMaxSize:     c.Agent.LogfileRotationMaxSize,
   290  		RotationMaxArchives: c.Agent.LogfileRotationMaxArchives,
   291  		LogWithTimezone:     c.Agent.LogWithTimezone,
   292  	}
   293  
   294  	if err := logger.SetupLogging(logConfig); err != nil {
   295  		return err
   296  	}
   297  
   298  	log.Printf("I! Starting Telegraf %s%s brought to you by InfluxData the makers of InfluxDB", internal.Version, internal.Customized)
   299  	log.Printf("I! Available plugins: %d inputs, %d aggregators, %d processors, %d parsers, %d outputs, %d secret-stores",
   300  		len(inputs.Inputs),
   301  		len(aggregators.Aggregators),
   302  		len(processors.Processors),
   303  		len(parsers.Parsers),
   304  		len(outputs.Outputs),
   305  		len(secretstores.SecretStores),
   306  	)
   307  	log.Printf("I! Loaded inputs: %s", strings.Join(c.InputNames(), " "))
   308  	log.Printf("I! Loaded aggregators: %s", strings.Join(c.AggregatorNames(), " "))
   309  	log.Printf("I! Loaded processors: %s", strings.Join(c.ProcessorNames(), " "))
   310  	log.Printf("I! Loaded secretstores: %s", strings.Join(c.SecretstoreNames(), " "))
   311  	if !t.once && (t.test || t.testWait != 0) {
   312  		log.Print("W! " + color.RedString("Outputs are not used in testing mode!"))
   313  	} else {
   314  		log.Printf("I! Loaded outputs: %s", strings.Join(c.OutputNames(), " "))
   315  	}
   316  	log.Printf("I! Tags enabled: %s", c.ListTags())
   317  
   318  	if count, found := c.Deprecations["inputs"]; found && (count[0] > 0 || count[1] > 0) {
   319  		log.Printf("W! Deprecated inputs: %d and %d options", count[0], count[1])
   320  	}
   321  	if count, found := c.Deprecations["aggregators"]; found && (count[0] > 0 || count[1] > 0) {
   322  		log.Printf("W! Deprecated aggregators: %d and %d options", count[0], count[1])
   323  	}
   324  	if count, found := c.Deprecations["processors"]; found && (count[0] > 0 || count[1] > 0) {
   325  		log.Printf("W! Deprecated processors: %d and %d options", count[0], count[1])
   326  	}
   327  	if count, found := c.Deprecations["outputs"]; found && (count[0] > 0 || count[1] > 0) {
   328  		log.Printf("W! Deprecated outputs: %d and %d options", count[0], count[1])
   329  	}
   330  	if count, found := c.Deprecations["secretstores"]; found && (count[0] > 0 || count[1] > 0) {
   331  		log.Printf("W! Deprecated secretstores: %d and %d options", count[0], count[1])
   332  	}
   333  
   334  	// Compute the amount of locked memory needed for the secrets
   335  	if !t.GlobalFlags.unprotected {
   336  		required := 3 * c.NumberSecrets * uint64(os.Getpagesize())
   337  		available := getLockedMemoryLimit()
   338  		if required > available {
   339  			required /= 1024
   340  			available /= 1024
   341  			log.Printf("I! Found %d secrets...", c.NumberSecrets)
   342  			msg := fmt.Sprintf("Insufficient lockable memory %dkb when %dkb is required.", available, required)
   343  			msg += " Please increase the limit for Telegraf in your Operating System!"
   344  			log.Printf("W! " + color.RedString(msg))
   345  		}
   346  	}
   347  	ag := agent.NewAgent(c)
   348  
   349  	// Notify systemd that telegraf is ready
   350  	// SdNotify() only tries to notify if the NOTIFY_SOCKET environment is set, so it's safe to call when systemd isn't present.
   351  	// Ignore the return values here because they're not valid for platforms that don't use systemd.
   352  	// For platforms that use systemd, telegraf doesn't log if the notification failed.
   353  	_, _ = daemon.SdNotify(false, daemon.SdNotifyReady)
   354  
   355  	if t.once {
   356  		wait := time.Duration(t.testWait) * time.Second
   357  		return ag.Once(ctx, wait)
   358  	}
   359  
   360  	if t.test || t.testWait != 0 {
   361  		wait := time.Duration(t.testWait) * time.Second
   362  		return ag.Test(ctx, wait)
   363  	}
   364  
   365  	if t.pidFile != "" {
   366  		f, err := os.OpenFile(t.pidFile, os.O_CREATE|os.O_WRONLY, 0640)
   367  		if err != nil {
   368  			log.Printf("E! Unable to create pidfile: %s", err)
   369  		} else {
   370  			fmt.Fprintf(f, "%d\n", os.Getpid())
   371  
   372  			err = f.Close()
   373  			if err != nil {
   374  				return err
   375  			}
   376  
   377  			defer func() {
   378  				err := os.Remove(t.pidFile)
   379  				if err != nil {
   380  					log.Printf("E! Unable to remove pidfile: %s", err)
   381  				}
   382  			}()
   383  		}
   384  	}
   385  
   386  	return ag.Run(ctx)
   387  }