github.com/crowdsecurity/crowdsec@v1.6.1/pkg/acquisition/modules/file/file.go

     1  package fileacquisition
     2  
     3  import (
     4  	"bufio"
     5  	"compress/gzip"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"net/url"
    10  	"os"
    11  	"path/filepath"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/fsnotify/fsnotify"
    19  	"github.com/nxadm/tail"
    20  	"github.com/prometheus/client_golang/prometheus"
    21  	log "github.com/sirupsen/logrus"
    22  	"gopkg.in/tomb.v2"
    23  	"gopkg.in/yaml.v2"
    24  
    25  	"github.com/crowdsecurity/go-cs-lib/trace"
    26  
    27  	"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
    28  	"github.com/crowdsecurity/crowdsec/pkg/types"
    29  )
    30  
    31  var linesRead = prometheus.NewCounterVec(
    32  	prometheus.CounterOpts{
    33  		Name: "cs_filesource_hits_total",
    34  		Help: "Total lines that were read.",
    35  	},
    36  	[]string{"source"})
    37  
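        // FileConfiguration is the YAML configuration of the file datasource.
        // An illustrative acquisition snippet (keys taken from the yaml tags
        // below plus the common datasource fields such as mode and labels;
        // paths and label values are examples only):
        //
        //	filenames:
        //	  - /var/log/nginx/*.log
        //	exclude_regexps:
        //	  - \.gz$
        //	mode: tail
        //	labels:
        //	  type: nginx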
    38  type FileConfiguration struct {
    39  	Filenames                         []string
    40  	ExcludeRegexps                    []string `yaml:"exclude_regexps"`
    41  	Filename                          string
    42  	ForceInotify                      bool  `yaml:"force_inotify"`
    43  	MaxBufferSize                     int   `yaml:"max_buffer_size"`
    44  	PollWithoutInotify                *bool `yaml:"poll_without_inotify"`
    45  	configuration.DataSourceCommonCfg `yaml:",inline"`
    46  }
    47  
    48  type FileSource struct {
    49  	metricsLevel       int
    50  	config             FileConfiguration
    51  	watcher            *fsnotify.Watcher
    52  	watchedDirectories map[string]bool
    53  	tails              map[string]bool
    54  	logger             *log.Entry
    55  	files              []string
    56  	exclude_regexps    []*regexp.Regexp
    57  	tailMapMutex       *sync.RWMutex
    58  }
    59  
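        // GetUuid returns the unique id assigned to this datasource instance.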
    60  func (f *FileSource) GetUuid() string {
    61  	return f.config.UniqueId
    62  }
    63  
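        // UnmarshalConfig parses and validates the YAML configuration: it merges
        // filename into filenames, defaults the mode to tail and compiles the
        // exclude_regexps patterns.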
    64  func (f *FileSource) UnmarshalConfig(yamlConfig []byte) error {
    65  	f.config = FileConfiguration{}
    66  
    67  	err := yaml.UnmarshalStrict(yamlConfig, &f.config)
    68  	if err != nil {
    69  		return fmt.Errorf("cannot parse FileAcquisition configuration: %w", err)
    70  	}
    71  
    72  	if f.logger != nil {
    73  		f.logger.Tracef("FileAcquisition configuration: %+v", f.config)
    74  	}
    75  
    76  	if len(f.config.Filename) != 0 {
    77  		f.config.Filenames = append(f.config.Filenames, f.config.Filename)
    78  	}
    79  
    80  	if len(f.config.Filenames) == 0 {
    81  		return errors.New("no filename or filenames configuration provided")
    82  	}
    83  
    84  	if f.config.Mode == "" {
    85  		f.config.Mode = configuration.TAIL_MODE
    86  	}
    87  
    88  	if f.config.Mode != configuration.CAT_MODE && f.config.Mode != configuration.TAIL_MODE {
    89  		return fmt.Errorf("unsupported mode %s for file source", f.config.Mode)
    90  	}
    91  
    92  	for _, exclude := range f.config.ExcludeRegexps {
    93  		re, err := regexp.Compile(exclude)
    94  		if err != nil {
    95  			return fmt.Errorf("could not compile regexp %s: %w", exclude, err)
    96  		}
    97  
    98  		f.exclude_regexps = append(f.exclude_regexps, re)
    99  	}
   100  
   101  	return nil
   102  }
   103  
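         // Configure parses the configuration, creates the fsnotify watcher and
         // expands the configured glob patterns into the list of files to read,
         // honouring exclude_regexps and force_inotify.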
   104  func (f *FileSource) Configure(yamlConfig []byte, logger *log.Entry, MetricsLevel int) error {
   105  	f.logger = logger
   106  	f.metricsLevel = MetricsLevel
   107  
   108  	err := f.UnmarshalConfig(yamlConfig)
   109  	if err != nil {
   110  		return err
   111  	}
   112  
   113  	f.watchedDirectories = make(map[string]bool)
   114  	f.tailMapMutex = &sync.RWMutex{}
   115  	f.tails = make(map[string]bool)
   116  
   117  	f.watcher, err = fsnotify.NewWatcher()
   118  	if err != nil {
   119  		return fmt.Errorf("could not create fsnotify watcher: %w", err)
   120  	}
   121  
   122  	f.logger.Tracef("Actual FileAcquisition Configuration %+v", f.config)
   123  
   124  	for _, pattern := range f.config.Filenames {
   125  		if f.config.ForceInotify {
   126  			directory := filepath.Dir(pattern)
   127  			f.logger.Infof("Force add watch on %s", directory)
   128  
   129  			if !f.watchedDirectories[directory] {
   130  				err = f.watcher.Add(directory)
   131  				if err != nil {
   132  					f.logger.Errorf("Could not create watch on directory %s : %s", directory, err)
   133  					continue
   134  				}
   135  
   136  				f.watchedDirectories[directory] = true
   137  			}
   138  		}
   139  
   140  		files, err := filepath.Glob(pattern)
   141  		if err != nil {
   142  			return fmt.Errorf("glob failure: %w", err)
   143  		}
   144  
   145  		if len(files) == 0 {
   146  			f.logger.Warnf("No matching files for pattern %s", pattern)
   147  			continue
   148  		}
   149  
   150  		for _, file := range files {
   151  			// check if file is excluded
   152  			excluded := false
   153  
   154  			for _, pattern := range f.exclude_regexps {
   155  				if pattern.MatchString(file) {
   156  					excluded = true
   157  
   158  					f.logger.Infof("Skipping file %s as it matches exclude pattern %s", file, pattern)
   159  
   160  					break
   161  				}
   162  			}
   163  
   164  			if excluded {
   165  				continue
   166  			}
   167  
   168  			if files[0] != pattern && f.config.Mode == configuration.TAIL_MODE { // we have a glob pattern
   169  				directory := filepath.Dir(file)
   170  				f.logger.Debugf("Will add watch to directory: %s", directory)
   171  
   172  				if !f.watchedDirectories[directory] {
   173  					err = f.watcher.Add(directory)
   174  					if err != nil {
   175  						f.logger.Errorf("Could not create watch on directory %s : %s", directory, err)
   176  						continue
   177  					}
   178  
   179  					f.watchedDirectories[directory] = true
   180  				} else {
   181  					f.logger.Debugf("Watch for directory %s already exists", directory)
   182  				}
   183  			}
   184  
   185  			f.logger.Infof("Adding file %s to datasources", file)
   186  			f.files = append(f.files, file)
   187  		}
   188  	}
   189  
   190  	return nil
   191  }
   192  
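         // ConfigureByDSN configures the source in one-shot (cat) mode from a DSN, e.g.
         //
         //	file:///var/log/auth.log?log_level=debug&max_buffer_size=1048576
         //
         // The path part may be a glob pattern; the only supported query
         // parameters are log_level and max_buffer_size (the example path above
         // is purely illustrative).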
   193  func (f *FileSource) ConfigureByDSN(dsn string, labels map[string]string, logger *log.Entry, uuid string) error {
   194  	if !strings.HasPrefix(dsn, "file://") {
   195  		return fmt.Errorf("invalid DSN %s for file source, must start with file://", dsn)
   196  	}
   197  
   198  	f.logger = logger
   199  	f.config = FileConfiguration{}
   200  
   201  	dsn = strings.TrimPrefix(dsn, "file://")
   202  
   203  	args := strings.Split(dsn, "?")
   204  
   205  	if len(args[0]) == 0 {
   206  		return errors.New("empty file:// DSN")
   207  	}
   208  
   209  	if len(args) == 2 && len(args[1]) != 0 {
   210  		params, err := url.ParseQuery(args[1])
   211  		if err != nil {
   212  			return fmt.Errorf("could not parse file args: %w", err)
   213  		}
   214  
   215  		for key, value := range params {
   216  			switch key {
   217  			case "log_level":
   218  				if len(value) != 1 {
   219  					return errors.New("expected zero or one value for 'log_level'")
   220  				}
   221  
   222  				lvl, err := log.ParseLevel(value[0])
   223  				if err != nil {
   224  					return fmt.Errorf("unknown level %s: %w", value[0], err)
   225  				}
   226  
   227  				f.logger.Logger.SetLevel(lvl)
   228  			case "max_buffer_size":
   229  				if len(value) != 1 {
   230  					return errors.New("expected zero or one value for 'max_buffer_size'")
   231  				}
   232  
   233  				maxBufferSize, err := strconv.Atoi(value[0])
   234  				if err != nil {
   235  					return fmt.Errorf("could not parse max_buffer_size %s: %w", value[0], err)
   236  				}
   237  
   238  				f.config.MaxBufferSize = maxBufferSize
   239  			default:
   240  				return fmt.Errorf("unknown parameter %s", key)
   241  			}
   242  		}
   243  	}
   244  
   245  	f.config.Labels = labels
   246  	f.config.Mode = configuration.CAT_MODE
   247  	f.config.UniqueId = uuid
   248  
   249  	f.logger.Debugf("Will try pattern %s", args[0])
   250  
   251  	files, err := filepath.Glob(args[0])
   252  	if err != nil {
   253  		return fmt.Errorf("glob failure: %w", err)
   254  	}
   255  
   256  	if len(files) == 0 {
   257  		return fmt.Errorf("no matching files for pattern %s", args[0])
   258  	}
   259  
   260  	if len(files) > 1 {
   261  		f.logger.Infof("Will read %d files", len(files))
   262  	}
   263  
   264  	for _, file := range files {
   265  		f.logger.Infof("Adding file %s to filelist", file)
   266  		f.files = append(f.files, file)
   267  	}
   268  
   269  	return nil
   270  }
   271  
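         // GetMode returns the configured acquisition mode (tail or cat).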
   272  func (f *FileSource) GetMode() string {
   273  	return f.config.Mode
   274  }
   275  
    276  // SupportedModes returns the modes supported by the acquisition module
   277  func (f *FileSource) SupportedModes() []string {
   278  	return []string{configuration.TAIL_MODE, configuration.CAT_MODE}
   279  }
   280  
    281  // OneShotAcquisition reads a set of files and returns when done
   282  func (f *FileSource) OneShotAcquisition(out chan types.Event, t *tomb.Tomb) error {
   283  	f.logger.Debug("In oneshot")
   284  
   285  	for _, file := range f.files {
   286  		fi, err := os.Stat(file)
   287  		if err != nil {
   288  			return fmt.Errorf("could not stat file %s : %w", file, err)
   289  		}
   290  
   291  		if fi.IsDir() {
   292  			f.logger.Warnf("%s is a directory, ignoring it.", file)
   293  			continue
   294  		}
   295  
   296  		f.logger.Infof("reading %s at once", file)
   297  
   298  		err = f.readFile(file, out, t)
   299  		if err != nil {
   300  			return err
   301  		}
   302  	}
   303  
   304  	return nil
   305  }
   306  
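         // GetMetrics returns the prometheus collectors of this source (lines read, labelled per file).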
   307  func (f *FileSource) GetMetrics() []prometheus.Collector {
   308  	return []prometheus.Collector{linesRead}
   309  }
   310  
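         // GetAggregMetrics returns the collectors used when metrics are aggregated.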
   311  func (f *FileSource) GetAggregMetrics() []prometheus.Collector {
   312  	return []prometheus.Collector{linesRead}
   313  }
   314  
   315  func (f *FileSource) GetName() string {
   316  	return "file"
   317  }
   318  
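         // CanRun always returns nil: the file datasource has no platform restriction.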
   319  func (f *FileSource) CanRun() error {
   320  	return nil
   321  }
   322  
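         // StreamingAcquisition starts a tail on every configured file, plus a
         // goroutine watching for newly created files matching the configured
         // patterns, and pushes their lines to out until the tomb dies.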
   323  func (f *FileSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) error {
   324  	f.logger.Debug("Starting live acquisition")
   325  	t.Go(func() error {
   326  		return f.monitorNewFiles(out, t)
   327  	})
   328  
   329  	for _, file := range f.files {
   330  		// before opening the file, check if we need to specifically avoid it. (XXX)
   331  		skip := false
   332  
   333  		for _, pattern := range f.exclude_regexps {
   334  			if pattern.MatchString(file) {
   335  				f.logger.Infof("file %s matches exclusion pattern %s, skipping", file, pattern.String())
   336  
   337  				skip = true
   338  
   339  				break
   340  			}
   341  		}
   342  
   343  		if skip {
   344  			continue
   345  		}
   346  
    347  		// cf. https://github.com/crowdsecurity/crowdsec/issues/1168
    348  		// do not rely on Stat() alone: open the file to make sure it is readable, then close it right away since Tail will open it itself
   349  		fd, err := os.Open(file)
   350  		if err != nil {
   351  			f.logger.Errorf("unable to read %s : %s", file, err)
   352  			continue
   353  		}
   354  
   355  		if err := fd.Close(); err != nil {
   356  			f.logger.Errorf("unable to close %s : %s", file, err)
   357  			continue
   358  		}
   359  
   360  		fi, err := os.Stat(file)
   361  		if err != nil {
   362  			return fmt.Errorf("could not stat file %s : %w", file, err)
   363  		}
   364  
   365  		if fi.IsDir() {
   366  			f.logger.Warnf("%s is a directory, ignoring it.", file)
   367  			continue
   368  		}
   369  
   370  		pollFile := false
   371  		if f.config.PollWithoutInotify != nil {
   372  			pollFile = *f.config.PollWithoutInotify
   373  		} else {
   374  			networkFS, fsType, err := types.IsNetworkFS(file)
   375  			if err != nil {
   376  				f.logger.Warningf("Could not get fs type for %s : %s", file, err)
   377  			}
   378  
   379  			f.logger.Debugf("fs for %s is network: %t (%s)", file, networkFS, fsType)
   380  
   381  			if networkFS {
    382  				f.logger.Warnf("Using polling instead of inotify for %s as it is on a network share. You can manually set poll_without_inotify to true to make this message disappear, or to false to force the use of inotify", file)
   383  				pollFile = true
   384  			}
   385  		}
   386  
   387  		filink, err := os.Lstat(file)
   388  
   389  		if err != nil {
   390  			f.logger.Errorf("Could not lstat() new file %s, ignoring it : %s", file, err)
   391  			continue
   392  		}
   393  
   394  		if filink.Mode()&os.ModeSymlink == os.ModeSymlink && !pollFile {
    395  			f.logger.Warnf("File %s is a symlink and is monitored with inotify. Crowdsec will not be able to detect rotation. Consider setting poll_without_inotify to true in your configuration", file)
   396  		}
   397  
   398  		tail, err := tail.TailFile(file, tail.Config{ReOpen: true, Follow: true, Poll: pollFile, Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Logger: log.NewEntry(log.StandardLogger())})
   399  		if err != nil {
   400  			f.logger.Errorf("Could not start tailing file %s : %s", file, err)
   401  			continue
   402  		}
   403  
   404  		f.tailMapMutex.Lock()
   405  		f.tails[file] = true
   406  		f.tailMapMutex.Unlock()
   407  		t.Go(func() error {
   408  			defer trace.CatchPanic("crowdsec/acquis/file/live/fsnotify")
   409  			return f.tailFile(out, t, tail)
   410  		})
   411  	}
   412  
   413  	return nil
   414  }
   415  
   416  func (f *FileSource) Dump() interface{} {
   417  	return f
   418  }
   419  
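         // monitorNewFiles consumes fsnotify events: when a file matching one of
         // the configured patterns (and not excluded) appears in a watched
         // directory, a new tail is started on it from the beginning of the file.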
   420  func (f *FileSource) monitorNewFiles(out chan types.Event, t *tomb.Tomb) error {
   421  	logger := f.logger.WithField("goroutine", "inotify")
   422  
   423  	for {
   424  		select {
   425  		case event, ok := <-f.watcher.Events:
   426  			if !ok {
   427  				return nil
   428  			}
   429  
   430  			if event.Op&fsnotify.Create == fsnotify.Create {
   431  				fi, err := os.Stat(event.Name)
   432  				if err != nil {
   433  					logger.Errorf("Could not stat() new file %s, ignoring it : %s", event.Name, err)
   434  					continue
   435  				}
   436  
   437  				if fi.IsDir() {
   438  					continue
   439  				}
   440  
   441  				logger.Debugf("Detected new file %s", event.Name)
   442  
   443  				matched := false
   444  
   445  				for _, pattern := range f.config.Filenames {
   446  					logger.Debugf("Matching %s with %s", pattern, event.Name)
   447  
   448  					matched, err = filepath.Match(pattern, event.Name)
   449  					if err != nil {
   450  						logger.Errorf("Could not match pattern : %s", err)
   451  						continue
   452  					}
   453  
   454  					if matched {
   455  						logger.Debugf("Matched %s with %s", pattern, event.Name)
   456  						break
   457  					}
   458  				}
   459  
   460  				if !matched {
   461  					continue
   462  				}
   463  
   464  				// before opening the file, check if we need to specifically avoid it. (XXX)
   465  				skip := false
   466  
   467  				for _, pattern := range f.exclude_regexps {
   468  					if pattern.MatchString(event.Name) {
   469  						f.logger.Infof("file %s matches exclusion pattern %s, skipping", event.Name, pattern.String())
   470  
   471  						skip = true
   472  
   473  						break
   474  					}
   475  				}
   476  
   477  				if skip {
   478  					continue
   479  				}
   480  
   481  				f.tailMapMutex.RLock()
   482  				if f.tails[event.Name] {
   483  					f.tailMapMutex.RUnlock()
   484  					// we already have a tail on it, do not start a new one
   485  					logger.Debugf("Already tailing file %s, not creating a new tail", event.Name)
   486  
   487  					break
   488  				}
   489  				f.tailMapMutex.RUnlock()
    490  				// cf. https://github.com/crowdsecurity/crowdsec/issues/1168
    491  				// do not rely on Stat() alone: open the file to make sure it is readable, then close it right away since Tail will open it itself
   492  				fd, err := os.Open(event.Name)
   493  				if err != nil {
   494  					f.logger.Errorf("unable to read %s : %s", event.Name, err)
   495  					continue
   496  				}
   497  				if err := fd.Close(); err != nil {
   498  					f.logger.Errorf("unable to close %s : %s", event.Name, err)
   499  					continue
   500  				}
   501  
   502  				pollFile := false
   503  				if f.config.PollWithoutInotify != nil {
   504  					pollFile = *f.config.PollWithoutInotify
   505  				} else {
   506  					networkFS, fsType, err := types.IsNetworkFS(event.Name)
   507  					if err != nil {
   508  						f.logger.Warningf("Could not get fs type for %s : %s", event.Name, err)
   509  					}
   510  					f.logger.Debugf("fs for %s is network: %t (%s)", event.Name, networkFS, fsType)
   511  					if networkFS {
   512  						pollFile = true
   513  					}
   514  				}
   515  
   516  				filink, err := os.Lstat(event.Name)
   517  
   518  				if err != nil {
   519  					logger.Errorf("Could not lstat() new file %s, ignoring it : %s", event.Name, err)
   520  					continue
   521  				}
   522  
   523  				if filink.Mode()&os.ModeSymlink == os.ModeSymlink && !pollFile {
    524  					logger.Warnf("File %s is a symlink and is monitored with inotify. Crowdsec will not be able to detect rotation. Consider setting poll_without_inotify to true in your configuration", event.Name)
   525  				}
   526  
    527  				// Slightly different parameters for Location, as we want to read the first lines of the newly created file
   528  				tail, err := tail.TailFile(event.Name, tail.Config{ReOpen: true, Follow: true, Poll: pollFile, Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekStart}})
   529  				if err != nil {
   530  					logger.Errorf("Could not start tailing file %s : %s", event.Name, err)
   531  					break
   532  				}
   533  
   534  				f.tailMapMutex.Lock()
   535  				f.tails[event.Name] = true
   536  				f.tailMapMutex.Unlock()
   537  				t.Go(func() error {
   538  					defer trace.CatchPanic("crowdsec/acquis/tailfile")
   539  					return f.tailFile(out, t, tail)
   540  				})
   541  			}
   542  		case err, ok := <-f.watcher.Errors:
   543  			if !ok {
   544  				return nil
   545  			}
   546  
   547  			logger.Errorf("Error while monitoring folder: %s", err)
   548  		case <-t.Dying():
   549  			err := f.watcher.Close()
   550  			if err != nil {
   551  				return fmt.Errorf("could not remove all inotify watches: %w", err)
   552  			}
   553  
   554  			return nil
   555  		}
   556  	}
   557  }
   558  
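         // tailFile forwards the lines produced by a single tail to the output
         // channel, updating the per-source metrics, until either the tomb or the
         // tailer dies.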
   559  func (f *FileSource) tailFile(out chan types.Event, t *tomb.Tomb, tail *tail.Tail) error {
   560  	logger := f.logger.WithField("tail", tail.Filename)
   561  	logger.Debugf("-> Starting tail of %s", tail.Filename)
   562  
   563  	for {
   564  		select {
   565  		case <-t.Dying():
   566  			logger.Infof("File datasource %s stopping", tail.Filename)
   567  
   568  			if err := tail.Stop(); err != nil {
   569  				f.logger.Errorf("error in stop : %s", err)
   570  				return err
   571  			}
   572  
   573  			return nil
   574  		case <-tail.Dying(): // our tailer is dying
   575  			err := tail.Err()
   576  			errMsg := fmt.Sprintf("file reader of %s died", tail.Filename)
   577  			if err != nil {
   578  				errMsg = fmt.Sprintf(errMsg+" : %s", err)
   579  			}
   580  
    581  			logger.Warning(errMsg)
   582  
   583  			return nil
   584  		case line := <-tail.Lines:
   585  			if line == nil {
   586  				logger.Warningf("tail for %s is empty", tail.Filename)
   587  				continue
   588  			}
   589  
   590  			if line.Err != nil {
   591  				logger.Warningf("fetch error : %v", line.Err)
   592  				return line.Err
   593  			}
   594  
   595  			if line.Text == "" { // skip empty lines
   596  				continue
   597  			}
   598  
   599  			if f.metricsLevel != configuration.METRICS_NONE {
   600  				linesRead.With(prometheus.Labels{"source": tail.Filename}).Inc()
   601  			}
   602  
   603  			src := tail.Filename
   604  			if f.metricsLevel == configuration.METRICS_AGGREGATE {
   605  				src = filepath.Base(tail.Filename)
   606  			}
   607  
   608  			l := types.Line{
   609  				Raw:     trimLine(line.Text),
   610  				Labels:  f.config.Labels,
   611  				Time:    line.Time,
   612  				Src:     src,
   613  				Process: true,
   614  				Module:  f.GetName(),
   615  			}
    616  			// we're tailing: emit live events, unless time-machine mode is explicitly requested
   617  			logger.Debugf("pushing %+v", l)
   618  
   619  			expectMode := types.LIVE
   620  			if f.config.UseTimeMachine {
   621  				expectMode = types.TIMEMACHINE
   622  			}
   623  			out <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: expectMode}
   624  		}
   625  	}
   626  }
   627  
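         // readFile reads a whole (optionally gzipped) file line by line and
         // sends each non-empty line as a time-machine event.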
   628  func (f *FileSource) readFile(filename string, out chan types.Event, t *tomb.Tomb) error {
   629  	var scanner *bufio.Scanner
   630  
   631  	logger := f.logger.WithField("oneshot", filename)
   632  	fd, err := os.Open(filename)
   633  
   634  	if err != nil {
   635  		return fmt.Errorf("failed opening %s: %w", filename, err)
   636  	}
   637  
   638  	defer fd.Close()
   639  
   640  	if strings.HasSuffix(filename, ".gz") {
   641  		gz, err := gzip.NewReader(fd)
   642  		if err != nil {
   643  			logger.Errorf("Failed to read gz file: %s", err)
   644  			return fmt.Errorf("failed to read gz %s: %w", filename, err)
   645  		}
   646  
   647  		defer gz.Close()
   648  		scanner = bufio.NewScanner(gz)
   649  	} else {
   650  		scanner = bufio.NewScanner(fd)
   651  	}
   652  
   653  	scanner.Split(bufio.ScanLines)
   654  
   655  	if f.config.MaxBufferSize > 0 {
   656  		buf := make([]byte, 0, 64*1024)
   657  		scanner.Buffer(buf, f.config.MaxBufferSize)
   658  	}
   659  
   660  	for scanner.Scan() {
   661  		select {
   662  		case <-t.Dying():
   663  			logger.Infof("File datasource %s stopping", filename)
   664  			return nil
   665  		default:
   666  			if scanner.Text() == "" {
   667  				continue
   668  			}
   669  
   670  			l := types.Line{
   671  				Raw:     scanner.Text(),
   672  				Time:    time.Now().UTC(),
   673  				Src:     filename,
   674  				Labels:  f.config.Labels,
   675  				Process: true,
   676  				Module:  f.GetName(),
   677  			}
   678  			logger.Debugf("line %s", l.Raw)
   679  			linesRead.With(prometheus.Labels{"source": filename}).Inc()
   680  
    681  			// we're reading the whole file at once, so events go through the time machine
   682  			out <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: types.TIMEMACHINE}
   683  		}
   684  	}
   685  
   686  	if err := scanner.Err(); err != nil {
   687  		logger.Errorf("Error while reading file: %s", err)
   688  		t.Kill(err)
   689  
   690  		return err
   691  	}
   692  
   693  	t.Kill(nil)
   694  
   695  	return nil
   696  }