github.com/crowdsecurity/crowdsec@v1.6.1/pkg/acquisition/acquisition.go (about)

     1  package acquisition
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"strings"
     9  
    10  	"github.com/antonmedv/expr"
    11  	"github.com/antonmedv/expr/vm"
    12  	"github.com/google/uuid"
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	log "github.com/sirupsen/logrus"
    15  	tomb "gopkg.in/tomb.v2"
    16  	"gopkg.in/yaml.v2"
    17  
    18  	"github.com/crowdsecurity/go-cs-lib/trace"
    19  
    20  	"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
    21  	appsecacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/appsec"
    22  	cloudwatchacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/cloudwatch"
    23  	dockeracquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/docker"
    24  	fileacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/file"
    25  	journalctlacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/journalctl"
    26  	kafkaacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/kafka"
    27  	kinesisacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/kinesis"
    28  	k8sauditacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/kubernetesaudit"
    29  	lokiacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/loki"
    30  	s3acquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/s3"
    31  	syslogacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/syslog"
    32  	wineventlogacquisition "github.com/crowdsecurity/crowdsec/pkg/acquisition/modules/wineventlog"
    33  	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
    34  
    35  	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
    36  	"github.com/crowdsecurity/crowdsec/pkg/types"
    37  )
    38  
    39  type DataSourceUnavailableError struct {
    40  	Name string
    41  	Err  error
    42  }
    43  
    44  func (e *DataSourceUnavailableError) Error() string {
    45  	return fmt.Sprintf("datasource '%s' is not available: %v", e.Name, e.Err)
    46  }
    47  
    48  func (e *DataSourceUnavailableError) Unwrap() error {
    49  	return e.Err
    50  }
    51  
// DataSource is the interface each datasource module must implement.
// Instances are created through the factories registered in AcquisitionSources.
type DataSource interface {
	GetMetrics() []prometheus.Collector                                 // Returns pointers to metrics that are managed by the module
	GetAggregMetrics() []prometheus.Collector                           // Returns pointers to metrics that are managed by the module (aggregated mode, limits cardinality)
	UnmarshalConfig([]byte) error                                       // Decode and pre-validate the YAML datasource - anything that can be checked before runtime
	Configure([]byte, *log.Entry, int) error                            // Complete the YAML datasource configuration and perform runtime checks.
	ConfigureByDSN(string, map[string]string, *log.Entry, string) error // Configure the datasource
	GetMode() string                                                    // Get the mode (TAIL, CAT or SERVER)
	GetName() string                                                    // Get the name of the module
	OneShotAcquisition(chan types.Event, *tomb.Tomb) error              // Start one shot acquisition(eg, cat a file)
	StreamingAcquisition(chan types.Event, *tomb.Tomb) error            // Start live acquisition (eg, tail a file)
	CanRun() error                                                      // Whether the datasource can run or not (eg, journalctl on BSD is a non-sense)
	GetUuid() string                                                    // Get the unique identifier of the datasource
	// Dump returns the datasource as an opaque value.
	// NOTE(review): what the value contains is decided by each module and is not visible from this file - presumably meant for debugging/tests, confirm.
	Dump() interface{}
}
    67  
// AcquisitionSources is the registry of supported datasources: it maps the
// datasource type name (the key looked up by GetDataSourceIface) to a factory
// returning a new, zero-valued instance of the matching module.
var AcquisitionSources = map[string]func() DataSource{
	"file":        func() DataSource { return &fileacquisition.FileSource{} },
	"journalctl":  func() DataSource { return &journalctlacquisition.JournalCtlSource{} },
	"cloudwatch":  func() DataSource { return &cloudwatchacquisition.CloudwatchSource{} },
	"syslog":      func() DataSource { return &syslogacquisition.SyslogSource{} },
	"docker":      func() DataSource { return &dockeracquisition.DockerSource{} },
	"kinesis":     func() DataSource { return &kinesisacquisition.KinesisSource{} },
	"wineventlog": func() DataSource { return &wineventlogacquisition.WinEventLogSource{} },
	"kafka":       func() DataSource { return &kafkaacquisition.KafkaSource{} },
	"k8s-audit":   func() DataSource { return &k8sauditacquisition.KubernetesAuditSource{} },
	"loki":        func() DataSource { return &lokiacquisition.LokiSource{} },
	"s3":          func() DataSource { return &s3acquisition.S3Source{} },
	"appsec":      func() DataSource { return &appsecacquisition.AppsecSource{} },
}
    82  
// transformRuntimes holds the compiled transform expressions, keyed by the
// unique id generated for each datasource when it is loaded. It is written by
// LoadAcquisitionFromDSN and LoadAcquisitionFromFile and read by
// StartAcquisition.
// NOTE(review): no lock guards this map in this file - confirm that loading
// always completes before acquisition starts.
var transformRuntimes = map[string]*vm.Program{}
    84  
    85  func GetDataSourceIface(dataSourceType string) DataSource {
    86  	source := AcquisitionSources[dataSourceType]
    87  	if source == nil {
    88  		return nil
    89  	}
    90  	return source()
    91  }
    92  
    93  // DataSourceConfigure creates and returns a DataSource object from a configuration,
    94  // if the configuration is not valid it returns an error.
    95  // If the datasource can't be run (eg. journalctl not available), it still returns an error which
    96  // can be checked for the appropriate action.
    97  func DataSourceConfigure(commonConfig configuration.DataSourceCommonCfg, metricsLevel int) (*DataSource, error) {
    98  	// we dump it back to []byte, because we want to decode the yaml blob twice:
    99  	// once to DataSourceCommonCfg, and then later to the dedicated type of the datasource
   100  	yamlConfig, err := yaml.Marshal(commonConfig)
   101  	if err != nil {
   102  		return nil, fmt.Errorf("unable to marshal back interface: %w", err)
   103  	}
   104  	if dataSrc := GetDataSourceIface(commonConfig.Source); dataSrc != nil {
   105  		/* this logger will then be used by the datasource at runtime */
   106  		clog := log.New()
   107  		if err := types.ConfigureLogger(clog); err != nil {
   108  			return nil, fmt.Errorf("while configuring datasource logger: %w", err)
   109  		}
   110  		if commonConfig.LogLevel != nil {
   111  			clog.SetLevel(*commonConfig.LogLevel)
   112  		}
   113  		customLog := log.Fields{
   114  			"type": commonConfig.Source,
   115  		}
   116  		if commonConfig.Name != "" {
   117  			customLog["name"] = commonConfig.Name
   118  		}
   119  		subLogger := clog.WithFields(customLog)
   120  		/* check eventual dependencies are satisfied (ie. journald will check journalctl availability) */
   121  		if err := dataSrc.CanRun(); err != nil {
   122  			return nil, &DataSourceUnavailableError{Name: commonConfig.Source, Err: err}
   123  		}
   124  		/* configure the actual datasource */
   125  		if err := dataSrc.Configure(yamlConfig, subLogger, metricsLevel); err != nil {
   126  			return nil, fmt.Errorf("failed to configure datasource %s: %w", commonConfig.Source, err)
   127  
   128  		}
   129  		return &dataSrc, nil
   130  	}
   131  	return nil, fmt.Errorf("cannot find source %s", commonConfig.Source)
   132  }
   133  
   134  // detectBackwardCompatAcquis: try to magically detect the type for backward compat (type was not mandatory then)
   135  func detectBackwardCompatAcquis(sub configuration.DataSourceCommonCfg) string {
   136  	if _, ok := sub.Config["filename"]; ok {
   137  		return "file"
   138  	}
   139  	if _, ok := sub.Config["filenames"]; ok {
   140  		return "file"
   141  	}
   142  	if _, ok := sub.Config["journalctl_filter"]; ok {
   143  		return "journalctl"
   144  	}
   145  	return ""
   146  }
   147  
   148  func LoadAcquisitionFromDSN(dsn string, labels map[string]string, transformExpr string) ([]DataSource, error) {
   149  	var sources []DataSource
   150  
   151  	frags := strings.Split(dsn, ":")
   152  	if len(frags) == 1 {
   153  		return nil, fmt.Errorf("%s isn't valid dsn (no protocol)", dsn)
   154  	}
   155  	dataSrc := GetDataSourceIface(frags[0])
   156  	if dataSrc == nil {
   157  		return nil, fmt.Errorf("no acquisition for protocol %s://", frags[0])
   158  	}
   159  	/* this logger will then be used by the datasource at runtime */
   160  	clog := log.New()
   161  	if err := types.ConfigureLogger(clog); err != nil {
   162  		return nil, fmt.Errorf("while configuring datasource logger: %w", err)
   163  	}
   164  	subLogger := clog.WithFields(log.Fields{
   165  		"type": dsn,
   166  	})
   167  	uniqueId := uuid.NewString()
   168  	if transformExpr != "" {
   169  		vm, err := expr.Compile(transformExpr, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...)
   170  		if err != nil {
   171  			return nil, fmt.Errorf("while compiling transform expression '%s': %w", transformExpr, err)
   172  		}
   173  		transformRuntimes[uniqueId] = vm
   174  	}
   175  	err := dataSrc.ConfigureByDSN(dsn, labels, subLogger, uniqueId)
   176  	if err != nil {
   177  		return nil, fmt.Errorf("while configuration datasource for %s: %w", dsn, err)
   178  	}
   179  	sources = append(sources, dataSrc)
   180  	return sources, nil
   181  }
   182  
   183  func GetMetricsLevelFromPromCfg(prom *csconfig.PrometheusCfg) int {
   184  	if prom == nil {
   185  		return configuration.METRICS_FULL
   186  
   187  	}
   188  	if !prom.Enabled {
   189  		return configuration.METRICS_NONE
   190  	}
   191  	if prom.Level == configuration.CFG_METRICS_AGGREGATE {
   192  		return configuration.METRICS_AGGREGATE
   193  	}
   194  
   195  	if prom.Level == configuration.CFG_METRICS_FULL {
   196  		return configuration.METRICS_FULL
   197  	}
   198  	return configuration.METRICS_FULL
   199  
   200  }
   201  
   202  // LoadAcquisitionFromFile unmarshals the configuration item and checks its availability
   203  func LoadAcquisitionFromFile(config *csconfig.CrowdsecServiceCfg, prom *csconfig.PrometheusCfg) ([]DataSource, error) {
   204  	var sources []DataSource
   205  
   206  	metrics_level := GetMetricsLevelFromPromCfg(prom)
   207  	for _, acquisFile := range config.AcquisitionFiles {
   208  		log.Infof("loading acquisition file : %s", acquisFile)
   209  		yamlFile, err := os.Open(acquisFile)
   210  		if err != nil {
   211  			return nil, err
   212  		}
   213  		dec := yaml.NewDecoder(yamlFile)
   214  		dec.SetStrict(true)
   215  		idx := -1
   216  		for {
   217  			var sub configuration.DataSourceCommonCfg
   218  			err = dec.Decode(&sub)
   219  			idx += 1
   220  			if err != nil {
   221  				if !errors.Is(err, io.EOF) {
   222  					return nil, fmt.Errorf("failed to yaml decode %s: %w", acquisFile, err)
   223  				}
   224  				log.Tracef("End of yaml file")
   225  				break
   226  			}
   227  
   228  			//for backward compat ('type' was not mandatory, detect it)
   229  			if guessType := detectBackwardCompatAcquis(sub); guessType != "" {
   230  				sub.Source = guessType
   231  			}
   232  			//it's an empty item, skip it
   233  			if len(sub.Labels) == 0 {
   234  				if sub.Source == "" {
   235  					log.Debugf("skipping empty item in %s", acquisFile)
   236  					continue
   237  				}
   238  				return nil, fmt.Errorf("missing labels in %s (position: %d)", acquisFile, idx)
   239  			}
   240  			if sub.Source == "" {
   241  				return nil, fmt.Errorf("data source type is empty ('source') in %s (position: %d)", acquisFile, idx)
   242  			}
   243  			if GetDataSourceIface(sub.Source) == nil {
   244  				return nil, fmt.Errorf("unknown data source %s in %s (position: %d)", sub.Source, acquisFile, idx)
   245  			}
   246  			uniqueId := uuid.NewString()
   247  			sub.UniqueId = uniqueId
   248  			src, err := DataSourceConfigure(sub, metrics_level)
   249  			if err != nil {
   250  				var dserr *DataSourceUnavailableError
   251  				if errors.As(err, &dserr) {
   252  					log.Error(err)
   253  					continue
   254  				}
   255  				return nil, fmt.Errorf("while configuring datasource of type %s from %s (position: %d): %w", sub.Source, acquisFile, idx, err)
   256  			}
   257  			if sub.TransformExpr != "" {
   258  				vm, err := expr.Compile(sub.TransformExpr, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...)
   259  				if err != nil {
   260  					return nil, fmt.Errorf("while compiling transform expression '%s' for datasource %s in %s (position: %d): %w", sub.TransformExpr, sub.Source, acquisFile, idx, err)
   261  				}
   262  				transformRuntimes[uniqueId] = vm
   263  			}
   264  			sources = append(sources, *src)
   265  		}
   266  	}
   267  	return sources, nil
   268  }
   269  
   270  func GetMetrics(sources []DataSource, aggregated bool) error {
   271  	var metrics []prometheus.Collector
   272  	for i := 0; i < len(sources); i++ {
   273  		if aggregated {
   274  			metrics = sources[i].GetMetrics()
   275  		} else {
   276  			metrics = sources[i].GetAggregMetrics()
   277  		}
   278  		for _, metric := range metrics {
   279  			if err := prometheus.Register(metric); err != nil {
   280  				if _, ok := err.(prometheus.AlreadyRegisteredError); !ok {
   281  					return fmt.Errorf("could not register metrics for datasource %s: %w", sources[i].GetName(), err)
   282  				}
   283  				// ignore the error
   284  			}
   285  		}
   286  	}
   287  	return nil
   288  }
   289  
   290  func transform(transformChan chan types.Event, output chan types.Event, AcquisTomb *tomb.Tomb, transformRuntime *vm.Program, logger *log.Entry) {
   291  	defer trace.CatchPanic("crowdsec/acquis")
   292  	logger.Infof("transformer started")
   293  	for {
   294  		select {
   295  		case <-AcquisTomb.Dying():
   296  			logger.Debugf("transformer is dying")
   297  			return
   298  		case evt := <-transformChan:
   299  			logger.Tracef("Received event %s", evt.Line.Raw)
   300  			out, err := expr.Run(transformRuntime, map[string]interface{}{"evt": &evt})
   301  			if err != nil {
   302  				logger.Errorf("while running transform expression: %s, sending event as-is", err)
   303  				output <- evt
   304  			}
   305  			if out == nil {
   306  				logger.Errorf("transform expression returned nil, sending event as-is")
   307  				output <- evt
   308  			}
   309  			switch v := out.(type) {
   310  			case string:
   311  				logger.Tracef("transform expression returned %s", v)
   312  				evt.Line.Raw = v
   313  				output <- evt
   314  			case []interface{}:
   315  				logger.Tracef("transform expression returned %v", v) //nolint:asasalint // We actually want to log the slice content
   316  				for _, line := range v {
   317  					l, ok := line.(string)
   318  					if !ok {
   319  						logger.Errorf("transform expression returned []interface{}, but cannot assert an element to string")
   320  						output <- evt
   321  						continue
   322  					}
   323  					evt.Line.Raw = l
   324  					output <- evt
   325  				}
   326  			case []string:
   327  				logger.Tracef("transform expression returned %v", v)
   328  				for _, line := range v {
   329  					evt.Line.Raw = line
   330  					output <- evt
   331  				}
   332  			default:
   333  				logger.Errorf("transform expression returned an invalid type %T, sending event as-is", out)
   334  				output <- evt
   335  			}
   336  		}
   337  	}
   338  }
   339  
   340  func StartAcquisition(sources []DataSource, output chan types.Event, AcquisTomb *tomb.Tomb) error {
   341  	// Don't wait if we have no sources, as it will hang forever
   342  	if len(sources) == 0 {
   343  		return nil
   344  	}
   345  
   346  	for i := 0; i < len(sources); i++ {
   347  		subsrc := sources[i] //ensure its a copy
   348  		log.Debugf("starting one source %d/%d ->> %T", i, len(sources), subsrc)
   349  
   350  		AcquisTomb.Go(func() error {
   351  			defer trace.CatchPanic("crowdsec/acquis")
   352  			var err error
   353  
   354  			outChan := output
   355  			log.Debugf("datasource %s UUID: %s", subsrc.GetName(), subsrc.GetUuid())
   356  			if transformRuntime, ok := transformRuntimes[subsrc.GetUuid()]; ok {
   357  				log.Infof("transform expression found for datasource %s", subsrc.GetName())
   358  				transformChan := make(chan types.Event)
   359  				outChan = transformChan
   360  				transformLogger := log.WithFields(log.Fields{
   361  					"component":  "transform",
   362  					"datasource": subsrc.GetName(),
   363  				})
   364  				AcquisTomb.Go(func() error {
   365  					transform(outChan, output, AcquisTomb, transformRuntime, transformLogger)
   366  					return nil
   367  				})
   368  			}
   369  			if subsrc.GetMode() == configuration.TAIL_MODE {
   370  				err = subsrc.StreamingAcquisition(outChan, AcquisTomb)
   371  			} else {
   372  				err = subsrc.OneShotAcquisition(outChan, AcquisTomb)
   373  			}
   374  			if err != nil {
   375  				//if one of the acqusition returns an error, we kill the others to properly shutdown
   376  				AcquisTomb.Kill(err)
   377  			}
   378  			return nil
   379  		})
   380  	}
   381  
   382  	/*return only when acquisition is over (cat) or never (tail)*/
   383  	err := AcquisTomb.Wait()
   384  	return err
   385  }