github.com/crowdsecurity/crowdsec@v1.6.1/pkg/leakybucket/manager_load.go (about)

     1  package leakybucket
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/antonmedv/expr"
    15  	"github.com/antonmedv/expr/vm"
    16  	"github.com/davecgh/go-spew/spew"
    17  	"github.com/goombaio/namegenerator"
    18  	log "github.com/sirupsen/logrus"
    19  	"gopkg.in/tomb.v2"
    20  	yaml "gopkg.in/yaml.v2"
    21  
    22  	"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
    23  	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
    24  	"github.com/crowdsecurity/crowdsec/pkg/cwhub"
    25  	"github.com/crowdsecurity/crowdsec/pkg/cwversion"
    26  	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
    27  	"github.com/crowdsecurity/crowdsec/pkg/types"
    28  )
    29  
// BucketFactory holds every configuration field for any kind of bucket, so
// that a single generic struct can describe all bucket types. It is decoded
// from a scenario YAML document by LoadBuckets and then completed by
// LoadBucket (parsed durations, compiled expressions, logger, processors).
type BucketFactory struct {
	FormatVersion       string                 `yaml:"format"`              //FormatVersion of the scenario file; LoadBuckets assumes "1.0" when empty and checks it against cwversion.Constraint_scenario
	Author              string                 `yaml:"author"`              //Author of the scenario
	Description         string                 `yaml:"description"`         //Description is mandatory (enforced by ValidateFactory)
	References          []string               `yaml:"references"`          //References attached to the scenario
	Type                string                 `yaml:"type"`                //Type can be : leaky, counter, trigger, conditional, bayesian. It determines the main bucket characteristics
	Name                string                 `yaml:"name"`                //Name of the bucket, used later in log and user-messages. Should be unique
	Capacity            int                    `yaml:"capacity"`            //Capacity is applicable to leaky buckets and determines the "burst" capacity (ValidateFactory requires -1 for counter/bayesian and 0 for trigger)
	LeakSpeed           string                 `yaml:"leakspeed"`           //LeakSpeed is a duration string parsed with time.ParseDuration into `leakspeed`; mandatory for leaky and conditional buckets
	Duration            string                 `yaml:"duration"`            //Duration allows 'counter' buckets to have a fixed life-time; parsed with time.ParseDuration into `duration`
	Filter              string                 `yaml:"filter"`              //Filter is an expr that determines if an event is eligible for said bucket. Filter is evaluated against the Event struct
	GroupBy             string                 `yaml:"groupby,omitempty"`   //GroupBy is an expr that allows to determine the partitions of the bucket. A common example is the source_ip
	Distinct            string                 `yaml:"distinct"`            //Distinct, when present, adds a `Pour()` processor that will only pour unique items (based on distinct expr result)
	Debug               bool                   `yaml:"debug"`               //Debug, when set to true, will enable debugging for _this_ scenario specifically
	Labels              map[string]interface{} `yaml:"labels"`              //Labels is a K:V list aiming at providing context for the overflow
	Blackhole           string                 `yaml:"blackhole,omitempty"` //Blackhole is a duration that, if present, will prevent same bucket partition to overflow more often than $duration
	logger              *log.Entry             `yaml:"-"`                   //logger is bucket-specific logger (used by Debug as well)
	Reprocess           bool                   `yaml:"reprocess"`           //Reprocess, if true, will force the bucket to be re-injected into processing chain
	CacheSize           int                    `yaml:"cache_size"`          //CacheSize, if > 0, limits the size of in-memory cache of the bucket
	Profiling           bool                   `yaml:"profiling"`           //Profiling, if true, will make the bucket record pours/overflows/etc.
	OverflowFilter      string                 `yaml:"overflow_filter"`     //OverflowFilter if present, is a filter that must return true for the overflow to go through
	ConditionalOverflow string                 `yaml:"condition"`           //condition if present, is an expression that must return true for the bucket to overflow
	BayesianPrior       float32                `yaml:"bayesian_prior"`      //prior of the bayesian bucket; ValidateFactory rejects 0 and values above 1
	BayesianThreshold   float32                `yaml:"bayesian_threshold"`  //threshold of the bayesian bucket; ValidateFactory rejects 0 and values above 1
	BayesianConditions  []RawBayesianCondition `yaml:"bayesian_conditions"` //conditions for the bayesian bucket
	ScopeType           types.ScopeType        `yaml:"scope,omitempty"`     //to enforce a different remediation than blocking an IP. ValidateFactory defaults this to IP
	BucketName          string                 `yaml:"-"`                   //BucketName is a generated name (seed.Generate()) assigned by LoadBuckets
	Filename            string                 `yaml:"-"`                   //Filename is the cleaned path of the file the bucket was loaded from
	RunTimeFilter       *vm.Program            `json:"-"`                   //RunTimeFilter is the compiled form of Filter. NOTE(review): only a json tag is set, so yaml still sees this field - confirm that is intended
	RunTimeGroupBy      *vm.Program            `json:"-"`                   //RunTimeGroupBy is the compiled form of GroupBy (nil when GroupBy is empty). NOTE(review): same yaml tag remark as RunTimeFilter
	Data                []*types.DataSource    `yaml:"data,omitempty"`      //Data lists the data files initialised by LoadBucket through exprhelpers.FileInit
	DataDir             string                 `yaml:"-"`                   //DataDir is the hub data directory (hub.GetDataDir()), set by LoadBuckets
	CancelOnFilter      string                 `yaml:"cancel_on,omitempty"` //a filter that, if matched, kills the bucket
	leakspeed           time.Duration          //internal representation of `Leakspeed`
	duration            time.Duration          //internal representation of `Duration`
	ret                 chan types.Event       //the bucket-specific output chan for overflows
	processors          []Processor            //processors is the list of hooks for pour/overflow/create (cf. uniq, blackhole etc.)
	output              bool                   //set to false by LoadBucket; its purpose is not visible in this file
	ScenarioVersion     string                 `yaml:"version,omitempty"` //ScenarioVersion is overwritten with the hub item's local version when the scenario is found in the hub
	hash                string                 `yaml:"-"`                 //hash is the hub item's local hash when the scenario is found in the hub
	Simulated           bool                   `yaml:"simulated"`         //Set to true if the scenario instantiating the bucket was in the exclusion list
	tomb                *tomb.Tomb             `yaml:"-"`                 //tomb is set by LoadBucket and used by LoadBucketsState to start the restored bucket's LeakRoutine
	wgPour              *sync.WaitGroup        `yaml:"-"`                 //wgPour is copied from the Buckets passed to LoadBuckets
	wgDumpState         *sync.WaitGroup        `yaml:"-"`                 //wgDumpState is copied from the Buckets passed to LoadBuckets
	orderEvent          bool                   //orderEvent mirrors the orderEvent argument given to LoadBuckets
}
    78  
    79  // we use one NameGenerator for all the future buckets
    80  var seed namegenerator.Generator = namegenerator.NewNameGenerator(time.Now().UTC().UnixNano())
    81  
    82  func ValidateFactory(bucketFactory *BucketFactory) error {
    83  	if bucketFactory.Name == "" {
    84  		return fmt.Errorf("bucket must have name")
    85  	}
    86  	if bucketFactory.Description == "" {
    87  		return fmt.Errorf("description is mandatory")
    88  	}
    89  	if bucketFactory.Type == "leaky" {
    90  		if bucketFactory.Capacity <= 0 { //capacity must be a positive int
    91  			return fmt.Errorf("bad capacity for leaky '%d'", bucketFactory.Capacity)
    92  		}
    93  		if bucketFactory.LeakSpeed == "" {
    94  			return fmt.Errorf("leakspeed can't be empty for leaky")
    95  		}
    96  		if bucketFactory.leakspeed == 0 {
    97  			return fmt.Errorf("bad leakspeed for leaky '%s'", bucketFactory.LeakSpeed)
    98  		}
    99  	} else if bucketFactory.Type == "counter" {
   100  		if bucketFactory.Duration == "" {
   101  			return fmt.Errorf("duration can't be empty for counter")
   102  		}
   103  		if bucketFactory.duration == 0 {
   104  			return fmt.Errorf("bad duration for counter bucket '%d'", bucketFactory.duration)
   105  		}
   106  		if bucketFactory.Capacity != -1 {
   107  			return fmt.Errorf("counter bucket must have -1 capacity")
   108  		}
   109  	} else if bucketFactory.Type == "trigger" {
   110  		if bucketFactory.Capacity != 0 {
   111  			return fmt.Errorf("trigger bucket must have 0 capacity")
   112  		}
   113  	} else if bucketFactory.Type == "conditional" {
   114  		if bucketFactory.ConditionalOverflow == "" {
   115  			return fmt.Errorf("conditional bucket must have a condition")
   116  		}
   117  		if bucketFactory.Capacity != -1 {
   118  			bucketFactory.logger.Warnf("Using a value different than -1 as capacity for conditional bucket, this may lead to unexpected overflows")
   119  		}
   120  		if bucketFactory.LeakSpeed == "" {
   121  			return fmt.Errorf("leakspeed can't be empty for conditional bucket")
   122  		}
   123  		if bucketFactory.leakspeed == 0 {
   124  			return fmt.Errorf("bad leakspeed for conditional bucket '%s'", bucketFactory.LeakSpeed)
   125  		}
   126  	} else if bucketFactory.Type == "bayesian" {
   127  		if bucketFactory.BayesianConditions == nil {
   128  			return fmt.Errorf("bayesian bucket must have bayesian conditions")
   129  		}
   130  		if bucketFactory.BayesianPrior == 0 {
   131  			return fmt.Errorf("bayesian bucket must have a valid, non-zero prior")
   132  		}
   133  		if bucketFactory.BayesianThreshold == 0 {
   134  			return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold")
   135  		}
   136  		if bucketFactory.BayesianPrior > 1 {
   137  			return fmt.Errorf("bayesian bucket must have a valid, non-zero prior")
   138  		}
   139  		if bucketFactory.BayesianThreshold > 1 {
   140  			return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold")
   141  		}
   142  		if bucketFactory.Capacity != -1 {
   143  			return fmt.Errorf("bayesian bucket must have capacity -1")
   144  		}
   145  	} else {
   146  		return fmt.Errorf("unknown bucket type '%s'", bucketFactory.Type)
   147  	}
   148  
   149  	switch bucketFactory.ScopeType.Scope {
   150  	case types.Undefined:
   151  		bucketFactory.ScopeType.Scope = types.Ip
   152  	case types.Ip:
   153  	case types.Range:
   154  		var (
   155  			runTimeFilter *vm.Program
   156  			err           error
   157  		)
   158  		if bucketFactory.ScopeType.Filter != "" {
   159  			if runTimeFilter, err = expr.Compile(bucketFactory.ScopeType.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...); err != nil {
   160  				return fmt.Errorf("Error compiling the scope filter: %s", err)
   161  			}
   162  			bucketFactory.ScopeType.RunTimeFilter = runTimeFilter
   163  		}
   164  
   165  	default:
   166  		//Compile the scope filter
   167  		var (
   168  			runTimeFilter *vm.Program
   169  			err           error
   170  		)
   171  		if bucketFactory.ScopeType.Filter != "" {
   172  			if runTimeFilter, err = expr.Compile(bucketFactory.ScopeType.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...); err != nil {
   173  				return fmt.Errorf("Error compiling the scope filter: %s", err)
   174  			}
   175  			bucketFactory.ScopeType.RunTimeFilter = runTimeFilter
   176  		}
   177  	}
   178  	return nil
   179  }
   180  
   181  func LoadBuckets(cscfg *csconfig.CrowdsecServiceCfg, hub *cwhub.Hub, files []string, tomb *tomb.Tomb, buckets *Buckets, orderEvent bool) ([]BucketFactory, chan types.Event, error) {
   182  	var (
   183  		ret      = []BucketFactory{}
   184  		response chan types.Event
   185  	)
   186  
   187  	response = make(chan types.Event, 1)
   188  	for _, f := range files {
   189  		log.Debugf("Loading '%s'", f)
   190  		if !strings.HasSuffix(f, ".yaml") && !strings.HasSuffix(f, ".yml") {
   191  			log.Debugf("Skipping %s : not a yaml file", f)
   192  			continue
   193  		}
   194  
   195  		//process the yaml
   196  		bucketConfigurationFile, err := os.Open(f)
   197  		if err != nil {
   198  			log.Errorf("Can't access leaky configuration file %s", f)
   199  			return nil, nil, err
   200  		}
   201  		defer bucketConfigurationFile.Close()
   202  		dec := yaml.NewDecoder(bucketConfigurationFile)
   203  		dec.SetStrict(true)
   204  		for {
   205  			bucketFactory := BucketFactory{}
   206  			err = dec.Decode(&bucketFactory)
   207  			if err != nil {
   208  				if !errors.Is(err, io.EOF) {
   209  					log.Errorf("Bad yaml in %s : %v", f, err)
   210  					return nil, nil, fmt.Errorf("bad yaml in %s : %v", f, err)
   211  				}
   212  				log.Tracef("End of yaml file")
   213  				break
   214  			}
   215  			bucketFactory.DataDir = hub.GetDataDir()
   216  			//check empty
   217  			if bucketFactory.Name == "" {
   218  				log.Errorf("Won't load nameless bucket")
   219  				return nil, nil, fmt.Errorf("nameless bucket")
   220  			}
   221  			//check compat
   222  			if bucketFactory.FormatVersion == "" {
   223  				log.Tracef("no version in %s : %s, assuming '1.0'", bucketFactory.Name, f)
   224  				bucketFactory.FormatVersion = "1.0"
   225  			}
   226  			ok, err := cwversion.Satisfies(bucketFactory.FormatVersion, cwversion.Constraint_scenario)
   227  			if err != nil {
   228  				return nil, nil, fmt.Errorf("failed to check version : %s", err)
   229  			}
   230  			if !ok {
   231  				log.Errorf("can't load %s : %s doesn't satisfy scenario format %s, skip", bucketFactory.Name, bucketFactory.FormatVersion, cwversion.Constraint_scenario)
   232  				continue
   233  			}
   234  
   235  			bucketFactory.Filename = filepath.Clean(f)
   236  			bucketFactory.BucketName = seed.Generate()
   237  			bucketFactory.ret = response
   238  			hubItem, err := hub.GetItemByPath(cwhub.SCENARIOS, bucketFactory.Filename)
   239  			if err != nil {
   240  				log.Errorf("scenario %s (%s) couldn't be find in hub (ignore if in unit tests)", bucketFactory.Name, bucketFactory.Filename)
   241  			} else {
   242  				if cscfg.SimulationConfig != nil {
   243  					bucketFactory.Simulated = cscfg.SimulationConfig.IsSimulated(hubItem.Name)
   244  				}
   245  				if hubItem != nil {
   246  					bucketFactory.ScenarioVersion = hubItem.State.LocalVersion
   247  					bucketFactory.hash = hubItem.State.LocalHash
   248  				} else {
   249  					log.Errorf("scenario %s (%s) couldn't be find in hub (ignore if in unit tests)", bucketFactory.Name, bucketFactory.Filename)
   250  				}
   251  			}
   252  
   253  			bucketFactory.wgDumpState = buckets.wgDumpState
   254  			bucketFactory.wgPour = buckets.wgPour
   255  			err = LoadBucket(&bucketFactory, tomb)
   256  			if err != nil {
   257  				log.Errorf("Failed to load bucket %s : %v", bucketFactory.Name, err)
   258  				return nil, nil, fmt.Errorf("loading of %s failed : %v", bucketFactory.Name, err)
   259  			}
   260  
   261  			bucketFactory.orderEvent = orderEvent
   262  
   263  			ret = append(ret, bucketFactory)
   264  		}
   265  	}
   266  
   267  	if err := alertcontext.NewAlertContext(cscfg.ContextToSend, cscfg.ConsoleContextValueLength); err != nil {
   268  		return nil, nil, fmt.Errorf("unable to load alert context: %s", err)
   269  	}
   270  
   271  	log.Infof("Loaded %d scenarios", len(ret))
   272  	return ret, response, nil
   273  }
   274  
   275  /* Init recursively process yaml files from a directory and loads them as BucketFactory */
   276  func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
   277  	var err error
   278  	if bucketFactory.Debug {
   279  		var clog = log.New()
   280  		if err := types.ConfigureLogger(clog); err != nil {
   281  			log.Fatalf("While creating bucket-specific logger : %s", err)
   282  		}
   283  		clog.SetLevel(log.DebugLevel)
   284  		bucketFactory.logger = clog.WithFields(log.Fields{
   285  			"cfg":  bucketFactory.BucketName,
   286  			"name": bucketFactory.Name,
   287  		})
   288  	} else {
   289  		/* else bind it to the default one (might find something more elegant here)*/
   290  		bucketFactory.logger = log.WithFields(log.Fields{
   291  			"cfg":  bucketFactory.BucketName,
   292  			"name": bucketFactory.Name,
   293  		})
   294  	}
   295  
   296  	if bucketFactory.LeakSpeed != "" {
   297  		if bucketFactory.leakspeed, err = time.ParseDuration(bucketFactory.LeakSpeed); err != nil {
   298  			return fmt.Errorf("bad leakspeed '%s' in %s : %v", bucketFactory.LeakSpeed, bucketFactory.Filename, err)
   299  		}
   300  	} else {
   301  		bucketFactory.leakspeed = time.Duration(0)
   302  	}
   303  	if bucketFactory.Duration != "" {
   304  		if bucketFactory.duration, err = time.ParseDuration(bucketFactory.Duration); err != nil {
   305  			return fmt.Errorf("invalid Duration '%s' in %s : %v", bucketFactory.Duration, bucketFactory.Filename, err)
   306  		}
   307  	}
   308  
   309  	if bucketFactory.Filter == "" {
   310  		bucketFactory.logger.Warning("Bucket without filter, abort.")
   311  		return fmt.Errorf("bucket without filter directive")
   312  	}
   313  	bucketFactory.RunTimeFilter, err = expr.Compile(bucketFactory.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...)
   314  	if err != nil {
   315  		return fmt.Errorf("invalid filter '%s' in %s : %v", bucketFactory.Filter, bucketFactory.Filename, err)
   316  	}
   317  
   318  	if bucketFactory.GroupBy != "" {
   319  		bucketFactory.RunTimeGroupBy, err = expr.Compile(bucketFactory.GroupBy, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...)
   320  		if err != nil {
   321  			return fmt.Errorf("invalid groupby '%s' in %s : %v", bucketFactory.GroupBy, bucketFactory.Filename, err)
   322  		}
   323  	}
   324  
   325  	bucketFactory.logger.Infof("Adding %s bucket", bucketFactory.Type)
   326  	//return the Holder corresponding to the type of bucket
   327  	bucketFactory.processors = []Processor{}
   328  	switch bucketFactory.Type {
   329  	case "leaky":
   330  		bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
   331  	case "trigger":
   332  		bucketFactory.processors = append(bucketFactory.processors, &Trigger{})
   333  	case "counter":
   334  		bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
   335  	case "conditional":
   336  		bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
   337  	case "bayesian":
   338  		bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
   339  	default:
   340  		return fmt.Errorf("invalid type '%s' in %s : %v", bucketFactory.Type, bucketFactory.Filename, err)
   341  	}
   342  
   343  	if bucketFactory.Distinct != "" {
   344  		bucketFactory.logger.Tracef("Adding a non duplicate filter")
   345  		bucketFactory.processors = append(bucketFactory.processors, &Uniq{})
   346  	}
   347  
   348  	if bucketFactory.CancelOnFilter != "" {
   349  		bucketFactory.logger.Tracef("Adding a cancel_on filter")
   350  		bucketFactory.processors = append(bucketFactory.processors, &CancelOnFilter{})
   351  	}
   352  
   353  	if bucketFactory.OverflowFilter != "" {
   354  		bucketFactory.logger.Tracef("Adding an overflow filter")
   355  		filovflw, err := NewOverflowFilter(bucketFactory)
   356  		if err != nil {
   357  			bucketFactory.logger.Errorf("Error creating overflow_filter : %s", err)
   358  			return fmt.Errorf("error creating overflow_filter : %s", err)
   359  		}
   360  		bucketFactory.processors = append(bucketFactory.processors, filovflw)
   361  	}
   362  
   363  	if bucketFactory.Blackhole != "" {
   364  		bucketFactory.logger.Tracef("Adding blackhole.")
   365  		blackhole, err := NewBlackhole(bucketFactory)
   366  		if err != nil {
   367  			bucketFactory.logger.Errorf("Error creating blackhole : %s", err)
   368  			return fmt.Errorf("error creating blackhole : %s", err)
   369  		}
   370  		bucketFactory.processors = append(bucketFactory.processors, blackhole)
   371  	}
   372  
   373  	if bucketFactory.ConditionalOverflow != "" {
   374  		bucketFactory.logger.Tracef("Adding conditional overflow")
   375  		bucketFactory.processors = append(bucketFactory.processors, &ConditionalOverflow{})
   376  	}
   377  
   378  	if bucketFactory.BayesianThreshold != 0 {
   379  		bucketFactory.logger.Tracef("Adding bayesian processor")
   380  		bucketFactory.processors = append(bucketFactory.processors, &BayesianBucket{})
   381  	}
   382  
   383  	if len(bucketFactory.Data) > 0 {
   384  		for _, data := range bucketFactory.Data {
   385  			if data.DestPath == "" {
   386  				bucketFactory.logger.Errorf("no dest_file provided for '%s'", bucketFactory.Name)
   387  				continue
   388  			}
   389  			err = exprhelpers.FileInit(bucketFactory.DataDir, data.DestPath, data.Type)
   390  			if err != nil {
   391  				bucketFactory.logger.Errorf("unable to init data for file '%s': %s", data.DestPath, err)
   392  			}
   393  			if data.Type == "regexp" { //cache only makes sense for regexp
   394  				exprhelpers.RegexpCacheInit(data.DestPath, *data)
   395  			}
   396  		}
   397  	}
   398  
   399  	bucketFactory.output = false
   400  	if err := ValidateFactory(bucketFactory); err != nil {
   401  		return fmt.Errorf("invalid bucket from %s : %v", bucketFactory.Filename, err)
   402  	}
   403  	bucketFactory.tomb = tomb
   404  
   405  	return nil
   406  
   407  }
   408  
   409  func LoadBucketsState(file string, buckets *Buckets, bucketFactories []BucketFactory) error {
   410  	var state map[string]Leaky
   411  	body, err := os.ReadFile(file)
   412  	if err != nil {
   413  		return fmt.Errorf("can't state file %s : %s", file, err)
   414  	}
   415  	if err := json.Unmarshal(body, &state); err != nil {
   416  		return fmt.Errorf("can't unmarshal state file %s : %s", file, err)
   417  	}
   418  	for k, v := range state {
   419  		var tbucket *Leaky
   420  		log.Debugf("Reloading bucket %s", k)
   421  		val, ok := buckets.Bucket_map.Load(k)
   422  		if ok {
   423  			log.Fatalf("key %s already exists : %+v", k, val)
   424  		}
   425  		//find back our holder
   426  		found := false
   427  		for _, h := range bucketFactories {
   428  			if h.Name == v.Name {
   429  				log.Debugf("found factory %s/%s -> %s", h.Author, h.Name, h.Description)
   430  				//check in which mode the bucket was
   431  				if v.Mode == types.TIMEMACHINE {
   432  					tbucket = NewTimeMachine(h)
   433  				} else if v.Mode == types.LIVE {
   434  					tbucket = NewLeaky(h)
   435  				} else {
   436  					log.Errorf("Unknown bucket type : %d", v.Mode)
   437  				}
   438  				/*Trying to restore queue state*/
   439  				tbucket.Queue = v.Queue
   440  				/*Trying to set the limiter to the saved values*/
   441  				tbucket.Limiter.Load(v.SerializedState)
   442  				tbucket.In = make(chan *types.Event)
   443  				tbucket.Mapkey = k
   444  				tbucket.Signal = make(chan bool, 1)
   445  				tbucket.First_ts = v.First_ts
   446  				tbucket.Last_ts = v.Last_ts
   447  				tbucket.Ovflw_ts = v.Ovflw_ts
   448  				tbucket.Total_count = v.Total_count
   449  				buckets.Bucket_map.Store(k, tbucket)
   450  				h.tomb.Go(func() error {
   451  					return LeakRoutine(tbucket)
   452  				})
   453  				<-tbucket.Signal
   454  				found = true
   455  				break
   456  			}
   457  		}
   458  		if !found {
   459  			log.Fatalf("Unable to find holder for bucket %s : %s", k, spew.Sdump(v))
   460  		}
   461  	}
   462  
   463  	log.Infof("Restored %d buckets from dump", len(state))
   464  	return nil
   465  
   466  }