github.com/crowdsecurity/crowdsec@v1.6.1/pkg/leakybucket/manager_load.go (about) 1 package leakybucket 2 3 import ( 4 "encoding/json" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "path/filepath" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/antonmedv/expr" 15 "github.com/antonmedv/expr/vm" 16 "github.com/davecgh/go-spew/spew" 17 "github.com/goombaio/namegenerator" 18 log "github.com/sirupsen/logrus" 19 "gopkg.in/tomb.v2" 20 yaml "gopkg.in/yaml.v2" 21 22 "github.com/crowdsecurity/crowdsec/pkg/alertcontext" 23 "github.com/crowdsecurity/crowdsec/pkg/csconfig" 24 "github.com/crowdsecurity/crowdsec/pkg/cwhub" 25 "github.com/crowdsecurity/crowdsec/pkg/cwversion" 26 "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" 27 "github.com/crowdsecurity/crowdsec/pkg/types" 28 ) 29 30 // BucketFactory struct holds all fields for any bucket configuration. This is to have a 31 // generic struct for buckets. This can be seen as a bucket factory. 32 type BucketFactory struct { 33 FormatVersion string `yaml:"format"` 34 Author string `yaml:"author"` 35 Description string `yaml:"description"` 36 References []string `yaml:"references"` 37 Type string `yaml:"type"` //Type can be : leaky, counter, trigger. It determines the main bucket characteristics 38 Name string `yaml:"name"` //Name of the bucket, used later in log and user-messages. Should be unique 39 Capacity int `yaml:"capacity"` //Capacity is applicable to leaky buckets and determines the "burst" capacity 40 LeakSpeed string `yaml:"leakspeed"` //Leakspeed is a float representing how many events per second leak out of the bucket 41 Duration string `yaml:"duration"` //Duration allows 'counter' buckets to have a fixed life-time 42 Filter string `yaml:"filter"` //Filter is an expr that determines if an event is elligible for said bucket. Filter is evaluated against the Event struct 43 GroupBy string `yaml:"groupby,omitempty"` //groupy is an expr that allows to determine the partitions of the bucket. A common example is the source_ip 44 Distinct string `yaml:"distinct"` //Distinct, when present, adds a `Pour()` processor that will only pour uniq items (based on distinct expr result) 45 Debug bool `yaml:"debug"` //Debug, when set to true, will enable debugging for _this_ scenario specifically 46 Labels map[string]interface{} `yaml:"labels"` //Labels is K:V list aiming at providing context the overflow 47 Blackhole string `yaml:"blackhole,omitempty"` //Blackhole is a duration that, if present, will prevent same bucket partition to overflow more often than $duration 48 logger *log.Entry `yaml:"-"` //logger is bucket-specific logger (used by Debug as well) 49 Reprocess bool `yaml:"reprocess"` //Reprocess, if true, will for the bucket to be re-injected into processing chain 50 CacheSize int `yaml:"cache_size"` //CacheSize, if > 0, limits the size of in-memory cache of the bucket 51 Profiling bool `yaml:"profiling"` //Profiling, if true, will make the bucket record pours/overflows/etc. 52 OverflowFilter string `yaml:"overflow_filter"` //OverflowFilter if present, is a filter that must return true for the overflow to go through 53 ConditionalOverflow string `yaml:"condition"` //condition if present, is an expression that must return true for the bucket to overflow 54 BayesianPrior float32 `yaml:"bayesian_prior"` 55 BayesianThreshold float32 `yaml:"bayesian_threshold"` 56 BayesianConditions []RawBayesianCondition `yaml:"bayesian_conditions"` //conditions for the bayesian bucket 57 ScopeType types.ScopeType `yaml:"scope,omitempty"` //to enforce a different remediation than blocking an IP. Will default this to IP 58 BucketName string `yaml:"-"` 59 Filename string `yaml:"-"` 60 RunTimeFilter *vm.Program `json:"-"` 61 RunTimeGroupBy *vm.Program `json:"-"` 62 Data []*types.DataSource `yaml:"data,omitempty"` 63 DataDir string `yaml:"-"` 64 CancelOnFilter string `yaml:"cancel_on,omitempty"` //a filter that, if matched, kills the bucket 65 leakspeed time.Duration //internal representation of `Leakspeed` 66 duration time.Duration //internal representation of `Duration` 67 ret chan types.Event //the bucket-specific output chan for overflows 68 processors []Processor //processors is the list of hooks for pour/overflow/create (cf. uniq, blackhole etc.) 69 output bool //?? 70 ScenarioVersion string `yaml:"version,omitempty"` 71 hash string `yaml:"-"` 72 Simulated bool `yaml:"simulated"` //Set to true if the scenario instantiating the bucket was in the exclusion list 73 tomb *tomb.Tomb `yaml:"-"` 74 wgPour *sync.WaitGroup `yaml:"-"` 75 wgDumpState *sync.WaitGroup `yaml:"-"` 76 orderEvent bool 77 } 78 79 // we use one NameGenerator for all the future buckets 80 var seed namegenerator.Generator = namegenerator.NewNameGenerator(time.Now().UTC().UnixNano()) 81 82 func ValidateFactory(bucketFactory *BucketFactory) error { 83 if bucketFactory.Name == "" { 84 return fmt.Errorf("bucket must have name") 85 } 86 if bucketFactory.Description == "" { 87 return fmt.Errorf("description is mandatory") 88 } 89 if bucketFactory.Type == "leaky" { 90 if bucketFactory.Capacity <= 0 { //capacity must be a positive int 91 return fmt.Errorf("bad capacity for leaky '%d'", bucketFactory.Capacity) 92 } 93 if bucketFactory.LeakSpeed == "" { 94 return fmt.Errorf("leakspeed can't be empty for leaky") 95 } 96 if bucketFactory.leakspeed == 0 { 97 return fmt.Errorf("bad leakspeed for leaky '%s'", bucketFactory.LeakSpeed) 98 } 99 } else if bucketFactory.Type == "counter" { 100 if bucketFactory.Duration == "" { 101 return fmt.Errorf("duration can't be empty for counter") 102 } 103 if bucketFactory.duration == 0 { 104 return fmt.Errorf("bad duration for counter bucket '%d'", bucketFactory.duration) 105 } 106 if bucketFactory.Capacity != -1 { 107 return fmt.Errorf("counter bucket must have -1 capacity") 108 } 109 } else if bucketFactory.Type == "trigger" { 110 if bucketFactory.Capacity != 0 { 111 return fmt.Errorf("trigger bucket must have 0 capacity") 112 } 113 } else if bucketFactory.Type == "conditional" { 114 if bucketFactory.ConditionalOverflow == "" { 115 return fmt.Errorf("conditional bucket must have a condition") 116 } 117 if bucketFactory.Capacity != -1 { 118 bucketFactory.logger.Warnf("Using a value different than -1 as capacity for conditional bucket, this may lead to unexpected overflows") 119 } 120 if bucketFactory.LeakSpeed == "" { 121 return fmt.Errorf("leakspeed can't be empty for conditional bucket") 122 } 123 if bucketFactory.leakspeed == 0 { 124 return fmt.Errorf("bad leakspeed for conditional bucket '%s'", bucketFactory.LeakSpeed) 125 } 126 } else if bucketFactory.Type == "bayesian" { 127 if bucketFactory.BayesianConditions == nil { 128 return fmt.Errorf("bayesian bucket must have bayesian conditions") 129 } 130 if bucketFactory.BayesianPrior == 0 { 131 return fmt.Errorf("bayesian bucket must have a valid, non-zero prior") 132 } 133 if bucketFactory.BayesianThreshold == 0 { 134 return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold") 135 } 136 if bucketFactory.BayesianPrior > 1 { 137 return fmt.Errorf("bayesian bucket must have a valid, non-zero prior") 138 } 139 if bucketFactory.BayesianThreshold > 1 { 140 return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold") 141 } 142 if bucketFactory.Capacity != -1 { 143 return fmt.Errorf("bayesian bucket must have capacity -1") 144 } 145 } else { 146 return fmt.Errorf("unknown bucket type '%s'", bucketFactory.Type) 147 } 148 149 switch bucketFactory.ScopeType.Scope { 150 case types.Undefined: 151 bucketFactory.ScopeType.Scope = types.Ip 152 case types.Ip: 153 case types.Range: 154 var ( 155 runTimeFilter *vm.Program 156 err error 157 ) 158 if bucketFactory.ScopeType.Filter != "" { 159 if runTimeFilter, err = expr.Compile(bucketFactory.ScopeType.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...); err != nil { 160 return fmt.Errorf("Error compiling the scope filter: %s", err) 161 } 162 bucketFactory.ScopeType.RunTimeFilter = runTimeFilter 163 } 164 165 default: 166 //Compile the scope filter 167 var ( 168 runTimeFilter *vm.Program 169 err error 170 ) 171 if bucketFactory.ScopeType.Filter != "" { 172 if runTimeFilter, err = expr.Compile(bucketFactory.ScopeType.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...); err != nil { 173 return fmt.Errorf("Error compiling the scope filter: %s", err) 174 } 175 bucketFactory.ScopeType.RunTimeFilter = runTimeFilter 176 } 177 } 178 return nil 179 } 180 181 func LoadBuckets(cscfg *csconfig.CrowdsecServiceCfg, hub *cwhub.Hub, files []string, tomb *tomb.Tomb, buckets *Buckets, orderEvent bool) ([]BucketFactory, chan types.Event, error) { 182 var ( 183 ret = []BucketFactory{} 184 response chan types.Event 185 ) 186 187 response = make(chan types.Event, 1) 188 for _, f := range files { 189 log.Debugf("Loading '%s'", f) 190 if !strings.HasSuffix(f, ".yaml") && !strings.HasSuffix(f, ".yml") { 191 log.Debugf("Skipping %s : not a yaml file", f) 192 continue 193 } 194 195 //process the yaml 196 bucketConfigurationFile, err := os.Open(f) 197 if err != nil { 198 log.Errorf("Can't access leaky configuration file %s", f) 199 return nil, nil, err 200 } 201 defer bucketConfigurationFile.Close() 202 dec := yaml.NewDecoder(bucketConfigurationFile) 203 dec.SetStrict(true) 204 for { 205 bucketFactory := BucketFactory{} 206 err = dec.Decode(&bucketFactory) 207 if err != nil { 208 if !errors.Is(err, io.EOF) { 209 log.Errorf("Bad yaml in %s : %v", f, err) 210 return nil, nil, fmt.Errorf("bad yaml in %s : %v", f, err) 211 } 212 log.Tracef("End of yaml file") 213 break 214 } 215 bucketFactory.DataDir = hub.GetDataDir() 216 //check empty 217 if bucketFactory.Name == "" { 218 log.Errorf("Won't load nameless bucket") 219 return nil, nil, fmt.Errorf("nameless bucket") 220 } 221 //check compat 222 if bucketFactory.FormatVersion == "" { 223 log.Tracef("no version in %s : %s, assuming '1.0'", bucketFactory.Name, f) 224 bucketFactory.FormatVersion = "1.0" 225 } 226 ok, err := cwversion.Satisfies(bucketFactory.FormatVersion, cwversion.Constraint_scenario) 227 if err != nil { 228 return nil, nil, fmt.Errorf("failed to check version : %s", err) 229 } 230 if !ok { 231 log.Errorf("can't load %s : %s doesn't satisfy scenario format %s, skip", bucketFactory.Name, bucketFactory.FormatVersion, cwversion.Constraint_scenario) 232 continue 233 } 234 235 bucketFactory.Filename = filepath.Clean(f) 236 bucketFactory.BucketName = seed.Generate() 237 bucketFactory.ret = response 238 hubItem, err := hub.GetItemByPath(cwhub.SCENARIOS, bucketFactory.Filename) 239 if err != nil { 240 log.Errorf("scenario %s (%s) couldn't be find in hub (ignore if in unit tests)", bucketFactory.Name, bucketFactory.Filename) 241 } else { 242 if cscfg.SimulationConfig != nil { 243 bucketFactory.Simulated = cscfg.SimulationConfig.IsSimulated(hubItem.Name) 244 } 245 if hubItem != nil { 246 bucketFactory.ScenarioVersion = hubItem.State.LocalVersion 247 bucketFactory.hash = hubItem.State.LocalHash 248 } else { 249 log.Errorf("scenario %s (%s) couldn't be find in hub (ignore if in unit tests)", bucketFactory.Name, bucketFactory.Filename) 250 } 251 } 252 253 bucketFactory.wgDumpState = buckets.wgDumpState 254 bucketFactory.wgPour = buckets.wgPour 255 err = LoadBucket(&bucketFactory, tomb) 256 if err != nil { 257 log.Errorf("Failed to load bucket %s : %v", bucketFactory.Name, err) 258 return nil, nil, fmt.Errorf("loading of %s failed : %v", bucketFactory.Name, err) 259 } 260 261 bucketFactory.orderEvent = orderEvent 262 263 ret = append(ret, bucketFactory) 264 } 265 } 266 267 if err := alertcontext.NewAlertContext(cscfg.ContextToSend, cscfg.ConsoleContextValueLength); err != nil { 268 return nil, nil, fmt.Errorf("unable to load alert context: %s", err) 269 } 270 271 log.Infof("Loaded %d scenarios", len(ret)) 272 return ret, response, nil 273 } 274 275 /* Init recursively process yaml files from a directory and loads them as BucketFactory */ 276 func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error { 277 var err error 278 if bucketFactory.Debug { 279 var clog = log.New() 280 if err := types.ConfigureLogger(clog); err != nil { 281 log.Fatalf("While creating bucket-specific logger : %s", err) 282 } 283 clog.SetLevel(log.DebugLevel) 284 bucketFactory.logger = clog.WithFields(log.Fields{ 285 "cfg": bucketFactory.BucketName, 286 "name": bucketFactory.Name, 287 }) 288 } else { 289 /* else bind it to the default one (might find something more elegant here)*/ 290 bucketFactory.logger = log.WithFields(log.Fields{ 291 "cfg": bucketFactory.BucketName, 292 "name": bucketFactory.Name, 293 }) 294 } 295 296 if bucketFactory.LeakSpeed != "" { 297 if bucketFactory.leakspeed, err = time.ParseDuration(bucketFactory.LeakSpeed); err != nil { 298 return fmt.Errorf("bad leakspeed '%s' in %s : %v", bucketFactory.LeakSpeed, bucketFactory.Filename, err) 299 } 300 } else { 301 bucketFactory.leakspeed = time.Duration(0) 302 } 303 if bucketFactory.Duration != "" { 304 if bucketFactory.duration, err = time.ParseDuration(bucketFactory.Duration); err != nil { 305 return fmt.Errorf("invalid Duration '%s' in %s : %v", bucketFactory.Duration, bucketFactory.Filename, err) 306 } 307 } 308 309 if bucketFactory.Filter == "" { 310 bucketFactory.logger.Warning("Bucket without filter, abort.") 311 return fmt.Errorf("bucket without filter directive") 312 } 313 bucketFactory.RunTimeFilter, err = expr.Compile(bucketFactory.Filter, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...) 314 if err != nil { 315 return fmt.Errorf("invalid filter '%s' in %s : %v", bucketFactory.Filter, bucketFactory.Filename, err) 316 } 317 318 if bucketFactory.GroupBy != "" { 319 bucketFactory.RunTimeGroupBy, err = expr.Compile(bucketFactory.GroupBy, exprhelpers.GetExprOptions(map[string]interface{}{"evt": &types.Event{}})...) 320 if err != nil { 321 return fmt.Errorf("invalid groupby '%s' in %s : %v", bucketFactory.GroupBy, bucketFactory.Filename, err) 322 } 323 } 324 325 bucketFactory.logger.Infof("Adding %s bucket", bucketFactory.Type) 326 //return the Holder corresponding to the type of bucket 327 bucketFactory.processors = []Processor{} 328 switch bucketFactory.Type { 329 case "leaky": 330 bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) 331 case "trigger": 332 bucketFactory.processors = append(bucketFactory.processors, &Trigger{}) 333 case "counter": 334 bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) 335 case "conditional": 336 bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) 337 case "bayesian": 338 bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{}) 339 default: 340 return fmt.Errorf("invalid type '%s' in %s : %v", bucketFactory.Type, bucketFactory.Filename, err) 341 } 342 343 if bucketFactory.Distinct != "" { 344 bucketFactory.logger.Tracef("Adding a non duplicate filter") 345 bucketFactory.processors = append(bucketFactory.processors, &Uniq{}) 346 } 347 348 if bucketFactory.CancelOnFilter != "" { 349 bucketFactory.logger.Tracef("Adding a cancel_on filter") 350 bucketFactory.processors = append(bucketFactory.processors, &CancelOnFilter{}) 351 } 352 353 if bucketFactory.OverflowFilter != "" { 354 bucketFactory.logger.Tracef("Adding an overflow filter") 355 filovflw, err := NewOverflowFilter(bucketFactory) 356 if err != nil { 357 bucketFactory.logger.Errorf("Error creating overflow_filter : %s", err) 358 return fmt.Errorf("error creating overflow_filter : %s", err) 359 } 360 bucketFactory.processors = append(bucketFactory.processors, filovflw) 361 } 362 363 if bucketFactory.Blackhole != "" { 364 bucketFactory.logger.Tracef("Adding blackhole.") 365 blackhole, err := NewBlackhole(bucketFactory) 366 if err != nil { 367 bucketFactory.logger.Errorf("Error creating blackhole : %s", err) 368 return fmt.Errorf("error creating blackhole : %s", err) 369 } 370 bucketFactory.processors = append(bucketFactory.processors, blackhole) 371 } 372 373 if bucketFactory.ConditionalOverflow != "" { 374 bucketFactory.logger.Tracef("Adding conditional overflow") 375 bucketFactory.processors = append(bucketFactory.processors, &ConditionalOverflow{}) 376 } 377 378 if bucketFactory.BayesianThreshold != 0 { 379 bucketFactory.logger.Tracef("Adding bayesian processor") 380 bucketFactory.processors = append(bucketFactory.processors, &BayesianBucket{}) 381 } 382 383 if len(bucketFactory.Data) > 0 { 384 for _, data := range bucketFactory.Data { 385 if data.DestPath == "" { 386 bucketFactory.logger.Errorf("no dest_file provided for '%s'", bucketFactory.Name) 387 continue 388 } 389 err = exprhelpers.FileInit(bucketFactory.DataDir, data.DestPath, data.Type) 390 if err != nil { 391 bucketFactory.logger.Errorf("unable to init data for file '%s': %s", data.DestPath, err) 392 } 393 if data.Type == "regexp" { //cache only makes sense for regexp 394 exprhelpers.RegexpCacheInit(data.DestPath, *data) 395 } 396 } 397 } 398 399 bucketFactory.output = false 400 if err := ValidateFactory(bucketFactory); err != nil { 401 return fmt.Errorf("invalid bucket from %s : %v", bucketFactory.Filename, err) 402 } 403 bucketFactory.tomb = tomb 404 405 return nil 406 407 } 408 409 func LoadBucketsState(file string, buckets *Buckets, bucketFactories []BucketFactory) error { 410 var state map[string]Leaky 411 body, err := os.ReadFile(file) 412 if err != nil { 413 return fmt.Errorf("can't state file %s : %s", file, err) 414 } 415 if err := json.Unmarshal(body, &state); err != nil { 416 return fmt.Errorf("can't unmarshal state file %s : %s", file, err) 417 } 418 for k, v := range state { 419 var tbucket *Leaky 420 log.Debugf("Reloading bucket %s", k) 421 val, ok := buckets.Bucket_map.Load(k) 422 if ok { 423 log.Fatalf("key %s already exists : %+v", k, val) 424 } 425 //find back our holder 426 found := false 427 for _, h := range bucketFactories { 428 if h.Name == v.Name { 429 log.Debugf("found factory %s/%s -> %s", h.Author, h.Name, h.Description) 430 //check in which mode the bucket was 431 if v.Mode == types.TIMEMACHINE { 432 tbucket = NewTimeMachine(h) 433 } else if v.Mode == types.LIVE { 434 tbucket = NewLeaky(h) 435 } else { 436 log.Errorf("Unknown bucket type : %d", v.Mode) 437 } 438 /*Trying to restore queue state*/ 439 tbucket.Queue = v.Queue 440 /*Trying to set the limiter to the saved values*/ 441 tbucket.Limiter.Load(v.SerializedState) 442 tbucket.In = make(chan *types.Event) 443 tbucket.Mapkey = k 444 tbucket.Signal = make(chan bool, 1) 445 tbucket.First_ts = v.First_ts 446 tbucket.Last_ts = v.Last_ts 447 tbucket.Ovflw_ts = v.Ovflw_ts 448 tbucket.Total_count = v.Total_count 449 buckets.Bucket_map.Store(k, tbucket) 450 h.tomb.Go(func() error { 451 return LeakRoutine(tbucket) 452 }) 453 <-tbucket.Signal 454 found = true 455 break 456 } 457 } 458 if !found { 459 log.Fatalf("Unable to find holder for bucket %s : %s", k, spew.Sdump(v)) 460 } 461 } 462 463 log.Infof("Restored %d buckets from dump", len(state)) 464 return nil 465 466 }