github.com/crowdsecurity/crowdsec@v1.6.1/pkg/leakybucket/manager_run.go (about) 1 package leakybucket 2 3 import ( 4 "encoding/json" 5 "errors" 6 "fmt" 7 "math" 8 "os" 9 "sync" 10 "time" 11 12 "github.com/mohae/deepcopy" 13 "github.com/prometheus/client_golang/prometheus" 14 log "github.com/sirupsen/logrus" 15 16 "github.com/crowdsecurity/crowdsec/pkg/exprhelpers" 17 "github.com/crowdsecurity/crowdsec/pkg/types" 18 ) 19 20 var serialized map[string]Leaky 21 var BucketPourCache map[string][]types.Event 22 var BucketPourTrack bool 23 24 /* 25 The leaky routines lifecycle are based on "real" time. 26 But when we are running in time-machine mode, the reference time is in logs and not "real" time. 27 Thus we need to garbage collect them to avoid a skyrocketing memory usage. 28 */ 29 func GarbageCollectBuckets(deadline time.Time, buckets *Buckets) error { 30 buckets.wgPour.Wait() 31 buckets.wgDumpState.Add(1) 32 defer buckets.wgDumpState.Done() 33 34 total := 0 35 discard := 0 36 toflush := []string{} 37 buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool { 38 key := rkey.(string) 39 val := rvalue.(*Leaky) 40 total += 1 41 //bucket already overflowed, we can kill it 42 if !val.Ovflw_ts.IsZero() { 43 discard += 1 44 val.logger.Debugf("overflowed at %s.", val.Ovflw_ts) 45 toflush = append(toflush, key) 46 val.tomb.Kill(nil) 47 return true 48 } 49 /*FIXME : sometimes the gettokenscountat has some rounding issues when we try to 50 match it with bucket capacity, even if the bucket has long due underflow. Round to 2 decimals*/ 51 tokat := val.Limiter.GetTokensCountAt(deadline) 52 tokcapa := float64(val.Capacity) 53 tokat = math.Round(tokat*100) / 100 54 tokcapa = math.Round(tokcapa*100) / 100 55 //bucket actually underflowed based on log time, but no in real time 56 if tokat >= tokcapa { 57 BucketsUnderflow.With(prometheus.Labels{"name": val.Name}).Inc() 58 val.logger.Debugf("UNDERFLOW : first_ts:%s tokens_at:%f capcity:%f", val.First_ts, tokat, tokcapa) 59 toflush = append(toflush, key) 60 val.tomb.Kill(nil) 61 return true 62 } 63 64 val.logger.Tracef("(%s) not dead, count:%f capacity:%f", val.First_ts, tokat, tokcapa) 65 if _, ok := serialized[key]; ok { 66 log.Errorf("entry %s already exists", key) 67 return false 68 } 69 log.Debugf("serialize %s of %s : %s", val.Name, val.Uuid, val.Mapkey) 70 71 return true 72 }) 73 log.Infof("Cleaned %d buckets", len(toflush)) 74 for _, flushkey := range toflush { 75 buckets.Bucket_map.Delete(flushkey) 76 } 77 return nil 78 } 79 80 func DumpBucketsStateAt(deadline time.Time, outputdir string, buckets *Buckets) (string, error) { 81 82 //synchronize with PourItemtoHolders 83 buckets.wgPour.Wait() 84 buckets.wgDumpState.Add(1) 85 defer buckets.wgDumpState.Done() 86 87 if outputdir == "" { 88 return "", fmt.Errorf("empty output dir for dump bucket state") 89 } 90 tmpFd, err := os.CreateTemp(os.TempDir(), "crowdsec-buckets-dump-") 91 if err != nil { 92 return "", fmt.Errorf("failed to create temp file : %s", err) 93 } 94 defer tmpFd.Close() 95 tmpFileName := tmpFd.Name() 96 serialized = make(map[string]Leaky) 97 log.Printf("Dumping buckets state at %s", deadline) 98 total := 0 99 discard := 0 100 buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool { 101 key := rkey.(string) 102 val := rvalue.(*Leaky) 103 total += 1 104 if !val.Ovflw_ts.IsZero() { 105 discard += 1 106 val.logger.Debugf("overflowed at %s.", val.Ovflw_ts) 107 return true 108 } 109 /*FIXME : sometimes the gettokenscountat has some rounding issues when we try to 110 match it with bucket capacity, even if the bucket has long due underflow. Round to 2 decimals*/ 111 tokat := val.Limiter.GetTokensCountAt(deadline) 112 tokcapa := float64(val.Capacity) 113 tokat = math.Round(tokat*100) / 100 114 tokcapa = math.Round(tokcapa*100) / 100 115 116 if tokat >= tokcapa { 117 BucketsUnderflow.With(prometheus.Labels{"name": val.Name}).Inc() 118 val.logger.Debugf("UNDERFLOW : first_ts:%s tokens_at:%f capcity:%f", val.First_ts, tokat, tokcapa) 119 discard += 1 120 return true 121 } 122 val.logger.Debugf("(%s) not dead, count:%f capacity:%f", val.First_ts, tokat, tokcapa) 123 124 if _, ok := serialized[key]; ok { 125 log.Errorf("entry %s already exists", key) 126 return false 127 } 128 log.Debugf("serialize %s of %s : %s", val.Name, val.Uuid, val.Mapkey) 129 val.SerializedState = val.Limiter.Dump() 130 serialized[key] = *val 131 return true 132 }) 133 bbuckets, err := json.MarshalIndent(serialized, "", " ") 134 if err != nil { 135 return "", fmt.Errorf("Failed to unmarshal buckets : %s", err) 136 } 137 size, err := tmpFd.Write(bbuckets) 138 if err != nil { 139 return "", fmt.Errorf("failed to write temp file : %s", err) 140 } 141 log.Infof("Serialized %d live buckets (+%d expired) in %d bytes to %s", len(serialized), discard, size, tmpFd.Name()) 142 serialized = nil 143 return tmpFileName, nil 144 } 145 146 func ShutdownAllBuckets(buckets *Buckets) error { 147 buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool { 148 key := rkey.(string) 149 val := rvalue.(*Leaky) 150 val.tomb.Kill(nil) 151 log.Infof("killed %s", key) 152 return true 153 }) 154 return nil 155 } 156 157 func PourItemToBucket(bucket *Leaky, holder BucketFactory, buckets *Buckets, parsed *types.Event) (bool, error) { 158 var sent bool 159 var buckey = bucket.Mapkey 160 var err error 161 162 sigclosed := 0 163 failed_sent := 0 164 attempts := 0 165 start := time.Now().UTC() 166 167 for !sent { 168 attempts += 1 169 /* Warn the user if we used more than a 100 ms to pour an event, it's at least an half lock*/ 170 if attempts%100000 == 0 && start.Add(100*time.Millisecond).Before(time.Now().UTC()) { 171 holder.logger.Warningf("stuck for %s sending event to %s (sigclosed:%d failed_sent:%d attempts:%d)", time.Since(start), 172 buckey, sigclosed, failed_sent, attempts) 173 } 174 175 /* check if leak routine is up */ 176 select { 177 case _, ok := <-bucket.Signal: 178 if !ok { 179 //the bucket was found and dead, get a new one and continue 180 bucket.logger.Tracef("Bucket %s found dead, cleanup the body", buckey) 181 buckets.Bucket_map.Delete(buckey) 182 sigclosed += 1 183 bucket, err = LoadOrStoreBucketFromHolder(buckey, buckets, holder, parsed.ExpectMode) 184 if err != nil { 185 return false, err 186 } 187 continue 188 } 189 //holder.logger.Tracef("Signal exists, try to pour :)") 190 default: 191 /*nothing to read, but not closed, try to pour */ 192 //holder.logger.Tracef("Signal exists but empty, try to pour :)") 193 } 194 195 /*let's see if this time-bucket should have expired */ 196 if bucket.Mode == types.TIMEMACHINE { 197 bucket.mutex.Lock() 198 firstTs := bucket.First_ts 199 lastTs := bucket.Last_ts 200 bucket.mutex.Unlock() 201 202 if !firstTs.IsZero() { 203 var d time.Time 204 err = d.UnmarshalText([]byte(parsed.MarshaledTime)) 205 if err != nil { 206 holder.logger.Warningf("Failed unmarshaling event time (%s) : %v", parsed.MarshaledTime, err) 207 } 208 if d.After(lastTs.Add(bucket.Duration)) { 209 bucket.logger.Tracef("bucket is expired (curr event: %s, bucket deadline: %s), kill", d, lastTs.Add(bucket.Duration)) 210 buckets.Bucket_map.Delete(buckey) 211 //not sure about this, should we create a new one ? 212 sigclosed += 1 213 bucket, err = LoadOrStoreBucketFromHolder(buckey, buckets, holder, parsed.ExpectMode) 214 if err != nil { 215 return false, err 216 } 217 continue 218 } 219 } 220 } 221 /*the bucket seems to be up & running*/ 222 select { 223 case bucket.In <- parsed: 224 //holder.logger.Tracef("Successfully sent !") 225 if BucketPourTrack { 226 if _, ok := BucketPourCache[bucket.Name]; !ok { 227 BucketPourCache[bucket.Name] = make([]types.Event, 0) 228 } 229 evt := deepcopy.Copy(*parsed) 230 BucketPourCache[bucket.Name] = append(BucketPourCache[bucket.Name], evt.(types.Event)) 231 } 232 sent = true 233 continue 234 default: 235 failed_sent += 1 236 //holder.logger.Tracef("Failed to send, try again") 237 continue 238 239 } 240 } 241 holder.logger.Debugf("bucket '%s' is poured", holder.Name) 242 return sent, nil 243 } 244 245 func LoadOrStoreBucketFromHolder(partitionKey string, buckets *Buckets, holder BucketFactory, expectMode int) (*Leaky, error) { 246 247 biface, ok := buckets.Bucket_map.Load(partitionKey) 248 249 /* the bucket doesn't exist, create it !*/ 250 if !ok { 251 var fresh_bucket *Leaky 252 253 switch expectMode { 254 case types.TIMEMACHINE: 255 fresh_bucket = NewTimeMachine(holder) 256 holder.logger.Debugf("Creating TimeMachine bucket") 257 case types.LIVE: 258 fresh_bucket = NewLeaky(holder) 259 holder.logger.Debugf("Creating Live bucket") 260 default: 261 return nil, fmt.Errorf("input event has no expected mode : %+v", expectMode) 262 } 263 fresh_bucket.In = make(chan *types.Event) 264 fresh_bucket.Mapkey = partitionKey 265 fresh_bucket.Signal = make(chan bool, 1) 266 actual, stored := buckets.Bucket_map.LoadOrStore(partitionKey, fresh_bucket) 267 if !stored { 268 holder.tomb.Go(func() error { 269 return LeakRoutine(fresh_bucket) 270 }) 271 biface = fresh_bucket 272 //once the created goroutine is ready to process event, we can return it 273 <-fresh_bucket.Signal 274 } else { 275 holder.logger.Debugf("Unexpectedly found exisint bucket for %s", partitionKey) 276 biface = actual 277 } 278 holder.logger.Debugf("Created new bucket %s", partitionKey) 279 } 280 return biface.(*Leaky), nil 281 } 282 283 var orderEvent map[string]*sync.WaitGroup 284 285 func PourItemToHolders(parsed types.Event, holders []BucketFactory, buckets *Buckets) (bool, error) { 286 var ( 287 ok, condition, poured bool 288 ) 289 290 if BucketPourTrack { 291 if BucketPourCache == nil { 292 BucketPourCache = make(map[string][]types.Event) 293 } 294 if _, ok = BucketPourCache["OK"]; !ok { 295 BucketPourCache["OK"] = make([]types.Event, 0) 296 } 297 evt := deepcopy.Copy(parsed) 298 BucketPourCache["OK"] = append(BucketPourCache["OK"], evt.(types.Event)) 299 } 300 //find the relevant holders (scenarios) 301 for idx := 0; idx < len(holders); idx++ { 302 //for idx, holder := range holders { 303 304 //evaluate bucket's condition 305 if holders[idx].RunTimeFilter != nil { 306 holders[idx].logger.Tracef("event against holder %d/%d", idx, len(holders)) 307 output, err := exprhelpers.Run(holders[idx].RunTimeFilter, 308 map[string]interface{}{"evt": &parsed}, 309 holders[idx].logger, 310 holders[idx].Debug) 311 if err != nil { 312 holders[idx].logger.Errorf("failed parsing : %v", err) 313 return false, fmt.Errorf("leaky failed : %s", err) 314 } 315 // we assume we a bool should add type check here 316 if condition, ok = output.(bool); !ok { 317 holders[idx].logger.Errorf("unexpected non-bool return : %T", output) 318 holders[idx].logger.Fatalf("Filter issue") 319 } 320 if !condition { 321 holders[idx].logger.Debugf("Event leaving node : ko (filter mismatch)") 322 continue 323 } 324 } 325 326 //groupby determines the partition key for the specific bucket 327 var groupby string 328 if holders[idx].RunTimeGroupBy != nil { 329 tmpGroupBy, err := exprhelpers.Run(holders[idx].RunTimeGroupBy, map[string]interface{}{"evt": &parsed}, holders[idx].logger, holders[idx].Debug) 330 if err != nil { 331 holders[idx].logger.Errorf("failed groupby : %v", err) 332 return false, errors.New("leaky failed :/") 333 } 334 335 if groupby, ok = tmpGroupBy.(string); !ok { 336 holders[idx].logger.Fatalf("failed groupby type : %v", err) 337 return false, errors.New("groupby wrong type") 338 } 339 } 340 buckey := GetKey(holders[idx], groupby) 341 342 //we need to either find the existing bucket, or create a new one (if it's the first event to hit it for this partition key) 343 bucket, err := LoadOrStoreBucketFromHolder(buckey, buckets, holders[idx], parsed.ExpectMode) 344 if err != nil { 345 return false, fmt.Errorf("failed to load or store bucket: %w", err) 346 } 347 //finally, pour the even into the bucket 348 349 if bucket.orderEvent { 350 if orderEvent == nil { 351 orderEvent = make(map[string]*sync.WaitGroup) 352 } 353 if orderEvent[buckey] != nil { 354 orderEvent[buckey].Wait() 355 } else { 356 orderEvent[buckey] = &sync.WaitGroup{} 357 } 358 359 orderEvent[buckey].Add(1) 360 } 361 362 ok, err := PourItemToBucket(bucket, holders[idx], buckets, &parsed) 363 364 if bucket.orderEvent { 365 orderEvent[buckey].Wait() 366 } 367 368 if err != nil { 369 return false, fmt.Errorf("failed to pour bucket: %w", err) 370 } 371 if ok { 372 poured = true 373 } 374 } 375 return poured, nil 376 }