github.com/crowdsecurity/crowdsec@v1.6.1/pkg/leakybucket/manager_run.go

package leakybucket

import (
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"os"
	"sync"
	"time"

	"github.com/mohae/deepcopy"
	"github.com/prometheus/client_golang/prometheus"
	log "github.com/sirupsen/logrus"

	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
	"github.com/crowdsecurity/crowdsec/pkg/types"
)

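// serialized is a scratch map used by DumpBucketsStateAt to collect the state of live buckets.
// BucketPourCache keeps a copy of every event poured into each bucket (keyed by bucket name,
// plus a global "OK" entry for all events), and BucketPourTrack enables that tracking.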
var serialized map[string]Leaky
var BucketPourCache map[string][]types.Event
var BucketPourTrack bool

/*
GarbageCollectBuckets cleans up buckets that are no longer useful.
The leaky routines' lifecycle is based on "real" time.
But when we are running in time-machine mode, the reference time comes from the logs, not "real" time.
Thus we need to garbage collect buckets to avoid skyrocketing memory usage.
*/
func GarbageCollectBuckets(deadline time.Time, buckets *Buckets) error {
	buckets.wgPour.Wait()
	buckets.wgDumpState.Add(1)
	defer buckets.wgDumpState.Done()

	total := 0
	discard := 0
	toflush := []string{}
	buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool {
		key := rkey.(string)
		val := rvalue.(*Leaky)
		total += 1
		//bucket already overflowed, we can kill it
		if !val.Ovflw_ts.IsZero() {
			discard += 1
			val.logger.Debugf("overflowed at %s.", val.Ovflw_ts)
			toflush = append(toflush, key)
			val.tomb.Kill(nil)
			return true
		}
		/*FIXME : GetTokensCountAt sometimes has rounding issues when we try to
		match it with the bucket capacity, even if the bucket underflowed long ago. Round to 2 decimals*/
		tokat := val.Limiter.GetTokensCountAt(deadline)
		tokcapa := float64(val.Capacity)
		tokat = math.Round(tokat*100) / 100
		tokcapa = math.Round(tokcapa*100) / 100
		//bucket actually underflowed based on log time, but not in real time
		if tokat >= tokcapa {
			BucketsUnderflow.With(prometheus.Labels{"name": val.Name}).Inc()
			val.logger.Debugf("UNDERFLOW : first_ts:%s tokens_at:%f capacity:%f", val.First_ts, tokat, tokcapa)
			toflush = append(toflush, key)
			val.tomb.Kill(nil)
			return true
		}

		val.logger.Tracef("(%s) not dead, count:%f capacity:%f", val.First_ts, tokat, tokcapa)
		if _, ok := serialized[key]; ok {
			log.Errorf("entry %s already exists", key)
			return false
		}
		log.Debugf("serialize %s of %s : %s", val.Name, val.Uuid, val.Mapkey)

		return true
	})
	log.Infof("Cleaned %d buckets", len(toflush))
	for _, flushkey := range toflush {
		buckets.Bucket_map.Delete(flushkey)
	}
	return nil
}

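// DumpBucketsStateAt serializes the state of every live bucket (overflowed and
// underflowed buckets are skipped) at the given deadline into a temporary JSON
// file and returns the path of that file. It fails if outputdir is empty.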
func DumpBucketsStateAt(deadline time.Time, outputdir string, buckets *Buckets) (string, error) {

	//synchronize with PourItemToHolders
	buckets.wgPour.Wait()
	buckets.wgDumpState.Add(1)
	defer buckets.wgDumpState.Done()

	if outputdir == "" {
		return "", fmt.Errorf("empty output dir for dump bucket state")
	}
	tmpFd, err := os.CreateTemp(os.TempDir(), "crowdsec-buckets-dump-")
	if err != nil {
		return "", fmt.Errorf("failed to create temp file : %s", err)
	}
	defer tmpFd.Close()
	tmpFileName := tmpFd.Name()
	serialized = make(map[string]Leaky)
	log.Printf("Dumping buckets state at %s", deadline)
	total := 0
	discard := 0
	buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool {
		key := rkey.(string)
		val := rvalue.(*Leaky)
		total += 1
		if !val.Ovflw_ts.IsZero() {
			discard += 1
			val.logger.Debugf("overflowed at %s.", val.Ovflw_ts)
			return true
		}
		/*FIXME : GetTokensCountAt sometimes has rounding issues when we try to
		match it with the bucket capacity, even if the bucket underflowed long ago. Round to 2 decimals*/
		tokat := val.Limiter.GetTokensCountAt(deadline)
		tokcapa := float64(val.Capacity)
		tokat = math.Round(tokat*100) / 100
		tokcapa = math.Round(tokcapa*100) / 100

		if tokat >= tokcapa {
			BucketsUnderflow.With(prometheus.Labels{"name": val.Name}).Inc()
			val.logger.Debugf("UNDERFLOW : first_ts:%s tokens_at:%f capacity:%f", val.First_ts, tokat, tokcapa)
			discard += 1
			return true
		}
		val.logger.Debugf("(%s) not dead, count:%f capacity:%f", val.First_ts, tokat, tokcapa)

		if _, ok := serialized[key]; ok {
			log.Errorf("entry %s already exists", key)
			return false
		}
		log.Debugf("serialize %s of %s : %s", val.Name, val.Uuid, val.Mapkey)
		val.SerializedState = val.Limiter.Dump()
		serialized[key] = *val
		return true
	})
	bbuckets, err := json.MarshalIndent(serialized, "", " ")
	if err != nil {
		return "", fmt.Errorf("failed to marshal buckets : %s", err)
	}
	size, err := tmpFd.Write(bbuckets)
	if err != nil {
		return "", fmt.Errorf("failed to write temp file : %s", err)
	}
	log.Infof("Serialized %d live buckets (+%d expired) in %d bytes to %s", len(serialized), discard, size, tmpFd.Name())
	serialized = nil
	return tmpFileName, nil
}

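// ShutdownAllBuckets kills the tomb of every bucket in the map, asking each
// leak routine to exit.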
func ShutdownAllBuckets(buckets *Buckets) error {
	buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool {
		key := rkey.(string)
		val := rvalue.(*Leaky)
		val.tomb.Kill(nil)
		log.Infof("killed %s", key)
		return true
	})
	return nil
}

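// PourItemToBucket sends the parsed event to the bucket's input channel. If the
// bucket's leak routine is dead, or (in time-machine mode) the bucket has expired
// relative to the event's timestamp, the bucket is recreated from its holder and
// the send is retried. It returns true once the event has been sent.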
func PourItemToBucket(bucket *Leaky, holder BucketFactory, buckets *Buckets, parsed *types.Event) (bool, error) {
	var sent bool
	var buckey = bucket.Mapkey
	var err error

	sigclosed := 0
	failed_sent := 0
	attempts := 0
	start := time.Now().UTC()

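	// Busy-retry loop: keep trying to hand the event to the bucket's goroutine,
	// recreating the bucket whenever its routine turns out to be dead or expired.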
	for !sent {
		attempts += 1
		/* Warn the user if we spent more than 100ms pouring an event: it likely indicates at least a partial lock */
		if attempts%100000 == 0 && start.Add(100*time.Millisecond).Before(time.Now().UTC()) {
			holder.logger.Warningf("stuck for %s sending event to %s (sigclosed:%d failed_sent:%d attempts:%d)", time.Since(start),
				buckey, sigclosed, failed_sent, attempts)
		}

		/* check if leak routine is up */
		select {
		case _, ok := <-bucket.Signal:
			if !ok {
				//the bucket was found and dead, get a new one and continue
				bucket.logger.Tracef("Bucket %s found dead, cleaning up the body", buckey)
				buckets.Bucket_map.Delete(buckey)
				sigclosed += 1
				bucket, err = LoadOrStoreBucketFromHolder(buckey, buckets, holder, parsed.ExpectMode)
				if err != nil {
					return false, err
				}
				continue
			}
			//holder.logger.Tracef("Signal exists, try to pour :)")
		default:
			/*nothing to read, but not closed, try to pour */
			//holder.logger.Tracef("Signal exists but empty, try to pour :)")
		}

		/*let's see if this time-bucket should have expired */
		if bucket.Mode == types.TIMEMACHINE {
			bucket.mutex.Lock()
			firstTs := bucket.First_ts
			lastTs := bucket.Last_ts
			bucket.mutex.Unlock()

			if !firstTs.IsZero() {
				var d time.Time
				err = d.UnmarshalText([]byte(parsed.MarshaledTime))
				if err != nil {
					holder.logger.Warningf("Failed unmarshaling event time (%s) : %v", parsed.MarshaledTime, err)
				}
				if d.After(lastTs.Add(bucket.Duration)) {
					bucket.logger.Tracef("bucket is expired (curr event: %s, bucket deadline: %s), kill", d, lastTs.Add(bucket.Duration))
					buckets.Bucket_map.Delete(buckey)
					//not sure about this, should we create a new one ?
					sigclosed += 1
					bucket, err = LoadOrStoreBucketFromHolder(buckey, buckets, holder, parsed.ExpectMode)
					if err != nil {
						return false, err
					}
					continue
				}
			}
		}
		/*the bucket seems to be up & running*/
		select {
		case bucket.In <- parsed:
			//holder.logger.Tracef("Successfully sent !")
			if BucketPourTrack {
				if _, ok := BucketPourCache[bucket.Name]; !ok {
					BucketPourCache[bucket.Name] = make([]types.Event, 0)
				}
				evt := deepcopy.Copy(*parsed)
				BucketPourCache[bucket.Name] = append(BucketPourCache[bucket.Name], evt.(types.Event))
			}
			sent = true
			continue
		default:
			failed_sent += 1
			//holder.logger.Tracef("Failed to send, try again")
			continue

		}
	}
	holder.logger.Debugf("bucket '%s' is poured", holder.Name)
	return sent, nil
}

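// LoadOrStoreBucketFromHolder returns the bucket for the given partition key,
// creating it from the holder (factory) and starting its leak routine if it does
// not exist yet. expectMode decides whether a live or time-machine bucket is built.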
func LoadOrStoreBucketFromHolder(partitionKey string, buckets *Buckets, holder BucketFactory, expectMode int) (*Leaky, error) {

	biface, ok := buckets.Bucket_map.Load(partitionKey)

	/* the bucket doesn't exist, create it !*/
	if !ok {
		var fresh_bucket *Leaky

		switch expectMode {
		case types.TIMEMACHINE:
			fresh_bucket = NewTimeMachine(holder)
			holder.logger.Debugf("Creating TimeMachine bucket")
		case types.LIVE:
			fresh_bucket = NewLeaky(holder)
			holder.logger.Debugf("Creating Live bucket")
		default:
			return nil, fmt.Errorf("input event has no expected mode : %+v", expectMode)
		}
		fresh_bucket.In = make(chan *types.Event)
		fresh_bucket.Mapkey = partitionKey
		fresh_bucket.Signal = make(chan bool, 1)
		actual, stored := buckets.Bucket_map.LoadOrStore(partitionKey, fresh_bucket)
		if !stored {
			holder.tomb.Go(func() error {
				return LeakRoutine(fresh_bucket)
			})
			biface = fresh_bucket
			//once the created goroutine is ready to process events, we can return it
			<-fresh_bucket.Signal
		} else {
			holder.logger.Debugf("Unexpectedly found existing bucket for %s", partitionKey)
			biface = actual
		}
		holder.logger.Debugf("Created new bucket %s", partitionKey)
	}
	return biface.(*Leaky), nil
}

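// orderEvent maps a bucket partition key to a WaitGroup used to serialize event
// pouring when the bucket requires events to be processed in order.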
var orderEvent map[string]*sync.WaitGroup

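// PourItemToHolders runs the parsed event against every holder (scenario): it
// evaluates the holder's filter and groupby expressions, loads or creates the
// matching bucket, and pours the event into it. It returns true if the event was
// poured into at least one bucket.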
func PourItemToHolders(parsed types.Event, holders []BucketFactory, buckets *Buckets) (bool, error) {
	var (
		ok, condition, poured bool
	)

	if BucketPourTrack {
		if BucketPourCache == nil {
			BucketPourCache = make(map[string][]types.Event)
		}
		if _, ok = BucketPourCache["OK"]; !ok {
			BucketPourCache["OK"] = make([]types.Event, 0)
		}
		evt := deepcopy.Copy(parsed)
		BucketPourCache["OK"] = append(BucketPourCache["OK"], evt.(types.Event))
	}
	//find the relevant holders (scenarios)
	for idx := 0; idx < len(holders); idx++ {
		//for idx, holder := range holders {

		//evaluate bucket's condition
		if holders[idx].RunTimeFilter != nil {
			holders[idx].logger.Tracef("event against holder %d/%d", idx, len(holders))
			output, err := exprhelpers.Run(holders[idx].RunTimeFilter,
				map[string]interface{}{"evt": &parsed},
				holders[idx].logger,
				holders[idx].Debug)
			if err != nil {
				holders[idx].logger.Errorf("failed parsing : %v", err)
				return false, fmt.Errorf("leaky failed : %s", err)
			}
			// the filter is expected to return a bool, check the type to be safe
			if condition, ok = output.(bool); !ok {
				holders[idx].logger.Errorf("unexpected non-bool return : %T", output)
				holders[idx].logger.Fatalf("Filter issue")
			}
			if !condition {
				holders[idx].logger.Debugf("Event leaving node : ko (filter mismatch)")
				continue
			}
		}

		//groupby determines the partition key for the specific bucket
		var groupby string
		if holders[idx].RunTimeGroupBy != nil {
			tmpGroupBy, err := exprhelpers.Run(holders[idx].RunTimeGroupBy, map[string]interface{}{"evt": &parsed}, holders[idx].logger, holders[idx].Debug)
			if err != nil {
				holders[idx].logger.Errorf("failed groupby : %v", err)
				return false, errors.New("leaky failed :/")
			}

			if groupby, ok = tmpGroupBy.(string); !ok {
				holders[idx].logger.Fatalf("failed groupby type : %v", err)
				return false, errors.New("groupby wrong type")
			}
		}
		buckey := GetKey(holders[idx], groupby)

		//we need to either find the existing bucket, or create a new one (if it's the first event to hit it for this partition key)
		bucket, err := LoadOrStoreBucketFromHolder(buckey, buckets, holders[idx], parsed.ExpectMode)
		if err != nil {
			return false, fmt.Errorf("failed to load or store bucket: %w", err)
		}
		//finally, pour the event into the bucket

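		//when the bucket requires ordered events, pouring is serialized per partition key
		//through a WaitGroup: we wait for the previous event to be fully processed before
		//adding ours (the matching Done() is expected to be called on the consumer side)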
		if bucket.orderEvent {
			if orderEvent == nil {
				orderEvent = make(map[string]*sync.WaitGroup)
			}
			if orderEvent[buckey] != nil {
				orderEvent[buckey].Wait()
			} else {
				orderEvent[buckey] = &sync.WaitGroup{}
			}

			orderEvent[buckey].Add(1)
		}

		ok, err := PourItemToBucket(bucket, holders[idx], buckets, &parsed)

		if bucket.orderEvent {
			orderEvent[buckey].Wait()
		}

		if err != nil {
			return false, fmt.Errorf("failed to pour bucket: %w", err)
		}
		if ok {
			poured = true
		}
	}
	return poured, nil
}