bitbucket.org/Aishee/synsec@v0.0.0-20210414005726-236fc01a153d/pkg/leakybucket/manager_run.go (about)

     1  package leakybucket
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"math"
     9  	"os"
    10  	"time"
    11  
    12  	log "github.com/sirupsen/logrus"
    13  
    14  	"github.com/antonmedv/expr"
    15  	"bitbucket.org/Aishee/synsec/pkg/exprhelpers"
    16  	"bitbucket.org/Aishee/synsec/pkg/types"
    17  	"github.com/prometheus/client_golang/prometheus"
    18  )
    19  
    20  var serialized map[string]Leaky
    21  
    22  /*The leaky routines lifecycle are based on "real" time.
    23  But when we are running in time-machine mode, the reference time is in logs and not "real" time.
    24  Thus we need to garbage collect them to avoid a skyrocketing memory usage.*/
    25  func GarbageCollectBuckets(deadline time.Time, buckets *Buckets) error {
    26  	buckets.wgPour.Wait()
    27  	buckets.wgDumpState.Add(1)
    28  	defer buckets.wgDumpState.Done()
    29  
    30  	total := 0
    31  	discard := 0
    32  	toflush := []string{}
    33  	buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool {
    34  		key := rkey.(string)
    35  		val := rvalue.(*Leaky)
    36  		total += 1
    37  		//bucket already overflowed, we can kill it
    38  		if !val.Ovflw_ts.IsZero() {
    39  			discard += 1
    40  			val.logger.Debugf("overflowed at %s.", val.Ovflw_ts)
    41  			toflush = append(toflush, key)
    42  			val.tomb.Kill(nil)
    43  			return true
    44  		}
    45  		/*FIXME : sometimes the gettokenscountat has some rounding issues when we try to
    46  		match it with bucket capacity, even if the bucket has long due underflow. Round to 2 decimals*/
    47  		tokat := val.Limiter.GetTokensCountAt(deadline)
    48  		tokcapa := float64(val.Capacity)
    49  		tokat = math.Round(tokat*100) / 100
    50  		tokcapa = math.Round(tokcapa*100) / 100
    51  		//bucket actually underflowed based on log time, but no in real time
    52  		if tokat >= tokcapa {
    53  			BucketsUnderflow.With(prometheus.Labels{"name": val.Name}).Inc()
    54  			val.logger.Debugf("UNDERFLOW : first_ts:%s tokens_at:%f capcity:%f", val.First_ts, tokat, tokcapa)
    55  			toflush = append(toflush, key)
    56  			val.tomb.Kill(nil)
    57  			return true
    58  		} else {
    59  			val.logger.Tracef("(%s) not dead, count:%f capacity:%f", val.First_ts, tokat, tokcapa)
    60  		}
    61  		if _, ok := serialized[key]; ok {
    62  			log.Errorf("entry %s already exists", key)
    63  			return false
    64  		} else {
    65  			log.Debugf("serialize %s of %s : %s", val.Name, val.Uuid, val.Mapkey)
    66  		}
    67  		return true
    68  	})
    69  	log.Infof("Cleaned %d buckets", len(toflush))
    70  	for _, flushkey := range toflush {
    71  		buckets.Bucket_map.Delete(flushkey)
    72  	}
    73  	return nil
    74  }
    75  
    76  func DumpBucketsStateAt(deadline time.Time, outputdir string, buckets *Buckets) (string, error) {
    77  
    78  	//synchronize with PourItemtoHolders
    79  	buckets.wgPour.Wait()
    80  	buckets.wgDumpState.Add(1)
    81  	defer buckets.wgDumpState.Done()
    82  
    83  	if outputdir == "" {
    84  		return "", fmt.Errorf("empty output dir for dump bucket state")
    85  	}
    86  	tmpFd, err := ioutil.TempFile(os.TempDir(), "synsec-buckets-dump-")
    87  	if err != nil {
    88  		return "", fmt.Errorf("failed to create temp file : %s", err)
    89  	}
    90  	defer tmpFd.Close()
    91  	tmpFileName := tmpFd.Name()
    92  	serialized = make(map[string]Leaky)
    93  	log.Printf("Dumping buckets state at %s", deadline)
    94  	total := 0
    95  	discard := 0
    96  	buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool {
    97  		key := rkey.(string)
    98  		val := rvalue.(*Leaky)
    99  		total += 1
   100  		if !val.Ovflw_ts.IsZero() {
   101  			discard += 1
   102  			val.logger.Debugf("overflowed at %s.", val.Ovflw_ts)
   103  			return true
   104  		}
   105  		/*FIXME : sometimes the gettokenscountat has some rounding issues when we try to
   106  		match it with bucket capacity, even if the bucket has long due underflow. Round to 2 decimals*/
   107  		tokat := val.Limiter.GetTokensCountAt(deadline)
   108  		tokcapa := float64(val.Capacity)
   109  		tokat = math.Round(tokat*100) / 100
   110  		tokcapa = math.Round(tokcapa*100) / 100
   111  
   112  		if tokat >= tokcapa {
   113  			BucketsUnderflow.With(prometheus.Labels{"name": val.Name}).Inc()
   114  			val.logger.Debugf("UNDERFLOW : first_ts:%s tokens_at:%f capcity:%f", val.First_ts, tokat, tokcapa)
   115  			discard += 1
   116  			return true
   117  		} else {
   118  			val.logger.Debugf("(%s) not dead, count:%f capacity:%f", val.First_ts, tokat, tokcapa)
   119  		}
   120  		if _, ok := serialized[key]; ok {
   121  			log.Errorf("entry %s already exists", key)
   122  			return false
   123  		} else {
   124  			log.Debugf("serialize %s of %s : %s", val.Name, val.Uuid, val.Mapkey)
   125  		}
   126  		val.SerializedState = val.Limiter.Dump()
   127  		serialized[key] = *val
   128  		return true
   129  	})
   130  	bbuckets, err := json.MarshalIndent(serialized, "", " ")
   131  	if err != nil {
   132  		log.Fatalf("Failed to unmarshal buckets : %s", err)
   133  	}
   134  	size, err := tmpFd.Write(bbuckets)
   135  	if err != nil {
   136  		return "", fmt.Errorf("failed to write temp file : %s", err)
   137  	}
   138  	log.Infof("Serialized %d live buckets (+%d expired) in %d bytes to %s", len(serialized), discard, size, tmpFd.Name())
   139  	serialized = nil
   140  	return tmpFileName, nil
   141  }
   142  
   143  func ShutdownAllBuckets(buckets *Buckets) error {
   144  	buckets.Bucket_map.Range(func(rkey, rvalue interface{}) bool {
   145  		key := rkey.(string)
   146  		val := rvalue.(*Leaky)
   147  		val.tomb.Kill(nil)
   148  		log.Infof("killed %s", key)
   149  		return true
   150  	})
   151  	return nil
   152  }
   153  
   154  func PourItemToHolders(parsed types.Event, holders []BucketFactory, buckets *Buckets) (bool, error) {
   155  	var (
   156  		ok, condition, sent bool
   157  		err                 error
   158  	)
   159  	//synchronize with DumpBucketsStateAt
   160  
   161  	for idx, holder := range holders {
   162  
   163  		if holder.RunTimeFilter != nil {
   164  			holder.logger.Tracef("event against holder %d/%d", idx, len(holders))
   165  			output, err := expr.Run(holder.RunTimeFilter, exprhelpers.GetExprEnv(map[string]interface{}{"evt": &parsed}))
   166  			if err != nil {
   167  				holder.logger.Errorf("failed parsing : %v", err)
   168  				return false, fmt.Errorf("leaky failed : %s", err)
   169  			}
   170  			// we assume we a bool should add type check here
   171  			if condition, ok = output.(bool); !ok {
   172  				holder.logger.Errorf("unexpected non-bool return : %T", output)
   173  				holder.logger.Fatalf("Filter issue")
   174  			}
   175  
   176  			if holder.Debug {
   177  				holder.ExprDebugger.Run(holder.logger, condition, exprhelpers.GetExprEnv(map[string]interface{}{"evt": &parsed}))
   178  			}
   179  			if !condition {
   180  				holder.logger.Debugf("Event leaving node : ko (filter mismatch)")
   181  				continue
   182  			}
   183  		}
   184  
   185  		sent = false
   186  		var groupby string
   187  		if holder.RunTimeGroupBy != nil {
   188  			tmpGroupBy, err := expr.Run(holder.RunTimeGroupBy, exprhelpers.GetExprEnv(map[string]interface{}{"evt": &parsed}))
   189  			if err != nil {
   190  				holder.logger.Errorf("failed groupby : %v", err)
   191  				return false, errors.New("leaky failed :/")
   192  			}
   193  
   194  			if groupby, ok = tmpGroupBy.(string); !ok {
   195  				holder.logger.Fatalf("failed groupby type : %v", err)
   196  				return false, errors.New("groupby wrong type")
   197  			}
   198  		}
   199  		buckey := GetKey(holder, groupby)
   200  
   201  		sigclosed := 0
   202  		keymiss := 0
   203  		failed_sent := 0
   204  		attempts := 0
   205  		start := time.Now()
   206  		for !sent {
   207  			attempts += 1
   208  			/* Warn the user if we used more than a 100 ms to pour an event, it's at least an half lock*/
   209  			if attempts%100000 == 0 && start.Add(100*time.Millisecond).Before(time.Now()) {
   210  				holder.logger.Warningf("stuck for %s sending event to %s (sigclosed:%d keymiss:%d failed_sent:%d attempts:%d)", time.Since(start),
   211  					buckey, sigclosed, keymiss, failed_sent, attempts)
   212  			}
   213  			biface, ok := buckets.Bucket_map.Load(buckey)
   214  			//biface, bigout
   215  			/* the bucket doesn't exist, create it !*/
   216  			if !ok {
   217  				/*
   218  					not found in map
   219  				*/
   220  
   221  				holder.logger.Debugf("Creating bucket %s", buckey)
   222  				keymiss += 1
   223  				var fresh_bucket *Leaky
   224  
   225  				switch parsed.ExpectMode {
   226  				case TIMEMACHINE:
   227  					fresh_bucket = NewTimeMachine(holder)
   228  					holder.logger.Debugf("Creating TimeMachine bucket")
   229  				case LIVE:
   230  					fresh_bucket = NewLeaky(holder)
   231  					holder.logger.Debugf("Creating Live bucket")
   232  				default:
   233  					holder.logger.Fatalf("input event has no expected mode, malformed : %+v", parsed)
   234  				}
   235  				fresh_bucket.In = make(chan types.Event)
   236  				fresh_bucket.Mapkey = buckey
   237  				fresh_bucket.Signal = make(chan bool, 1)
   238  				buckets.Bucket_map.Store(buckey, fresh_bucket)
   239  				holder.tomb.Go(func() error {
   240  					return LeakRoutine(fresh_bucket)
   241  				})
   242  
   243  				holder.logger.Debugf("Created new bucket %s", buckey)
   244  
   245  				//wait for signal to be opened
   246  				<-fresh_bucket.Signal
   247  				continue
   248  			}
   249  
   250  			bucket := biface.(*Leaky)
   251  			/* check if leak routine is up */
   252  			select {
   253  			case _, ok := <-bucket.Signal:
   254  				if !ok {
   255  					//it's closed, delete it
   256  					bucket.logger.Debugf("Bucket %s found dead, cleanup the body", buckey)
   257  					buckets.Bucket_map.Delete(buckey)
   258  					sigclosed += 1
   259  					continue
   260  				}
   261  				holder.logger.Tracef("Signal exists, try to pour :)")
   262  
   263  			default:
   264  				/*nothing to read, but not closed, try to pour */
   265  				holder.logger.Tracef("Signal exists but empty, try to pour :)")
   266  
   267  			}
   268  			/*let's see if this time-bucket should have expired */
   269  			if bucket.Mode == TIMEMACHINE {
   270  				bucket.mutex.Lock()
   271  				firstTs := bucket.First_ts
   272  				lastTs := bucket.Last_ts
   273  				bucket.mutex.Unlock()
   274  
   275  				if !firstTs.IsZero() {
   276  					var d time.Time
   277  					err = d.UnmarshalText([]byte(parsed.MarshaledTime))
   278  					if err != nil {
   279  						holder.logger.Warningf("Failed unmarshaling event time (%s) : %v", parsed.MarshaledTime, err)
   280  					}
   281  					if d.After(lastTs.Add(bucket.Duration)) {
   282  						bucket.logger.Tracef("bucket is expired (curr event: %s, bucket deadline: %s), kill", d, lastTs.Add(bucket.Duration))
   283  						buckets.Bucket_map.Delete(buckey)
   284  						continue
   285  					}
   286  				}
   287  			}
   288  			/*if we're here, let's try to pour */
   289  
   290  			select {
   291  			case bucket.In <- parsed:
   292  				holder.logger.Tracef("Successfully sent !")
   293  				//sent was successful !
   294  				sent = true
   295  				continue
   296  			default:
   297  				failed_sent += 1
   298  				holder.logger.Tracef("Failed to send, try again")
   299  				continue
   300  
   301  			}
   302  		}
   303  
   304  		holder.logger.Debugf("bucket '%s' is poured", holder.Name)
   305  	}
   306  	return sent, nil
   307  }