github.com/crowdsecurity/crowdsec@v1.6.1/cmd/crowdsec/crowdsec.go (about)

     1  package main
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  	"time"
    10  
    11  	log "github.com/sirupsen/logrus"
    12  	"gopkg.in/yaml.v2"
    13  
    14  	"github.com/crowdsecurity/go-cs-lib/trace"
    15  
    16  	"github.com/crowdsecurity/crowdsec/pkg/acquisition"
    17  	"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
    18  	"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
    19  	"github.com/crowdsecurity/crowdsec/pkg/appsec"
    20  	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
    21  	"github.com/crowdsecurity/crowdsec/pkg/cwhub"
    22  	leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
    23  	"github.com/crowdsecurity/crowdsec/pkg/parser"
    24  	"github.com/crowdsecurity/crowdsec/pkg/types"
    25  )
    26  
    27  // initCrowdsec prepares the log processor service
    28  func initCrowdsec(cConfig *csconfig.Config, hub *cwhub.Hub) (*parser.Parsers, []acquisition.DataSource, error) {
    29  	var err error
    30  
    31  	if err = alertcontext.LoadConsoleContext(cConfig, hub); err != nil {
    32  		return nil, nil, fmt.Errorf("while loading context: %w", err)
    33  	}
    34  
    35  	// Start loading configs
    36  	csParsers := parser.NewParsers(hub)
    37  	if csParsers, err = parser.LoadParsers(cConfig, csParsers); err != nil {
    38  		return nil, nil, fmt.Errorf("while loading parsers: %w", err)
    39  	}
    40  
    41  	if err := LoadBuckets(cConfig, hub); err != nil {
    42  		return nil, nil, fmt.Errorf("while loading scenarios: %w", err)
    43  	}
    44  
    45  	if err := appsec.LoadAppsecRules(hub); err != nil {
    46  		return nil, nil, fmt.Errorf("while loading appsec rules: %w", err)
    47  	}
    48  
    49  	datasources, err := LoadAcquisition(cConfig)
    50  	if err != nil {
    51  		return nil, nil, fmt.Errorf("while loading acquisition config: %w", err)
    52  	}
    53  
    54  	return csParsers, datasources, nil
    55  }
    56  
    57  // runCrowdsec starts the log processor service
    58  func runCrowdsec(cConfig *csconfig.Config, parsers *parser.Parsers, hub *cwhub.Hub, datasources []acquisition.DataSource) error {
    59  	inputEventChan = make(chan types.Event)
    60  	inputLineChan = make(chan types.Event)
    61  
    62  	// start go-routines for parsing, buckets pour and outputs.
    63  	parserWg := &sync.WaitGroup{}
    64  
    65  	parsersTomb.Go(func() error {
    66  		parserWg.Add(1)
    67  
    68  		for i := 0; i < cConfig.Crowdsec.ParserRoutinesCount; i++ {
    69  			parsersTomb.Go(func() error {
    70  				defer trace.CatchPanic("crowdsec/runParse")
    71  
    72  				if err := runParse(inputLineChan, inputEventChan, *parsers.Ctx, parsers.Nodes); err != nil {
    73  					// this error will never happen as parser.Parse is not able to return errors
    74  					log.Fatalf("starting parse error : %s", err)
    75  					return err
    76  				}
    77  
    78  				return nil
    79  			})
    80  		}
    81  		parserWg.Done()
    82  
    83  		return nil
    84  	})
    85  	parserWg.Wait()
    86  
    87  	bucketWg := &sync.WaitGroup{}
    88  
    89  	bucketsTomb.Go(func() error {
    90  		bucketWg.Add(1)
    91  		/*restore previous state as well if present*/
    92  		if cConfig.Crowdsec.BucketStateFile != "" {
    93  			log.Warningf("Restoring buckets state from %s", cConfig.Crowdsec.BucketStateFile)
    94  
    95  			if err := leaky.LoadBucketsState(cConfig.Crowdsec.BucketStateFile, buckets, holders); err != nil {
    96  				return fmt.Errorf("unable to restore buckets: %w", err)
    97  			}
    98  		}
    99  
   100  		for i := 0; i < cConfig.Crowdsec.BucketsRoutinesCount; i++ {
   101  			bucketsTomb.Go(func() error {
   102  				defer trace.CatchPanic("crowdsec/runPour")
   103  
   104  				if err := runPour(inputEventChan, holders, buckets, cConfig); err != nil {
   105  					log.Fatalf("starting pour error : %s", err)
   106  					return err
   107  				}
   108  
   109  				return nil
   110  			})
   111  		}
   112  		bucketWg.Done()
   113  
   114  		return nil
   115  	})
   116  	bucketWg.Wait()
   117  
   118  	apiClient, err := AuthenticatedLAPIClient(*cConfig.API.Client.Credentials, hub)
   119  	if err != nil {
   120  		return err
   121  	}
   122  
   123  	log.Debugf("Starting HeartBeat service")
   124  	apiClient.HeartBeat.StartHeartBeat(context.Background(), &outputsTomb)
   125  
   126  	outputWg := &sync.WaitGroup{}
   127  
   128  	outputsTomb.Go(func() error {
   129  		outputWg.Add(1)
   130  
   131  		for i := 0; i < cConfig.Crowdsec.OutputRoutinesCount; i++ {
   132  			outputsTomb.Go(func() error {
   133  				defer trace.CatchPanic("crowdsec/runOutput")
   134  
   135  				if err := runOutput(inputEventChan, outputEventChan, buckets, *parsers.Povfwctx, parsers.Povfwnodes, apiClient); err != nil {
   136  					log.Fatalf("starting outputs error : %s", err)
   137  					return err
   138  				}
   139  
   140  				return nil
   141  			})
   142  		}
   143  		outputWg.Done()
   144  
   145  		return nil
   146  	})
   147  	outputWg.Wait()
   148  
   149  	if cConfig.Prometheus != nil && cConfig.Prometheus.Enabled {
   150  		aggregated := false
   151  		if cConfig.Prometheus.Level == configuration.CFG_METRICS_AGGREGATE {
   152  			aggregated = true
   153  		}
   154  
   155  		if err := acquisition.GetMetrics(dataSources, aggregated); err != nil {
   156  			return fmt.Errorf("while fetching prometheus metrics for datasources: %w", err)
   157  		}
   158  	}
   159  
   160  	log.Info("Starting processing data")
   161  
   162  	if err := acquisition.StartAcquisition(dataSources, inputLineChan, &acquisTomb); err != nil {
   163  		return fmt.Errorf("starting acquisition error: %w", err)
   164  	}
   165  
   166  	return nil
   167  }
   168  
   169  // serveCrowdsec wraps the log processor service
   170  func serveCrowdsec(parsers *parser.Parsers, cConfig *csconfig.Config, hub *cwhub.Hub, datasources []acquisition.DataSource, agentReady chan bool) {
   171  	crowdsecTomb.Go(func() error {
   172  		defer trace.CatchPanic("crowdsec/serveCrowdsec")
   173  
   174  		go func() {
   175  			defer trace.CatchPanic("crowdsec/runCrowdsec")
   176  			// this logs every time, even at config reload
   177  			log.Debugf("running agent after %s ms", time.Since(crowdsecT0))
   178  			agentReady <- true
   179  
   180  			if err := runCrowdsec(cConfig, parsers, hub, datasources); err != nil {
   181  				log.Fatalf("unable to start crowdsec routines: %s", err)
   182  			}
   183  		}()
   184  
   185  		/*we should stop in two cases :
   186  		- crowdsecTomb has been Killed() : it might be shutdown or reload, so stop
   187  		- acquisTomb is dead, it means that we were in "cat" mode and files are done reading, quit
   188  		*/
   189  		waitOnTomb()
   190  		log.Debugf("Shutting down crowdsec routines")
   191  
   192  		if err := ShutdownCrowdsecRoutines(); err != nil {
   193  			log.Fatalf("unable to shutdown crowdsec routines: %s", err)
   194  		}
   195  
   196  		log.Debugf("everything is dead, return crowdsecTomb")
   197  
   198  		if dumpStates {
   199  			dumpParserState()
   200  			dumpOverflowState()
   201  			dumpBucketsPour()
   202  			os.Exit(0)
   203  		}
   204  
   205  		return nil
   206  	})
   207  }
   208  
   209  func dumpBucketsPour() {
   210  	fd, err := os.OpenFile(filepath.Join(parser.DumpFolder, "bucketpour-dump.yaml"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
   211  	if err != nil {
   212  		log.Fatalf("open: %s", err)
   213  	}
   214  
   215  	out, err := yaml.Marshal(leaky.BucketPourCache)
   216  	if err != nil {
   217  		log.Fatalf("marshal: %s", err)
   218  	}
   219  
   220  	b, err := fd.Write(out)
   221  	if err != nil {
   222  		log.Fatalf("write: %s", err)
   223  	}
   224  
   225  	log.Tracef("wrote %d bytes", b)
   226  
   227  	if err := fd.Close(); err != nil {
   228  		log.Fatalf(" close: %s", err)
   229  	}
   230  }
   231  
   232  func dumpParserState() {
   233  	fd, err := os.OpenFile(filepath.Join(parser.DumpFolder, "parser-dump.yaml"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
   234  	if err != nil {
   235  		log.Fatalf("open: %s", err)
   236  	}
   237  
   238  	out, err := yaml.Marshal(parser.StageParseCache)
   239  	if err != nil {
   240  		log.Fatalf("marshal: %s", err)
   241  	}
   242  
   243  	b, err := fd.Write(out)
   244  	if err != nil {
   245  		log.Fatalf("write: %s", err)
   246  	}
   247  
   248  	log.Tracef("wrote %d bytes", b)
   249  
   250  	if err := fd.Close(); err != nil {
   251  		log.Fatalf(" close: %s", err)
   252  	}
   253  }
   254  
   255  func dumpOverflowState() {
   256  	fd, err := os.OpenFile(filepath.Join(parser.DumpFolder, "bucket-dump.yaml"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
   257  	if err != nil {
   258  		log.Fatalf("open: %s", err)
   259  	}
   260  
   261  	out, err := yaml.Marshal(bucketOverflows)
   262  	if err != nil {
   263  		log.Fatalf("marshal: %s", err)
   264  	}
   265  
   266  	b, err := fd.Write(out)
   267  	if err != nil {
   268  		log.Fatalf("write: %s", err)
   269  	}
   270  
   271  	log.Tracef("wrote %d bytes", b)
   272  
   273  	if err := fd.Close(); err != nil {
   274  		log.Fatalf(" close: %s", err)
   275  	}
   276  }
   277  
   278  func waitOnTomb() {
   279  	for {
   280  		select {
   281  		case <-acquisTomb.Dead():
   282  			/*if it's acquisition dying it means that we were in "cat" mode.
   283  			while shutting down, we need to give time for all buckets to process in flight data*/
   284  			log.Info("Acquisition is finished, shutting down")
   285  			/*
   286  				While it might make sense to want to shut-down parser/buckets/etc. as soon as acquisition is finished,
   287  				we might have some pending buckets: buckets that overflowed, but whose LeakRoutine are still alive because they
   288  				are waiting to be able to "commit" (push to api). This can happen specifically in a context where a lot of logs
   289  				are going to trigger overflow (ie. trigger buckets with ~100% of the logs triggering an overflow).
   290  
   291  				To avoid this (which would mean that we would "lose" some overflows), let's monitor the number of live buckets.
   292  				However, because of the blackhole mechanism, we can't really wait for the number of LeakRoutine to go to zero
   293  				(we might have to wait $blackhole_duration).
   294  
   295  				So: we are waiting for the number of buckets to stop decreasing before returning. "how long" we should wait
   296  				is a bit of the trick question, as some operations (ie. reverse dns or such in post-overflow) can take some time :)
   297  			*/
   298  
   299  			return
   300  
   301  		case <-crowdsecTomb.Dying():
   302  			log.Infof("Crowdsec engine shutting down")
   303  			return
   304  		}
   305  	}
   306  }