github.com/crowdsecurity/crowdsec@v1.6.1/cmd/crowdsec/crowdsec.go (about) 1 package main 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "sync" 9 "time" 10 11 log "github.com/sirupsen/logrus" 12 "gopkg.in/yaml.v2" 13 14 "github.com/crowdsecurity/go-cs-lib/trace" 15 16 "github.com/crowdsecurity/crowdsec/pkg/acquisition" 17 "github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration" 18 "github.com/crowdsecurity/crowdsec/pkg/alertcontext" 19 "github.com/crowdsecurity/crowdsec/pkg/appsec" 20 "github.com/crowdsecurity/crowdsec/pkg/csconfig" 21 "github.com/crowdsecurity/crowdsec/pkg/cwhub" 22 leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket" 23 "github.com/crowdsecurity/crowdsec/pkg/parser" 24 "github.com/crowdsecurity/crowdsec/pkg/types" 25 ) 26 27 // initCrowdsec prepares the log processor service 28 func initCrowdsec(cConfig *csconfig.Config, hub *cwhub.Hub) (*parser.Parsers, []acquisition.DataSource, error) { 29 var err error 30 31 if err = alertcontext.LoadConsoleContext(cConfig, hub); err != nil { 32 return nil, nil, fmt.Errorf("while loading context: %w", err) 33 } 34 35 // Start loading configs 36 csParsers := parser.NewParsers(hub) 37 if csParsers, err = parser.LoadParsers(cConfig, csParsers); err != nil { 38 return nil, nil, fmt.Errorf("while loading parsers: %w", err) 39 } 40 41 if err := LoadBuckets(cConfig, hub); err != nil { 42 return nil, nil, fmt.Errorf("while loading scenarios: %w", err) 43 } 44 45 if err := appsec.LoadAppsecRules(hub); err != nil { 46 return nil, nil, fmt.Errorf("while loading appsec rules: %w", err) 47 } 48 49 datasources, err := LoadAcquisition(cConfig) 50 if err != nil { 51 return nil, nil, fmt.Errorf("while loading acquisition config: %w", err) 52 } 53 54 return csParsers, datasources, nil 55 } 56 57 // runCrowdsec starts the log processor service 58 func runCrowdsec(cConfig *csconfig.Config, parsers *parser.Parsers, hub *cwhub.Hub, datasources []acquisition.DataSource) error { 59 inputEventChan = make(chan types.Event) 60 inputLineChan = make(chan types.Event) 61 62 // start go-routines for parsing, buckets pour and outputs. 63 parserWg := &sync.WaitGroup{} 64 65 parsersTomb.Go(func() error { 66 parserWg.Add(1) 67 68 for i := 0; i < cConfig.Crowdsec.ParserRoutinesCount; i++ { 69 parsersTomb.Go(func() error { 70 defer trace.CatchPanic("crowdsec/runParse") 71 72 if err := runParse(inputLineChan, inputEventChan, *parsers.Ctx, parsers.Nodes); err != nil { 73 // this error will never happen as parser.Parse is not able to return errors 74 log.Fatalf("starting parse error : %s", err) 75 return err 76 } 77 78 return nil 79 }) 80 } 81 parserWg.Done() 82 83 return nil 84 }) 85 parserWg.Wait() 86 87 bucketWg := &sync.WaitGroup{} 88 89 bucketsTomb.Go(func() error { 90 bucketWg.Add(1) 91 /*restore previous state as well if present*/ 92 if cConfig.Crowdsec.BucketStateFile != "" { 93 log.Warningf("Restoring buckets state from %s", cConfig.Crowdsec.BucketStateFile) 94 95 if err := leaky.LoadBucketsState(cConfig.Crowdsec.BucketStateFile, buckets, holders); err != nil { 96 return fmt.Errorf("unable to restore buckets: %w", err) 97 } 98 } 99 100 for i := 0; i < cConfig.Crowdsec.BucketsRoutinesCount; i++ { 101 bucketsTomb.Go(func() error { 102 defer trace.CatchPanic("crowdsec/runPour") 103 104 if err := runPour(inputEventChan, holders, buckets, cConfig); err != nil { 105 log.Fatalf("starting pour error : %s", err) 106 return err 107 } 108 109 return nil 110 }) 111 } 112 bucketWg.Done() 113 114 return nil 115 }) 116 bucketWg.Wait() 117 118 apiClient, err := AuthenticatedLAPIClient(*cConfig.API.Client.Credentials, hub) 119 if err != nil { 120 return err 121 } 122 123 log.Debugf("Starting HeartBeat service") 124 apiClient.HeartBeat.StartHeartBeat(context.Background(), &outputsTomb) 125 126 outputWg := &sync.WaitGroup{} 127 128 outputsTomb.Go(func() error { 129 outputWg.Add(1) 130 131 for i := 0; i < cConfig.Crowdsec.OutputRoutinesCount; i++ { 132 outputsTomb.Go(func() error { 133 defer trace.CatchPanic("crowdsec/runOutput") 134 135 if err := runOutput(inputEventChan, outputEventChan, buckets, *parsers.Povfwctx, parsers.Povfwnodes, apiClient); err != nil { 136 log.Fatalf("starting outputs error : %s", err) 137 return err 138 } 139 140 return nil 141 }) 142 } 143 outputWg.Done() 144 145 return nil 146 }) 147 outputWg.Wait() 148 149 if cConfig.Prometheus != nil && cConfig.Prometheus.Enabled { 150 aggregated := false 151 if cConfig.Prometheus.Level == configuration.CFG_METRICS_AGGREGATE { 152 aggregated = true 153 } 154 155 if err := acquisition.GetMetrics(dataSources, aggregated); err != nil { 156 return fmt.Errorf("while fetching prometheus metrics for datasources: %w", err) 157 } 158 } 159 160 log.Info("Starting processing data") 161 162 if err := acquisition.StartAcquisition(dataSources, inputLineChan, &acquisTomb); err != nil { 163 return fmt.Errorf("starting acquisition error: %w", err) 164 } 165 166 return nil 167 } 168 169 // serveCrowdsec wraps the log processor service 170 func serveCrowdsec(parsers *parser.Parsers, cConfig *csconfig.Config, hub *cwhub.Hub, datasources []acquisition.DataSource, agentReady chan bool) { 171 crowdsecTomb.Go(func() error { 172 defer trace.CatchPanic("crowdsec/serveCrowdsec") 173 174 go func() { 175 defer trace.CatchPanic("crowdsec/runCrowdsec") 176 // this logs every time, even at config reload 177 log.Debugf("running agent after %s ms", time.Since(crowdsecT0)) 178 agentReady <- true 179 180 if err := runCrowdsec(cConfig, parsers, hub, datasources); err != nil { 181 log.Fatalf("unable to start crowdsec routines: %s", err) 182 } 183 }() 184 185 /*we should stop in two cases : 186 - crowdsecTomb has been Killed() : it might be shutdown or reload, so stop 187 - acquisTomb is dead, it means that we were in "cat" mode and files are done reading, quit 188 */ 189 waitOnTomb() 190 log.Debugf("Shutting down crowdsec routines") 191 192 if err := ShutdownCrowdsecRoutines(); err != nil { 193 log.Fatalf("unable to shutdown crowdsec routines: %s", err) 194 } 195 196 log.Debugf("everything is dead, return crowdsecTomb") 197 198 if dumpStates { 199 dumpParserState() 200 dumpOverflowState() 201 dumpBucketsPour() 202 os.Exit(0) 203 } 204 205 return nil 206 }) 207 } 208 209 func dumpBucketsPour() { 210 fd, err := os.OpenFile(filepath.Join(parser.DumpFolder, "bucketpour-dump.yaml"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) 211 if err != nil { 212 log.Fatalf("open: %s", err) 213 } 214 215 out, err := yaml.Marshal(leaky.BucketPourCache) 216 if err != nil { 217 log.Fatalf("marshal: %s", err) 218 } 219 220 b, err := fd.Write(out) 221 if err != nil { 222 log.Fatalf("write: %s", err) 223 } 224 225 log.Tracef("wrote %d bytes", b) 226 227 if err := fd.Close(); err != nil { 228 log.Fatalf(" close: %s", err) 229 } 230 } 231 232 func dumpParserState() { 233 fd, err := os.OpenFile(filepath.Join(parser.DumpFolder, "parser-dump.yaml"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) 234 if err != nil { 235 log.Fatalf("open: %s", err) 236 } 237 238 out, err := yaml.Marshal(parser.StageParseCache) 239 if err != nil { 240 log.Fatalf("marshal: %s", err) 241 } 242 243 b, err := fd.Write(out) 244 if err != nil { 245 log.Fatalf("write: %s", err) 246 } 247 248 log.Tracef("wrote %d bytes", b) 249 250 if err := fd.Close(); err != nil { 251 log.Fatalf(" close: %s", err) 252 } 253 } 254 255 func dumpOverflowState() { 256 fd, err := os.OpenFile(filepath.Join(parser.DumpFolder, "bucket-dump.yaml"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) 257 if err != nil { 258 log.Fatalf("open: %s", err) 259 } 260 261 out, err := yaml.Marshal(bucketOverflows) 262 if err != nil { 263 log.Fatalf("marshal: %s", err) 264 } 265 266 b, err := fd.Write(out) 267 if err != nil { 268 log.Fatalf("write: %s", err) 269 } 270 271 log.Tracef("wrote %d bytes", b) 272 273 if err := fd.Close(); err != nil { 274 log.Fatalf(" close: %s", err) 275 } 276 } 277 278 func waitOnTomb() { 279 for { 280 select { 281 case <-acquisTomb.Dead(): 282 /*if it's acquisition dying it means that we were in "cat" mode. 283 while shutting down, we need to give time for all buckets to process in flight data*/ 284 log.Info("Acquisition is finished, shutting down") 285 /* 286 While it might make sense to want to shut-down parser/buckets/etc. as soon as acquisition is finished, 287 we might have some pending buckets: buckets that overflowed, but whose LeakRoutine are still alive because they 288 are waiting to be able to "commit" (push to api). This can happen specifically in a context where a lot of logs 289 are going to trigger overflow (ie. trigger buckets with ~100% of the logs triggering an overflow). 290 291 To avoid this (which would mean that we would "lose" some overflows), let's monitor the number of live buckets. 292 However, because of the blackhole mechanism, we can't really wait for the number of LeakRoutine to go to zero 293 (we might have to wait $blackhole_duration). 294 295 So: we are waiting for the number of buckets to stop decreasing before returning. "how long" we should wait 296 is a bit of the trick question, as some operations (ie. reverse dns or such in post-overflow) can take some time :) 297 */ 298 299 return 300 301 case <-crowdsecTomb.Dying(): 302 log.Infof("Crowdsec engine shutting down") 303 return 304 } 305 } 306 }