github.com/crowdsecurity/crowdsec@v1.6.1/pkg/acquisition/modules/file/file.go (about) 1 package fileacquisition 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "errors" 7 "fmt" 8 "io" 9 "net/url" 10 "os" 11 "path/filepath" 12 "regexp" 13 "strconv" 14 "strings" 15 "sync" 16 "time" 17 18 "github.com/fsnotify/fsnotify" 19 "github.com/nxadm/tail" 20 "github.com/prometheus/client_golang/prometheus" 21 log "github.com/sirupsen/logrus" 22 "gopkg.in/tomb.v2" 23 "gopkg.in/yaml.v2" 24 25 "github.com/crowdsecurity/go-cs-lib/trace" 26 27 "github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration" 28 "github.com/crowdsecurity/crowdsec/pkg/types" 29 ) 30 31 var linesRead = prometheus.NewCounterVec( 32 prometheus.CounterOpts{ 33 Name: "cs_filesource_hits_total", 34 Help: "Total lines that were read.", 35 }, 36 []string{"source"}) 37 38 type FileConfiguration struct { 39 Filenames []string 40 ExcludeRegexps []string `yaml:"exclude_regexps"` 41 Filename string 42 ForceInotify bool `yaml:"force_inotify"` 43 MaxBufferSize int `yaml:"max_buffer_size"` 44 PollWithoutInotify *bool `yaml:"poll_without_inotify"` 45 configuration.DataSourceCommonCfg `yaml:",inline"` 46 } 47 48 type FileSource struct { 49 metricsLevel int 50 config FileConfiguration 51 watcher *fsnotify.Watcher 52 watchedDirectories map[string]bool 53 tails map[string]bool 54 logger *log.Entry 55 files []string 56 exclude_regexps []*regexp.Regexp 57 tailMapMutex *sync.RWMutex 58 } 59 60 func (f *FileSource) GetUuid() string { 61 return f.config.UniqueId 62 } 63 64 func (f *FileSource) UnmarshalConfig(yamlConfig []byte) error { 65 f.config = FileConfiguration{} 66 67 err := yaml.UnmarshalStrict(yamlConfig, &f.config) 68 if err != nil { 69 return fmt.Errorf("cannot parse FileAcquisition configuration: %w", err) 70 } 71 72 if f.logger != nil { 73 f.logger.Tracef("FileAcquisition configuration: %+v", f.config) 74 } 75 76 if len(f.config.Filename) != 0 { 77 f.config.Filenames = append(f.config.Filenames, f.config.Filename) 78 } 79 80 if len(f.config.Filenames) == 0 { 81 return errors.New("no filename or filenames configuration provided") 82 } 83 84 if f.config.Mode == "" { 85 f.config.Mode = configuration.TAIL_MODE 86 } 87 88 if f.config.Mode != configuration.CAT_MODE && f.config.Mode != configuration.TAIL_MODE { 89 return fmt.Errorf("unsupported mode %s for file source", f.config.Mode) 90 } 91 92 for _, exclude := range f.config.ExcludeRegexps { 93 re, err := regexp.Compile(exclude) 94 if err != nil { 95 return fmt.Errorf("could not compile regexp %s: %w", exclude, err) 96 } 97 98 f.exclude_regexps = append(f.exclude_regexps, re) 99 } 100 101 return nil 102 } 103 104 func (f *FileSource) Configure(yamlConfig []byte, logger *log.Entry, MetricsLevel int) error { 105 f.logger = logger 106 f.metricsLevel = MetricsLevel 107 108 err := f.UnmarshalConfig(yamlConfig) 109 if err != nil { 110 return err 111 } 112 113 f.watchedDirectories = make(map[string]bool) 114 f.tailMapMutex = &sync.RWMutex{} 115 f.tails = make(map[string]bool) 116 117 f.watcher, err = fsnotify.NewWatcher() 118 if err != nil { 119 return fmt.Errorf("could not create fsnotify watcher: %w", err) 120 } 121 122 f.logger.Tracef("Actual FileAcquisition Configuration %+v", f.config) 123 124 for _, pattern := range f.config.Filenames { 125 if f.config.ForceInotify { 126 directory := filepath.Dir(pattern) 127 f.logger.Infof("Force add watch on %s", directory) 128 129 if !f.watchedDirectories[directory] { 130 err = f.watcher.Add(directory) 131 if err != nil { 132 f.logger.Errorf("Could not create watch on directory %s : %s", directory, err) 133 continue 134 } 135 136 f.watchedDirectories[directory] = true 137 } 138 } 139 140 files, err := filepath.Glob(pattern) 141 if err != nil { 142 return fmt.Errorf("glob failure: %w", err) 143 } 144 145 if len(files) == 0 { 146 f.logger.Warnf("No matching files for pattern %s", pattern) 147 continue 148 } 149 150 for _, file := range files { 151 // check if file is excluded 152 excluded := false 153 154 for _, pattern := range f.exclude_regexps { 155 if pattern.MatchString(file) { 156 excluded = true 157 158 f.logger.Infof("Skipping file %s as it matches exclude pattern %s", file, pattern) 159 160 break 161 } 162 } 163 164 if excluded { 165 continue 166 } 167 168 if files[0] != pattern && f.config.Mode == configuration.TAIL_MODE { // we have a glob pattern 169 directory := filepath.Dir(file) 170 f.logger.Debugf("Will add watch to directory: %s", directory) 171 172 if !f.watchedDirectories[directory] { 173 err = f.watcher.Add(directory) 174 if err != nil { 175 f.logger.Errorf("Could not create watch on directory %s : %s", directory, err) 176 continue 177 } 178 179 f.watchedDirectories[directory] = true 180 } else { 181 f.logger.Debugf("Watch for directory %s already exists", directory) 182 } 183 } 184 185 f.logger.Infof("Adding file %s to datasources", file) 186 f.files = append(f.files, file) 187 } 188 } 189 190 return nil 191 } 192 193 func (f *FileSource) ConfigureByDSN(dsn string, labels map[string]string, logger *log.Entry, uuid string) error { 194 if !strings.HasPrefix(dsn, "file://") { 195 return fmt.Errorf("invalid DSN %s for file source, must start with file://", dsn) 196 } 197 198 f.logger = logger 199 f.config = FileConfiguration{} 200 201 dsn = strings.TrimPrefix(dsn, "file://") 202 203 args := strings.Split(dsn, "?") 204 205 if len(args[0]) == 0 { 206 return errors.New("empty file:// DSN") 207 } 208 209 if len(args) == 2 && len(args[1]) != 0 { 210 params, err := url.ParseQuery(args[1]) 211 if err != nil { 212 return fmt.Errorf("could not parse file args: %w", err) 213 } 214 215 for key, value := range params { 216 switch key { 217 case "log_level": 218 if len(value) != 1 { 219 return errors.New("expected zero or one value for 'log_level'") 220 } 221 222 lvl, err := log.ParseLevel(value[0]) 223 if err != nil { 224 return fmt.Errorf("unknown level %s: %w", value[0], err) 225 } 226 227 f.logger.Logger.SetLevel(lvl) 228 case "max_buffer_size": 229 if len(value) != 1 { 230 return errors.New("expected zero or one value for 'max_buffer_size'") 231 } 232 233 maxBufferSize, err := strconv.Atoi(value[0]) 234 if err != nil { 235 return fmt.Errorf("could not parse max_buffer_size %s: %w", value[0], err) 236 } 237 238 f.config.MaxBufferSize = maxBufferSize 239 default: 240 return fmt.Errorf("unknown parameter %s", key) 241 } 242 } 243 } 244 245 f.config.Labels = labels 246 f.config.Mode = configuration.CAT_MODE 247 f.config.UniqueId = uuid 248 249 f.logger.Debugf("Will try pattern %s", args[0]) 250 251 files, err := filepath.Glob(args[0]) 252 if err != nil { 253 return fmt.Errorf("glob failure: %w", err) 254 } 255 256 if len(files) == 0 { 257 return fmt.Errorf("no matching files for pattern %s", args[0]) 258 } 259 260 if len(files) > 1 { 261 f.logger.Infof("Will read %d files", len(files)) 262 } 263 264 for _, file := range files { 265 f.logger.Infof("Adding file %s to filelist", file) 266 f.files = append(f.files, file) 267 } 268 269 return nil 270 } 271 272 func (f *FileSource) GetMode() string { 273 return f.config.Mode 274 } 275 276 // SupportedModes returns the supported modes by the acquisition module 277 func (f *FileSource) SupportedModes() []string { 278 return []string{configuration.TAIL_MODE, configuration.CAT_MODE} 279 } 280 281 // OneShotAcquisition reads a set of file and returns when done 282 func (f *FileSource) OneShotAcquisition(out chan types.Event, t *tomb.Tomb) error { 283 f.logger.Debug("In oneshot") 284 285 for _, file := range f.files { 286 fi, err := os.Stat(file) 287 if err != nil { 288 return fmt.Errorf("could not stat file %s : %w", file, err) 289 } 290 291 if fi.IsDir() { 292 f.logger.Warnf("%s is a directory, ignoring it.", file) 293 continue 294 } 295 296 f.logger.Infof("reading %s at once", file) 297 298 err = f.readFile(file, out, t) 299 if err != nil { 300 return err 301 } 302 } 303 304 return nil 305 } 306 307 func (f *FileSource) GetMetrics() []prometheus.Collector { 308 return []prometheus.Collector{linesRead} 309 } 310 311 func (f *FileSource) GetAggregMetrics() []prometheus.Collector { 312 return []prometheus.Collector{linesRead} 313 } 314 315 func (f *FileSource) GetName() string { 316 return "file" 317 } 318 319 func (f *FileSource) CanRun() error { 320 return nil 321 } 322 323 func (f *FileSource) StreamingAcquisition(out chan types.Event, t *tomb.Tomb) error { 324 f.logger.Debug("Starting live acquisition") 325 t.Go(func() error { 326 return f.monitorNewFiles(out, t) 327 }) 328 329 for _, file := range f.files { 330 // before opening the file, check if we need to specifically avoid it. (XXX) 331 skip := false 332 333 for _, pattern := range f.exclude_regexps { 334 if pattern.MatchString(file) { 335 f.logger.Infof("file %s matches exclusion pattern %s, skipping", file, pattern.String()) 336 337 skip = true 338 339 break 340 } 341 } 342 343 if skip { 344 continue 345 } 346 347 // cf. https://github.com/crowdsecurity/crowdsec/issues/1168 348 // do not rely on stat, reclose file immediately as it's opened by Tail 349 fd, err := os.Open(file) 350 if err != nil { 351 f.logger.Errorf("unable to read %s : %s", file, err) 352 continue 353 } 354 355 if err := fd.Close(); err != nil { 356 f.logger.Errorf("unable to close %s : %s", file, err) 357 continue 358 } 359 360 fi, err := os.Stat(file) 361 if err != nil { 362 return fmt.Errorf("could not stat file %s : %w", file, err) 363 } 364 365 if fi.IsDir() { 366 f.logger.Warnf("%s is a directory, ignoring it.", file) 367 continue 368 } 369 370 pollFile := false 371 if f.config.PollWithoutInotify != nil { 372 pollFile = *f.config.PollWithoutInotify 373 } else { 374 networkFS, fsType, err := types.IsNetworkFS(file) 375 if err != nil { 376 f.logger.Warningf("Could not get fs type for %s : %s", file, err) 377 } 378 379 f.logger.Debugf("fs for %s is network: %t (%s)", file, networkFS, fsType) 380 381 if networkFS { 382 f.logger.Warnf("Disabling inotify polling on %s as it is on a network share. You can manually set poll_without_inotify to true to make this message disappear, or to false to enforce inotify poll", file) 383 pollFile = true 384 } 385 } 386 387 filink, err := os.Lstat(file) 388 389 if err != nil { 390 f.logger.Errorf("Could not lstat() new file %s, ignoring it : %s", file, err) 391 continue 392 } 393 394 if filink.Mode()&os.ModeSymlink == os.ModeSymlink && !pollFile { 395 f.logger.Warnf("File %s is a symlink, but inotify polling is enabled. Crowdsec will not be able to detect rotation. Consider setting poll_without_inotify to true in your configuration", file) 396 } 397 398 tail, err := tail.TailFile(file, tail.Config{ReOpen: true, Follow: true, Poll: pollFile, Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Logger: log.NewEntry(log.StandardLogger())}) 399 if err != nil { 400 f.logger.Errorf("Could not start tailing file %s : %s", file, err) 401 continue 402 } 403 404 f.tailMapMutex.Lock() 405 f.tails[file] = true 406 f.tailMapMutex.Unlock() 407 t.Go(func() error { 408 defer trace.CatchPanic("crowdsec/acquis/file/live/fsnotify") 409 return f.tailFile(out, t, tail) 410 }) 411 } 412 413 return nil 414 } 415 416 func (f *FileSource) Dump() interface{} { 417 return f 418 } 419 420 func (f *FileSource) monitorNewFiles(out chan types.Event, t *tomb.Tomb) error { 421 logger := f.logger.WithField("goroutine", "inotify") 422 423 for { 424 select { 425 case event, ok := <-f.watcher.Events: 426 if !ok { 427 return nil 428 } 429 430 if event.Op&fsnotify.Create == fsnotify.Create { 431 fi, err := os.Stat(event.Name) 432 if err != nil { 433 logger.Errorf("Could not stat() new file %s, ignoring it : %s", event.Name, err) 434 continue 435 } 436 437 if fi.IsDir() { 438 continue 439 } 440 441 logger.Debugf("Detected new file %s", event.Name) 442 443 matched := false 444 445 for _, pattern := range f.config.Filenames { 446 logger.Debugf("Matching %s with %s", pattern, event.Name) 447 448 matched, err = filepath.Match(pattern, event.Name) 449 if err != nil { 450 logger.Errorf("Could not match pattern : %s", err) 451 continue 452 } 453 454 if matched { 455 logger.Debugf("Matched %s with %s", pattern, event.Name) 456 break 457 } 458 } 459 460 if !matched { 461 continue 462 } 463 464 // before opening the file, check if we need to specifically avoid it. (XXX) 465 skip := false 466 467 for _, pattern := range f.exclude_regexps { 468 if pattern.MatchString(event.Name) { 469 f.logger.Infof("file %s matches exclusion pattern %s, skipping", event.Name, pattern.String()) 470 471 skip = true 472 473 break 474 } 475 } 476 477 if skip { 478 continue 479 } 480 481 f.tailMapMutex.RLock() 482 if f.tails[event.Name] { 483 f.tailMapMutex.RUnlock() 484 // we already have a tail on it, do not start a new one 485 logger.Debugf("Already tailing file %s, not creating a new tail", event.Name) 486 487 break 488 } 489 f.tailMapMutex.RUnlock() 490 // cf. https://github.com/crowdsecurity/crowdsec/issues/1168 491 // do not rely on stat, reclose file immediately as it's opened by Tail 492 fd, err := os.Open(event.Name) 493 if err != nil { 494 f.logger.Errorf("unable to read %s : %s", event.Name, err) 495 continue 496 } 497 if err := fd.Close(); err != nil { 498 f.logger.Errorf("unable to close %s : %s", event.Name, err) 499 continue 500 } 501 502 pollFile := false 503 if f.config.PollWithoutInotify != nil { 504 pollFile = *f.config.PollWithoutInotify 505 } else { 506 networkFS, fsType, err := types.IsNetworkFS(event.Name) 507 if err != nil { 508 f.logger.Warningf("Could not get fs type for %s : %s", event.Name, err) 509 } 510 f.logger.Debugf("fs for %s is network: %t (%s)", event.Name, networkFS, fsType) 511 if networkFS { 512 pollFile = true 513 } 514 } 515 516 filink, err := os.Lstat(event.Name) 517 518 if err != nil { 519 logger.Errorf("Could not lstat() new file %s, ignoring it : %s", event.Name, err) 520 continue 521 } 522 523 if filink.Mode()&os.ModeSymlink == os.ModeSymlink && !pollFile { 524 logger.Warnf("File %s is a symlink, but inotify polling is enabled. Crowdsec will not be able to detect rotation. Consider setting poll_without_inotify to true in your configuration", event.Name) 525 } 526 527 //Slightly different parameters for Location, as we want to read the first lines of the newly created file 528 tail, err := tail.TailFile(event.Name, tail.Config{ReOpen: true, Follow: true, Poll: pollFile, Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekStart}}) 529 if err != nil { 530 logger.Errorf("Could not start tailing file %s : %s", event.Name, err) 531 break 532 } 533 534 f.tailMapMutex.Lock() 535 f.tails[event.Name] = true 536 f.tailMapMutex.Unlock() 537 t.Go(func() error { 538 defer trace.CatchPanic("crowdsec/acquis/tailfile") 539 return f.tailFile(out, t, tail) 540 }) 541 } 542 case err, ok := <-f.watcher.Errors: 543 if !ok { 544 return nil 545 } 546 547 logger.Errorf("Error while monitoring folder: %s", err) 548 case <-t.Dying(): 549 err := f.watcher.Close() 550 if err != nil { 551 return fmt.Errorf("could not remove all inotify watches: %w", err) 552 } 553 554 return nil 555 } 556 } 557 } 558 559 func (f *FileSource) tailFile(out chan types.Event, t *tomb.Tomb, tail *tail.Tail) error { 560 logger := f.logger.WithField("tail", tail.Filename) 561 logger.Debugf("-> Starting tail of %s", tail.Filename) 562 563 for { 564 select { 565 case <-t.Dying(): 566 logger.Infof("File datasource %s stopping", tail.Filename) 567 568 if err := tail.Stop(); err != nil { 569 f.logger.Errorf("error in stop : %s", err) 570 return err 571 } 572 573 return nil 574 case <-tail.Dying(): // our tailer is dying 575 err := tail.Err() 576 errMsg := fmt.Sprintf("file reader of %s died", tail.Filename) 577 if err != nil { 578 errMsg = fmt.Sprintf(errMsg+" : %s", err) 579 } 580 581 logger.Warningf(errMsg) 582 583 return nil 584 case line := <-tail.Lines: 585 if line == nil { 586 logger.Warningf("tail for %s is empty", tail.Filename) 587 continue 588 } 589 590 if line.Err != nil { 591 logger.Warningf("fetch error : %v", line.Err) 592 return line.Err 593 } 594 595 if line.Text == "" { // skip empty lines 596 continue 597 } 598 599 if f.metricsLevel != configuration.METRICS_NONE { 600 linesRead.With(prometheus.Labels{"source": tail.Filename}).Inc() 601 } 602 603 src := tail.Filename 604 if f.metricsLevel == configuration.METRICS_AGGREGATE { 605 src = filepath.Base(tail.Filename) 606 } 607 608 l := types.Line{ 609 Raw: trimLine(line.Text), 610 Labels: f.config.Labels, 611 Time: line.Time, 612 Src: src, 613 Process: true, 614 Module: f.GetName(), 615 } 616 // we're tailing, it must be real time logs 617 logger.Debugf("pushing %+v", l) 618 619 expectMode := types.LIVE 620 if f.config.UseTimeMachine { 621 expectMode = types.TIMEMACHINE 622 } 623 out <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: expectMode} 624 } 625 } 626 } 627 628 func (f *FileSource) readFile(filename string, out chan types.Event, t *tomb.Tomb) error { 629 var scanner *bufio.Scanner 630 631 logger := f.logger.WithField("oneshot", filename) 632 fd, err := os.Open(filename) 633 634 if err != nil { 635 return fmt.Errorf("failed opening %s: %w", filename, err) 636 } 637 638 defer fd.Close() 639 640 if strings.HasSuffix(filename, ".gz") { 641 gz, err := gzip.NewReader(fd) 642 if err != nil { 643 logger.Errorf("Failed to read gz file: %s", err) 644 return fmt.Errorf("failed to read gz %s: %w", filename, err) 645 } 646 647 defer gz.Close() 648 scanner = bufio.NewScanner(gz) 649 } else { 650 scanner = bufio.NewScanner(fd) 651 } 652 653 scanner.Split(bufio.ScanLines) 654 655 if f.config.MaxBufferSize > 0 { 656 buf := make([]byte, 0, 64*1024) 657 scanner.Buffer(buf, f.config.MaxBufferSize) 658 } 659 660 for scanner.Scan() { 661 select { 662 case <-t.Dying(): 663 logger.Infof("File datasource %s stopping", filename) 664 return nil 665 default: 666 if scanner.Text() == "" { 667 continue 668 } 669 670 l := types.Line{ 671 Raw: scanner.Text(), 672 Time: time.Now().UTC(), 673 Src: filename, 674 Labels: f.config.Labels, 675 Process: true, 676 Module: f.GetName(), 677 } 678 logger.Debugf("line %s", l.Raw) 679 linesRead.With(prometheus.Labels{"source": filename}).Inc() 680 681 // we're reading logs at once, it must be time-machine buckets 682 out <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: types.TIMEMACHINE} 683 } 684 } 685 686 if err := scanner.Err(); err != nil { 687 logger.Errorf("Error while reading file: %s", err) 688 t.Kill(err) 689 690 return err 691 } 692 693 t.Kill(nil) 694 695 return nil 696 }