bitbucket.org/Aishee/synsec@v0.0.0-20210414005726-236fc01a153d/pkg/acquisition/file_reader.go (about)

     1  package acquisition
     2  
     3  import (
     4  	"bufio"
     5  	"compress/gzip"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/pkg/errors"
    13  	"golang.org/x/sys/unix"
    14  
    15  	leaky "bitbucket.org/Aishee/synsec/pkg/leakybucket"
    16  
    17  	"bitbucket.org/Aishee/synsec/pkg/types"
    18  	"github.com/nxadm/tail"
    19  	log "github.com/sirupsen/logrus"
    20  
    21  	"github.com/prometheus/client_golang/prometheus"
    22  	tomb "gopkg.in/tomb.v2"
    23  )
    24  
// FileSource is a file-backed acquisition source. It is set up by Configure
// and then read either by following the files (tail mode) or by reading them
// once from start to end (cat mode).
type FileSource struct {
	// Config is the configuration handed to Configure.
	Config DataSourceCfg
	// tails holds the tail handles opened by Configure in tail mode;
	// tails[i] follows Files[i]. It stays empty in cat mode.
	tails  []*tail.Tail
	// Files lists the paths, after glob expansion, that Configure accepted.
	Files  []string
}
    30  
    31  func (f *FileSource) Configure(Config DataSourceCfg) error {
    32  	f.Config = Config
    33  	if len(Config.Filename) == 0 && len(Config.Filenames) == 0 {
    34  		return fmt.Errorf("no filename or filenames")
    35  	}
    36  
    37  	//let's deal with the array no matter what
    38  	if len(Config.Filename) != 0 {
    39  		Config.Filenames = append(Config.Filenames, Config.Filename)
    40  	}
    41  
    42  	for _, fexpr := range Config.Filenames {
    43  		files, err := filepath.Glob(fexpr)
    44  		if err != nil {
    45  			return errors.Wrapf(err, "while globbing %s", fexpr)
    46  		}
    47  		if len(files) == 0 {
    48  			log.Warningf("[file datasource] no results for %s", fexpr)
    49  			continue
    50  		}
    51  
    52  		for _, file := range files {
    53  			/*check that we can read said file*/
    54  			if err := unix.Access(file, unix.R_OK); err != nil {
    55  				return fmt.Errorf("unable to open %s : %s", file, err)
    56  			}
    57  			log.Infof("[file datasource] opening file '%s'", file)
    58  
    59  			if f.Config.Mode == TAIL_MODE {
    60  				tail, err := tail.TailFile(file, tail.Config{ReOpen: true, Follow: true, Poll: true, Location: &tail.SeekInfo{Offset: 0, Whence: 2}})
    61  				if err != nil {
    62  					log.Errorf("[file datasource] skipping %s : %v", file, err)
    63  					continue
    64  				}
    65  				f.Files = append(f.Files, file)
    66  				f.tails = append(f.tails, tail)
    67  			} else if f.Config.Mode == CAT_MODE {
    68  				//simply check that the file exists, it will be read differently
    69  				if _, err := os.Stat(file); err != nil {
    70  					return fmt.Errorf("can't open file %s : %s", file, err)
    71  				}
    72  				f.Files = append(f.Files, file)
    73  			} else {
    74  				return fmt.Errorf("unknown mode %s for file acquisition", f.Config.Mode)
    75  			}
    76  
    77  		}
    78  	}
    79  	if len(f.Files) == 0 {
    80  		return fmt.Errorf("no files to read for %+v", Config.Filenames)
    81  	}
    82  
    83  	return nil
    84  }
    85  
    86  func (f *FileSource) Mode() string {
    87  	return f.Config.Mode
    88  }
    89  
    90  func (f *FileSource) StartReading(out chan types.Event, t *tomb.Tomb) error {
    91  
    92  	if f.Config.Mode == CAT_MODE {
    93  		return f.StartCat(out, t)
    94  	} else if f.Config.Mode == TAIL_MODE {
    95  		return f.StartTail(out, t)
    96  	} else {
    97  		return fmt.Errorf("unknown mode '%s' for file acquisition", f.Config.Mode)
    98  	}
    99  }
   100  
   101  /*A tail-mode file reader (tail) */
   102  func (f *FileSource) StartTail(output chan types.Event, AcquisTomb *tomb.Tomb) error {
   103  	log.Debugf("starting file tail with %d items", len(f.tails))
   104  	for i := 0; i < len(f.tails); i++ {
   105  		idx := i
   106  		log.Debugf("starting %d", idx)
   107  		AcquisTomb.Go(func() error {
   108  			defer types.CatchPanic("synsec/acquis/tailfile")
   109  			return f.TailOneFile(output, AcquisTomb, idx)
   110  		})
   111  	}
   112  	return nil
   113  }
   114  
   115  /*A one shot file reader (cat) */
   116  func (f *FileSource) StartCat(output chan types.Event, AcquisTomb *tomb.Tomb) error {
   117  	for i := 0; i < len(f.Files); i++ {
   118  		idx := i
   119  		log.Debugf("starting %d", idx)
   120  		AcquisTomb.Go(func() error {
   121  			defer types.CatchPanic("synsec/acquis/catfile")
   122  			return f.CatOneFile(output, AcquisTomb, idx)
   123  		})
   124  	}
   125  	return nil
   126  }
   127  
   128  /*A tail-mode file reader (tail) */
   129  func (f *FileSource) TailOneFile(output chan types.Event, AcquisTomb *tomb.Tomb, idx int) error {
   130  
   131  	file := f.Files[idx]
   132  	tail := f.tails[idx]
   133  
   134  	clog := log.WithFields(log.Fields{
   135  		"acquisition file": f.Files[idx],
   136  	})
   137  	clog.Debugf("starting")
   138  
   139  	timeout := time.Tick(1 * time.Second)
   140  
   141  	for {
   142  		l := types.Line{}
   143  		select {
   144  		case <-AcquisTomb.Dying(): //we are being killed by main
   145  			clog.Infof("file datasource %s stopping", file)
   146  			if err := tail.Stop(); err != nil {
   147  				clog.Errorf("error in stop : %s", err)
   148  			}
   149  			return nil
   150  		case <-tail.Tomb.Dying(): //our tailer is dying
   151  			clog.Warningf("File reader of %s died", file)
   152  			AcquisTomb.Kill(fmt.Errorf("dead reader for %s", file))
   153  			return fmt.Errorf("reader for %s is dead", file)
   154  		case line := <-tail.Lines:
   155  			if line == nil {
   156  				clog.Debugf("Nil line")
   157  				return fmt.Errorf("tail for %s is empty", file)
   158  			}
   159  			if line.Err != nil {
   160  				log.Warningf("fetch error : %v", line.Err)
   161  				return line.Err
   162  			}
   163  			if line.Text == "" { //skip empty lines
   164  				continue
   165  			}
   166  			ReaderHits.With(prometheus.Labels{"source": file}).Inc()
   167  
   168  			l.Raw = line.Text
   169  			l.Labels = f.Config.Labels
   170  			l.Time = line.Time
   171  			l.Src = file
   172  			l.Process = true
   173  			//we're tailing, it must be real time logs
   174  			log.Debugf("pushing %+v", l)
   175  			output <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: leaky.LIVE}
   176  		case <-timeout:
   177  			//time out, shall we do stuff ?
   178  			clog.Debugf("timeout")
   179  		}
   180  	}
   181  }
   182  
   183  /*A one shot file reader (cat) */
   184  func (f *FileSource) CatOneFile(output chan types.Event, AcquisTomb *tomb.Tomb, idx int) error {
   185  	var scanner *bufio.Scanner
   186  
   187  	log.Infof("reading %s at once", f.Files[idx])
   188  	file := f.Files[idx]
   189  
   190  	clog := log.WithFields(log.Fields{
   191  		"file": file,
   192  	})
   193  	fd, err := os.Open(file)
   194  	defer fd.Close()
   195  	if err != nil {
   196  		clog.Errorf("Failed opening file: %s", err)
   197  		return errors.Wrapf(err, "failed opening %s", f.Files[idx])
   198  	}
   199  
   200  	if strings.HasSuffix(file, ".gz") {
   201  		gz, err := gzip.NewReader(fd)
   202  		if err != nil {
   203  			clog.Errorf("Failed to read gz file: %s", err)
   204  			return errors.Wrapf(err, "failed to read gz %s", f.Files[idx])
   205  		}
   206  		defer gz.Close()
   207  		scanner = bufio.NewScanner(gz)
   208  
   209  	} else {
   210  		scanner = bufio.NewScanner(fd)
   211  	}
   212  	scanner.Split(bufio.ScanLines)
   213  	for scanner.Scan() {
   214  		log.Tracef("line %s", scanner.Text())
   215  		l := types.Line{}
   216  		l.Raw = scanner.Text()
   217  		l.Time = time.Now()
   218  		l.Src = file
   219  		l.Labels = f.Config.Labels
   220  		l.Process = true
   221  		ReaderHits.With(prometheus.Labels{"source": file}).Inc()
   222  		//we're reading logs at once, it must be time-machine buckets
   223  		output <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: leaky.TIMEMACHINE}
   224  	}
   225  	AcquisTomb.Kill(nil)
   226  	return nil
   227  }