bitbucket.org/Aishee/synsec@v0.0.0-20210414005726-236fc01a153d/pkg/acquisition/file_reader.go (about) 1 package acquisition 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "fmt" 7 "os" 8 "path/filepath" 9 "strings" 10 "time" 11 12 "github.com/pkg/errors" 13 "golang.org/x/sys/unix" 14 15 leaky "bitbucket.org/Aishee/synsec/pkg/leakybucket" 16 17 "bitbucket.org/Aishee/synsec/pkg/types" 18 "github.com/nxadm/tail" 19 log "github.com/sirupsen/logrus" 20 21 "github.com/prometheus/client_golang/prometheus" 22 tomb "gopkg.in/tomb.v2" 23 ) 24 25 type FileSource struct { 26 Config DataSourceCfg 27 tails []*tail.Tail 28 Files []string 29 } 30 31 func (f *FileSource) Configure(Config DataSourceCfg) error { 32 f.Config = Config 33 if len(Config.Filename) == 0 && len(Config.Filenames) == 0 { 34 return fmt.Errorf("no filename or filenames") 35 } 36 37 //let's deal with the array no matter what 38 if len(Config.Filename) != 0 { 39 Config.Filenames = append(Config.Filenames, Config.Filename) 40 } 41 42 for _, fexpr := range Config.Filenames { 43 files, err := filepath.Glob(fexpr) 44 if err != nil { 45 return errors.Wrapf(err, "while globbing %s", fexpr) 46 } 47 if len(files) == 0 { 48 log.Warningf("[file datasource] no results for %s", fexpr) 49 continue 50 } 51 52 for _, file := range files { 53 /*check that we can read said file*/ 54 if err := unix.Access(file, unix.R_OK); err != nil { 55 return fmt.Errorf("unable to open %s : %s", file, err) 56 } 57 log.Infof("[file datasource] opening file '%s'", file) 58 59 if f.Config.Mode == TAIL_MODE { 60 tail, err := tail.TailFile(file, tail.Config{ReOpen: true, Follow: true, Poll: true, Location: &tail.SeekInfo{Offset: 0, Whence: 2}}) 61 if err != nil { 62 log.Errorf("[file datasource] skipping %s : %v", file, err) 63 continue 64 } 65 f.Files = append(f.Files, file) 66 f.tails = append(f.tails, tail) 67 } else if f.Config.Mode == CAT_MODE { 68 //simply check that the file exists, it will be read differently 69 if _, err := os.Stat(file); err != nil { 70 return fmt.Errorf("can't open file %s : %s", file, err) 71 } 72 f.Files = append(f.Files, file) 73 } else { 74 return fmt.Errorf("unknown mode %s for file acquisition", f.Config.Mode) 75 } 76 77 } 78 } 79 if len(f.Files) == 0 { 80 return fmt.Errorf("no files to read for %+v", Config.Filenames) 81 } 82 83 return nil 84 } 85 86 func (f *FileSource) Mode() string { 87 return f.Config.Mode 88 } 89 90 func (f *FileSource) StartReading(out chan types.Event, t *tomb.Tomb) error { 91 92 if f.Config.Mode == CAT_MODE { 93 return f.StartCat(out, t) 94 } else if f.Config.Mode == TAIL_MODE { 95 return f.StartTail(out, t) 96 } else { 97 return fmt.Errorf("unknown mode '%s' for file acquisition", f.Config.Mode) 98 } 99 } 100 101 /*A tail-mode file reader (tail) */ 102 func (f *FileSource) StartTail(output chan types.Event, AcquisTomb *tomb.Tomb) error { 103 log.Debugf("starting file tail with %d items", len(f.tails)) 104 for i := 0; i < len(f.tails); i++ { 105 idx := i 106 log.Debugf("starting %d", idx) 107 AcquisTomb.Go(func() error { 108 defer types.CatchPanic("synsec/acquis/tailfile") 109 return f.TailOneFile(output, AcquisTomb, idx) 110 }) 111 } 112 return nil 113 } 114 115 /*A one shot file reader (cat) */ 116 func (f *FileSource) StartCat(output chan types.Event, AcquisTomb *tomb.Tomb) error { 117 for i := 0; i < len(f.Files); i++ { 118 idx := i 119 log.Debugf("starting %d", idx) 120 AcquisTomb.Go(func() error { 121 defer types.CatchPanic("synsec/acquis/catfile") 122 return f.CatOneFile(output, AcquisTomb, idx) 123 }) 124 } 125 return nil 126 } 127 128 /*A tail-mode file reader (tail) */ 129 func (f *FileSource) TailOneFile(output chan types.Event, AcquisTomb *tomb.Tomb, idx int) error { 130 131 file := f.Files[idx] 132 tail := f.tails[idx] 133 134 clog := log.WithFields(log.Fields{ 135 "acquisition file": f.Files[idx], 136 }) 137 clog.Debugf("starting") 138 139 timeout := time.Tick(1 * time.Second) 140 141 for { 142 l := types.Line{} 143 select { 144 case <-AcquisTomb.Dying(): //we are being killed by main 145 clog.Infof("file datasource %s stopping", file) 146 if err := tail.Stop(); err != nil { 147 clog.Errorf("error in stop : %s", err) 148 } 149 return nil 150 case <-tail.Tomb.Dying(): //our tailer is dying 151 clog.Warningf("File reader of %s died", file) 152 AcquisTomb.Kill(fmt.Errorf("dead reader for %s", file)) 153 return fmt.Errorf("reader for %s is dead", file) 154 case line := <-tail.Lines: 155 if line == nil { 156 clog.Debugf("Nil line") 157 return fmt.Errorf("tail for %s is empty", file) 158 } 159 if line.Err != nil { 160 log.Warningf("fetch error : %v", line.Err) 161 return line.Err 162 } 163 if line.Text == "" { //skip empty lines 164 continue 165 } 166 ReaderHits.With(prometheus.Labels{"source": file}).Inc() 167 168 l.Raw = line.Text 169 l.Labels = f.Config.Labels 170 l.Time = line.Time 171 l.Src = file 172 l.Process = true 173 //we're tailing, it must be real time logs 174 log.Debugf("pushing %+v", l) 175 output <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: leaky.LIVE} 176 case <-timeout: 177 //time out, shall we do stuff ? 178 clog.Debugf("timeout") 179 } 180 } 181 } 182 183 /*A one shot file reader (cat) */ 184 func (f *FileSource) CatOneFile(output chan types.Event, AcquisTomb *tomb.Tomb, idx int) error { 185 var scanner *bufio.Scanner 186 187 log.Infof("reading %s at once", f.Files[idx]) 188 file := f.Files[idx] 189 190 clog := log.WithFields(log.Fields{ 191 "file": file, 192 }) 193 fd, err := os.Open(file) 194 defer fd.Close() 195 if err != nil { 196 clog.Errorf("Failed opening file: %s", err) 197 return errors.Wrapf(err, "failed opening %s", f.Files[idx]) 198 } 199 200 if strings.HasSuffix(file, ".gz") { 201 gz, err := gzip.NewReader(fd) 202 if err != nil { 203 clog.Errorf("Failed to read gz file: %s", err) 204 return errors.Wrapf(err, "failed to read gz %s", f.Files[idx]) 205 } 206 defer gz.Close() 207 scanner = bufio.NewScanner(gz) 208 209 } else { 210 scanner = bufio.NewScanner(fd) 211 } 212 scanner.Split(bufio.ScanLines) 213 for scanner.Scan() { 214 log.Tracef("line %s", scanner.Text()) 215 l := types.Line{} 216 l.Raw = scanner.Text() 217 l.Time = time.Now() 218 l.Src = file 219 l.Labels = f.Config.Labels 220 l.Process = true 221 ReaderHits.With(prometheus.Labels{"source": file}).Inc() 222 //we're reading logs at once, it must be time-machine buckets 223 output <- types.Event{Line: l, Process: true, Type: types.LOG, ExpectMode: leaky.TIMEMACHINE} 224 } 225 AcquisTomb.Kill(nil) 226 return nil 227 }