github.com/honeycombio/honeytail@v1.9.0/tail/tail.go (about) 1 // Package tail implements tailing a log file. 2 // 3 // tail provides a channel on which log lines will be sent as string messages. 4 // one line in the log file is one message on the channel 5 package tail 6 7 import ( 8 "bufio" 9 "context" 10 "crypto/sha1" 11 "encoding/json" 12 "errors" 13 "fmt" 14 "math/rand" 15 "os" 16 "path/filepath" 17 "strings" 18 "time" 19 20 "github.com/sirupsen/logrus" 21 "github.com/tenebris-tech/tail" 22 "golang.org/x/sys/unix" 23 ) 24 25 type RotateStyle int 26 27 const ( 28 // foo.log gets rotated to foo.log.1, new entries go to foo.log 29 RotateStyleSyslog RotateStyle = iota 30 // foo.log.OLDSTAMP gets closed, new entries go to foo.log.NEWSTAMP 31 // NOT YET IMPLEMENTED 32 RotateStyleTimestamp 33 ) 34 35 type TailOptions struct { 36 ReadFrom string `long:"read_from" description:"Location in the file from which to start reading. Values: beginning, end, last. Last picks up where it left off, if the file has not been rotated, otherwise beginning. When --backfill is set, it will override this option to beginning" yaml:"read_from,omitempty"` 37 Stop bool `long:"stop" description:"Stop reading the file after reaching the end rather than continuing to tail. When --backfill is set, it will override this option=true" yaml:"stop,omitempty"` 38 Poll bool `long:"poll" description:"use poll instead of inotify to tail files" yaml:"poll,omitempty"` 39 StateFile string `long:"statefile" description:"File in which to store the last read position. Defaults to a file in /tmp named $logfile.leash.state. If tailing multiple files, default is forced." yaml:"statefile,omitempty"` 40 HashStateFileDirPaths bool `long:"hash_statefile_paths" description:"Generates a hash of the directory path for each file that is used to uniquely identify each statefile. Prevents re-using the same statefile for tailed files that have the same name." yaml:"hash_statefile_paths,omitempty"` 41 } 42 43 // Statefile mechanics when ReadFrom is 'last' 44 // missing statefile => ReadFrom = end 45 // empty statefile => ReadFrom = end 46 // permission denied => WARN and ReadFrom = end 47 // invalid location (aka logfile's been rotated) => ReadFrom = beginning 48 49 type Config struct { 50 // Path to the log file to tail 51 Paths []string 52 53 // Paths to exclude from tailing 54 FilterPaths []string 55 56 // Type of log rotation we expect on this file 57 Type RotateStyle 58 // Tail specific options 59 Options TailOptions 60 } 61 62 // State is what's stored in a statefile 63 type State struct { 64 INode uint64 // the inode 65 Offset int64 66 } 67 68 // GetSampledEntries wraps GetEntries and returns a list of channels that 69 // provide sampled entries 70 func GetSampledEntries(ctx context.Context, conf Config, sampleRate uint) ([]chan string, error) { 71 unsampledLinesChans, err := GetEntries(ctx, conf) 72 if err != nil { 73 return nil, err 74 } 75 if sampleRate == 1 { 76 return unsampledLinesChans, nil 77 } 78 79 sampledLinesChans := make([]chan string, 0, len(unsampledLinesChans)) 80 81 for _, lines := range unsampledLinesChans { 82 sampledLines := make(chan string) 83 go func(pLines chan string) { 84 defer close(sampledLines) 85 for line := range pLines { 86 if shouldDrop(sampleRate) { 87 logrus.WithFields(logrus.Fields{ 88 "line": line, 89 "samplerate": sampleRate, 90 }).Debug("Sampler says skip this line") 91 } else { 92 sampledLines <- line 93 } 94 } 95 }(lines) 96 sampledLinesChans = append(sampledLinesChans, sampledLines) 97 } 98 return sampledLinesChans, nil 99 } 100 101 // shouldDrop returns true if the line should be dropped 102 // false if it should be kept 103 // if sampleRate is 5, 104 // on average one out of every 5 calls should return false 105 func shouldDrop(rate uint) bool { 106 return rand.Intn(int(rate)) != 0 107 } 108 109 // GetEntries sets up a list of channels that get one line at a time from each 110 // file down each channel. 111 func GetEntries(ctx context.Context, conf Config) ([]chan string, error) { 112 if conf.Type != RotateStyleSyslog { 113 return nil, errors.New("Only Syslog style rotation currently supported") 114 } 115 // expand any globs in the list of files so our list all represents real files 116 var filenames []string 117 for _, filePath := range conf.Paths { 118 if filePath == "-" { 119 filenames = append(filenames, filePath) 120 } else { 121 files, err := filepath.Glob(filePath) 122 if err != nil { 123 return nil, err 124 } 125 files = removeStateFiles(files, conf) 126 files = removeFilteredPaths(files, conf.FilterPaths) 127 filenames = append(filenames, files...) 128 } 129 } 130 if len(filenames) == 0 { 131 return nil, errors.New("After removing missing files and state files from the list, there are no files left to tail") 132 } 133 134 // make our lines channel list; we'll get one channel for each file 135 linesChans := make([]chan string, 0, len(filenames)) 136 numFiles := len(filenames) 137 for _, file := range filenames { 138 var lines chan string 139 if file == "-" { 140 lines = tailStdIn(ctx) 141 } else { 142 stateFile := getStateFile(conf, file, numFiles) 143 tailer, err := getTailer(conf, file, stateFile) 144 if err != nil { 145 return nil, err 146 } 147 lines = tailSingleFile(ctx, tailer, file, stateFile) 148 } 149 linesChans = append(linesChans, lines) 150 } 151 152 return linesChans, nil 153 } 154 155 // removeStateFiles goes through the list of files and removes any that appear 156 // to be statefiles to avoid .leash.state.leash.state.leash.state from appearing 157 // when you use an overly permissive glob 158 func removeStateFiles(files []string, conf Config) []string { 159 newFiles := []string{} 160 for _, file := range files { 161 if file == conf.Options.StateFile { 162 logrus.WithFields(logrus.Fields{ 163 "file": file, 164 }).Debug("skipping tailing file because it is named the same as the statefile flag") 165 continue 166 } 167 if strings.HasSuffix(file, ".leash.state") { 168 logrus.WithFields(logrus.Fields{ 169 "file": file, 170 }).Debug("skipping tailing file because the filename ends with .leash.state") 171 continue 172 } 173 // great! it's not a state file. let's use it. 174 newFiles = append(newFiles, file) 175 } 176 return newFiles 177 } 178 179 func removeFilteredPaths(files []string, filteredPaths []string) []string { 180 newFiles := []string{} 181 182 for _, file := range files { 183 shouldRemove := false 184 for _, filteredPath := range filteredPaths { 185 if matched, err := filepath.Match(filteredPath, file); err != nil { 186 logrus.WithError(err).WithFields(logrus.Fields{ 187 "file": file, 188 "filter": filteredPath, 189 }).Warn("unable to compare file with filter, will exclude file") 190 shouldRemove = true 191 continue 192 } else if matched { 193 logrus.WithFields(logrus.Fields{ 194 "file": file, 195 "filter": filteredPath, 196 }).Info("removing file as it matches filter") 197 shouldRemove = true 198 } 199 } 200 if !shouldRemove { 201 newFiles = append(newFiles, file) 202 } 203 } 204 205 return newFiles 206 } 207 208 func tailSingleFile(ctx context.Context, tailer *tail.Tail, file string, stateFile string) chan string { 209 lines := make(chan string) 210 // TODO report some metric to indicate whether we're keeping up with the 211 // front of the file, of if it's being written faster than we can send 212 // events 213 214 stateFh, err := os.OpenFile(stateFile, os.O_RDWR|os.O_CREATE, 0644) 215 if err != nil { 216 logrus.WithFields(logrus.Fields{ 217 "logfile": file, 218 "statefile": stateFile, 219 }).Warn("Failed to open statefile for writing. File location will not be saved.") 220 } 221 222 ticker := time.NewTicker(time.Second) 223 state := State{} 224 go func() { 225 for range ticker.C { 226 updateStateFile(&state, tailer, file, stateFh) 227 } 228 }() 229 230 go func() { 231 ReadLines: 232 for { 233 select { 234 case line, ok := <-tailer.Lines: 235 if !ok { 236 // tailer.Lines is closed 237 break ReadLines 238 } 239 if line.Err != nil { 240 // skip errored lines 241 continue 242 } 243 lines <- line.Text 244 case <-ctx.Done(): 245 // will only trigger when the context is cancelled 246 break ReadLines 247 } 248 } 249 close(lines) 250 ticker.Stop() 251 updateStateFile(&state, tailer, file, stateFh) 252 stateFh.Close() 253 }() 254 return lines 255 } 256 257 // tailStdIn is a special case to tail STDIN without any of the 258 // fancy stuff that the tail module provides 259 func tailStdIn(ctx context.Context) chan string { 260 lines := make(chan string) 261 input := bufio.NewReader(os.Stdin) 262 go func() { 263 defer close(lines) 264 for { 265 // check for signal triggered exit 266 select { 267 case <-ctx.Done(): 268 return 269 default: 270 } 271 line, partialLine, err := input.ReadLine() 272 if err != nil { 273 logrus.Debug("stdin is closed") 274 // bail when STDIN closes 275 return 276 } 277 var parts []string 278 parts = append(parts, string(line)) 279 for partialLine { 280 line, partialLine, _ = input.ReadLine() 281 parts = append(parts, string(line)) 282 } 283 lines <- strings.Join(parts, "") 284 } 285 }() 286 return lines 287 } 288 289 // getStartLocation reads the state file and creates an appropriate start 290 // location. See details at the top of this file on how the loc is chosen. 291 func getStartLocation(stateFile string, logfile string) *tail.SeekInfo { 292 beginning := &tail.SeekInfo{} 293 end := &tail.SeekInfo{0, 2} 294 fh, err := os.Open(stateFile) 295 if err != nil { 296 logrus.WithFields(logrus.Fields{ 297 "starting at": "end", "error": err, 298 }).Debug("getStartLocation failed to open the statefile") 299 return end 300 } 301 defer fh.Close() 302 // read the contents of the state file (JSON) 303 content := make([]byte, 1024) 304 bytesRead, err := fh.Read(content) 305 if err != nil { 306 logrus.WithFields(logrus.Fields{ 307 "starting at": "end", "error": err, 308 }).Debug("getStartLocation failed to read the statefile contents") 309 return end 310 } 311 content = content[:bytesRead] 312 // decode the contents of the statefile 313 state := State{} 314 if err := json.Unmarshal(content, &state); err != nil { 315 logrus.WithFields(logrus.Fields{ 316 "starting at": "end", "error": err, 317 }).Debug("getStartLocation failed to json decode the statefile") 318 return end 319 } 320 // get the details of the existing log file 321 logStat := unix.Stat_t{} 322 if err := unix.Stat(logfile, &logStat); err != nil { 323 logrus.WithFields(logrus.Fields{ 324 "starting at": "end", "error": err, 325 }).Debug("getStartLocation failed to get unix.stat() on the logfile") 326 return end 327 } 328 // compare inode numbers of the last-seen and existing log files 329 if state.INode != uint64(logStat.Ino) { 330 logrus.WithFields(logrus.Fields{ 331 "starting at": "beginning", "error": err, 332 }).Debug("getStartLocation found a different inode number for the logfile") 333 // file's been rotated 334 return beginning 335 } 336 logrus.WithFields(logrus.Fields{ 337 "starting at": state.Offset, 338 }).Debug("getStartLocation seeking to offset in logfile") 339 // we're good; start reading from the remembered state 340 return &tail.SeekInfo{ 341 Offset: state.Offset, 342 Whence: 0, 343 } 344 } 345 346 // getTailer configures the *tail.Tail correctly to begin actually tailing the 347 // specified file. 348 func getTailer(conf Config, file string, stateFile string) (*tail.Tail, error) { 349 // tail a real file 350 var loc *tail.SeekInfo // 0 value means start at beginning 351 var reOpen, follow bool = true, true 352 var readFrom = conf.Options.ReadFrom 353 if readFrom == "" { 354 readFrom = "last" 355 } 356 switch readFrom { 357 case "start", "beginning": 358 // 0 value for tail.SeekInfo means start at beginning 359 case "end": 360 loc = &tail.SeekInfo{ 361 Offset: 0, 362 Whence: 2, 363 } 364 case "last": 365 loc = getStartLocation(stateFile, file) 366 default: 367 errMsg := fmt.Sprintf("unknown option to --read_from: %s", readFrom) 368 return nil, errors.New(errMsg) 369 } 370 if conf.Options.Stop { 371 reOpen = false 372 follow = false 373 } 374 tailConf := tail.Config{ 375 Location: loc, 376 ReOpen: reOpen, // keep reading on rotation, aka tail -F 377 MustExist: true, // fail if log file doesn't exist 378 Follow: follow, // don't stop at EOF, aka tail -f 379 Logger: tail.DiscardingLogger, 380 Poll: conf.Options.Poll, // use poll instead of inotify 381 } 382 logrus.WithFields(logrus.Fields{ 383 "tailConf": tailConf, 384 "conf": conf, 385 "statefile": stateFile, 386 "location": loc, 387 }).Debug("about to call tail.TailFile") 388 return tail.TailFile(file, tailConf) 389 } 390 391 // getStateFile returns the filename to use to track honeytail state. 392 // 393 // If a --tail.statefile parameter is provided, we try to respect it. 394 // It might describe an existing file, an existing directory, or a new path. 395 // 396 // If tailing a single logfile, we will use the specified --tail.statefile: 397 // - if it points to an existing file, that statefile will be used directly 398 // - if it points to a new path, that path will be written to directly 399 // - if it points to an existing directory, the statefile will be placed inside 400 // the directory (and the statefile's name will be derived from the logfile). 401 // 402 // If honeytail is asked to tail multiple files, we will only respect the 403 // third case, where --tail.statefile describes an existing directory. 404 // 405 // The default behavior (or if --tail.statefile isn't respected) will be to 406 // write to the system's $TMPDIR/ and write to a statefile (where the name will 407 // be derived from the logfile). 408 func getStateFile(conf Config, filename string, numFiles int) string { 409 confStateFile := os.TempDir() 410 if conf.Options.StateFile != "" { 411 info, err := os.Stat(conf.Options.StateFile) 412 if numFiles == 1 && (os.IsNotExist(err) || (err == nil && !info.IsDir())) { 413 return conf.Options.StateFile 414 } else if err == nil && info.IsDir() { 415 // If the --tail.statefile is a directory, write statefile inside the specified directory 416 confStateFile = conf.Options.StateFile 417 } else { 418 logrus.Debugf("Couldn't write to --tail.statefile=%s, writing honeytail state for %s to $TMPDIR (%s) instead.", 419 conf.Options.StateFile, filename, confStateFile) 420 } 421 } 422 423 stateFileName := strings.TrimSuffix(filepath.Base(filename), ".log") + ".leash.state" 424 if conf.Options.HashStateFileDirPaths { 425 // generate hash based on filepath, in format 'filename.leash.state-{hash}' 426 stateFileName += fmt.Sprintf("-%x", sha1.Sum([]byte(filename))) 427 } 428 return filepath.Join(confStateFile, stateFileName) 429 } 430 431 // updateStateFile updates the state file once per second with the current 432 // values for the logfile's inode number and offset 433 func updateStateFile(state *State, t *tail.Tail, file string, stateFh *os.File) { 434 logStat := unix.Stat_t{} 435 unix.Stat(file, &logStat) 436 currentPos, err := t.Tell() 437 if err != nil { 438 return 439 } 440 state.INode = uint64(logStat.Ino) 441 state.Offset = currentPos 442 out, err := json.Marshal(state) 443 if err != nil { 444 return 445 } 446 stateFh.Truncate(0) 447 out = append(out, '\n') 448 stateFh.WriteAt(out, 0) 449 stateFh.Sync() 450 }