github.com/honeycombio/honeytail@v1.9.0/tail/tail.go (about)

     1  // Package tail implements tailing a log file.
     2  //
     3  // tail provides a channel on which log lines will be sent as string messages.
     4  // one line in the log file is one message on the channel
     5  package tail
     6  
     7  import (
     8  	"bufio"
     9  	"context"
    10  	"crypto/sha1"
    11  	"encoding/json"
    12  	"errors"
    13  	"fmt"
    14  	"math/rand"
    15  	"os"
    16  	"path/filepath"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/sirupsen/logrus"
    21  	"github.com/tenebris-tech/tail"
    22  	"golang.org/x/sys/unix"
    23  )
    24  
    25  type RotateStyle int
    26  
    27  const (
    28  	// foo.log gets rotated to foo.log.1, new entries go to foo.log
    29  	RotateStyleSyslog RotateStyle = iota
    30  	// foo.log.OLDSTAMP gets closed, new entries go to foo.log.NEWSTAMP
    31  	// NOT YET IMPLEMENTED
    32  	RotateStyleTimestamp
    33  )
    34  
    35  type TailOptions struct {
    36  	ReadFrom              string `long:"read_from" description:"Location in the file from which to start reading. Values: beginning, end, last. Last picks up where it left off, if the file has not been rotated, otherwise beginning. When --backfill is set, it will override this option to beginning" yaml:"read_from,omitempty"`
    37  	Stop                  bool   `long:"stop" description:"Stop reading the file after reaching the end rather than continuing to tail. When --backfill is set, it will override this option=true" yaml:"stop,omitempty"`
    38  	Poll                  bool   `long:"poll" description:"use poll instead of inotify to tail files" yaml:"poll,omitempty"`
    39  	StateFile             string `long:"statefile" description:"File in which to store the last read position. Defaults to a file in /tmp named $logfile.leash.state. If tailing multiple files, default is forced." yaml:"statefile,omitempty"`
    40  	HashStateFileDirPaths bool   `long:"hash_statefile_paths" description:"Generates a hash of the directory path for each file that is used to uniquely identify each statefile. Prevents re-using the same statefile for tailed files that have the same name." yaml:"hash_statefile_paths,omitempty"`
    41  }
    42  
    43  // Statefile mechanics when ReadFrom is 'last'
    44  // missing statefile => ReadFrom = end
    45  // empty statefile => ReadFrom = end
    46  // permission denied => WARN and ReadFrom = end
    47  // invalid location (aka logfile's been rotated) => ReadFrom = beginning
    48  
    49  type Config struct {
    50  	// Path to the log file to tail
    51  	Paths []string
    52  
    53  	// Paths to exclude from tailing
    54  	FilterPaths []string
    55  
    56  	// Type of log rotation we expect on this file
    57  	Type RotateStyle
    58  	// Tail specific options
    59  	Options TailOptions
    60  }
    61  
    62  // State is what's stored in a statefile
    63  type State struct {
    64  	INode  uint64 // the inode
    65  	Offset int64
    66  }
    67  
    68  // GetSampledEntries wraps GetEntries and returns a list of channels that
    69  // provide sampled entries
    70  func GetSampledEntries(ctx context.Context, conf Config, sampleRate uint) ([]chan string, error) {
    71  	unsampledLinesChans, err := GetEntries(ctx, conf)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	if sampleRate == 1 {
    76  		return unsampledLinesChans, nil
    77  	}
    78  
    79  	sampledLinesChans := make([]chan string, 0, len(unsampledLinesChans))
    80  
    81  	for _, lines := range unsampledLinesChans {
    82  		sampledLines := make(chan string)
    83  		go func(pLines chan string) {
    84  			defer close(sampledLines)
    85  			for line := range pLines {
    86  				if shouldDrop(sampleRate) {
    87  					logrus.WithFields(logrus.Fields{
    88  						"line":       line,
    89  						"samplerate": sampleRate,
    90  					}).Debug("Sampler says skip this line")
    91  				} else {
    92  					sampledLines <- line
    93  				}
    94  			}
    95  		}(lines)
    96  		sampledLinesChans = append(sampledLinesChans, sampledLines)
    97  	}
    98  	return sampledLinesChans, nil
    99  }
   100  
   101  // shouldDrop returns true if the line should be dropped
   102  // false if it should be kept
   103  // if sampleRate is 5,
   104  // on average one out of every 5 calls should return false
   105  func shouldDrop(rate uint) bool {
   106  	return rand.Intn(int(rate)) != 0
   107  }
   108  
   109  // GetEntries sets up a list of channels that get one line at a time from each
   110  // file down each channel.
   111  func GetEntries(ctx context.Context, conf Config) ([]chan string, error) {
   112  	if conf.Type != RotateStyleSyslog {
   113  		return nil, errors.New("Only Syslog style rotation currently supported")
   114  	}
   115  	// expand any globs in the list of files so our list all represents real files
   116  	var filenames []string
   117  	for _, filePath := range conf.Paths {
   118  		if filePath == "-" {
   119  			filenames = append(filenames, filePath)
   120  		} else {
   121  			files, err := filepath.Glob(filePath)
   122  			if err != nil {
   123  				return nil, err
   124  			}
   125  			files = removeStateFiles(files, conf)
   126  			files = removeFilteredPaths(files, conf.FilterPaths)
   127  			filenames = append(filenames, files...)
   128  		}
   129  	}
   130  	if len(filenames) == 0 {
   131  		return nil, errors.New("After removing missing files and state files from the list, there are no files left to tail")
   132  	}
   133  
   134  	// make our lines channel list; we'll get one channel for each file
   135  	linesChans := make([]chan string, 0, len(filenames))
   136  	numFiles := len(filenames)
   137  	for _, file := range filenames {
   138  		var lines chan string
   139  		if file == "-" {
   140  			lines = tailStdIn(ctx)
   141  		} else {
   142  			stateFile := getStateFile(conf, file, numFiles)
   143  			tailer, err := getTailer(conf, file, stateFile)
   144  			if err != nil {
   145  				return nil, err
   146  			}
   147  			lines = tailSingleFile(ctx, tailer, file, stateFile)
   148  		}
   149  		linesChans = append(linesChans, lines)
   150  	}
   151  
   152  	return linesChans, nil
   153  }
   154  
   155  // removeStateFiles goes through the list of files and removes any that appear
   156  // to be statefiles to avoid .leash.state.leash.state.leash.state from appearing
   157  // when you use an overly permissive glob
   158  func removeStateFiles(files []string, conf Config) []string {
   159  	newFiles := []string{}
   160  	for _, file := range files {
   161  		if file == conf.Options.StateFile {
   162  			logrus.WithFields(logrus.Fields{
   163  				"file": file,
   164  			}).Debug("skipping tailing file because it is named the same as the statefile flag")
   165  			continue
   166  		}
   167  		if strings.HasSuffix(file, ".leash.state") {
   168  			logrus.WithFields(logrus.Fields{
   169  				"file": file,
   170  			}).Debug("skipping tailing file because the filename ends with .leash.state")
   171  			continue
   172  		}
   173  		// great! it's not a state file. let's use it.
   174  		newFiles = append(newFiles, file)
   175  	}
   176  	return newFiles
   177  }
   178  
   179  func removeFilteredPaths(files []string, filteredPaths []string) []string {
   180  	newFiles := []string{}
   181  
   182  	for _, file := range files {
   183  		shouldRemove := false
   184  		for _, filteredPath := range filteredPaths {
   185  			if matched, err := filepath.Match(filteredPath, file); err != nil {
   186  				logrus.WithError(err).WithFields(logrus.Fields{
   187  					"file":   file,
   188  					"filter": filteredPath,
   189  				}).Warn("unable to compare file with filter, will exclude file")
   190  				shouldRemove = true
   191  				continue
   192  			} else if matched {
   193  				logrus.WithFields(logrus.Fields{
   194  					"file":   file,
   195  					"filter": filteredPath,
   196  				}).Info("removing file as it matches filter")
   197  				shouldRemove = true
   198  			}
   199  		}
   200  		if !shouldRemove {
   201  			newFiles = append(newFiles, file)
   202  		}
   203  	}
   204  
   205  	return newFiles
   206  }
   207  
   208  func tailSingleFile(ctx context.Context, tailer *tail.Tail, file string, stateFile string) chan string {
   209  	lines := make(chan string)
   210  	// TODO report some metric to indicate whether we're keeping up with the
   211  	// front of the file, of if it's being written faster than we can send
   212  	// events
   213  
   214  	stateFh, err := os.OpenFile(stateFile, os.O_RDWR|os.O_CREATE, 0644)
   215  	if err != nil {
   216  		logrus.WithFields(logrus.Fields{
   217  			"logfile":   file,
   218  			"statefile": stateFile,
   219  		}).Warn("Failed to open statefile for writing. File location will not be saved.")
   220  	}
   221  
   222  	ticker := time.NewTicker(time.Second)
   223  	state := State{}
   224  	go func() {
   225  		for range ticker.C {
   226  			updateStateFile(&state, tailer, file, stateFh)
   227  		}
   228  	}()
   229  
   230  	go func() {
   231  	ReadLines:
   232  		for {
   233  			select {
   234  			case line, ok := <-tailer.Lines:
   235  				if !ok {
   236  					// tailer.Lines is closed
   237  					break ReadLines
   238  				}
   239  				if line.Err != nil {
   240  					// skip errored lines
   241  					continue
   242  				}
   243  				lines <- line.Text
   244  			case <-ctx.Done():
   245  				// will only trigger when the context is cancelled
   246  				break ReadLines
   247  			}
   248  		}
   249  		close(lines)
   250  		ticker.Stop()
   251  		updateStateFile(&state, tailer, file, stateFh)
   252  		stateFh.Close()
   253  	}()
   254  	return lines
   255  }
   256  
   257  // tailStdIn is a special case to tail STDIN without any of the
   258  // fancy stuff that the tail module provides
   259  func tailStdIn(ctx context.Context) chan string {
   260  	lines := make(chan string)
   261  	input := bufio.NewReader(os.Stdin)
   262  	go func() {
   263  		defer close(lines)
   264  		for {
   265  			// check for signal triggered exit
   266  			select {
   267  			case <-ctx.Done():
   268  				return
   269  			default:
   270  			}
   271  			line, partialLine, err := input.ReadLine()
   272  			if err != nil {
   273  				logrus.Debug("stdin is closed")
   274  				// bail when STDIN closes
   275  				return
   276  			}
   277  			var parts []string
   278  			parts = append(parts, string(line))
   279  			for partialLine {
   280  				line, partialLine, _ = input.ReadLine()
   281  				parts = append(parts, string(line))
   282  			}
   283  			lines <- strings.Join(parts, "")
   284  		}
   285  	}()
   286  	return lines
   287  }
   288  
   289  // getStartLocation reads the state file and creates an appropriate start
   290  // location.  See details at the top of this file on how the loc is chosen.
   291  func getStartLocation(stateFile string, logfile string) *tail.SeekInfo {
   292  	beginning := &tail.SeekInfo{}
   293  	end := &tail.SeekInfo{0, 2}
   294  	fh, err := os.Open(stateFile)
   295  	if err != nil {
   296  		logrus.WithFields(logrus.Fields{
   297  			"starting at": "end", "error": err,
   298  		}).Debug("getStartLocation failed to open the statefile")
   299  		return end
   300  	}
   301  	defer fh.Close()
   302  	// read the contents of the state file (JSON)
   303  	content := make([]byte, 1024)
   304  	bytesRead, err := fh.Read(content)
   305  	if err != nil {
   306  		logrus.WithFields(logrus.Fields{
   307  			"starting at": "end", "error": err,
   308  		}).Debug("getStartLocation failed to read the statefile contents")
   309  		return end
   310  	}
   311  	content = content[:bytesRead]
   312  	// decode the contents of the statefile
   313  	state := State{}
   314  	if err := json.Unmarshal(content, &state); err != nil {
   315  		logrus.WithFields(logrus.Fields{
   316  			"starting at": "end", "error": err,
   317  		}).Debug("getStartLocation failed to json decode the statefile")
   318  		return end
   319  	}
   320  	// get the details of the existing log file
   321  	logStat := unix.Stat_t{}
   322  	if err := unix.Stat(logfile, &logStat); err != nil {
   323  		logrus.WithFields(logrus.Fields{
   324  			"starting at": "end", "error": err,
   325  		}).Debug("getStartLocation failed to get unix.stat() on the logfile")
   326  		return end
   327  	}
   328  	// compare inode numbers of the last-seen and existing log files
   329  	if state.INode != uint64(logStat.Ino) {
   330  		logrus.WithFields(logrus.Fields{
   331  			"starting at": "beginning", "error": err,
   332  		}).Debug("getStartLocation found a different inode number for the logfile")
   333  		// file's been rotated
   334  		return beginning
   335  	}
   336  	logrus.WithFields(logrus.Fields{
   337  		"starting at": state.Offset,
   338  	}).Debug("getStartLocation seeking to offset in logfile")
   339  	// we're good; start reading from the remembered state
   340  	return &tail.SeekInfo{
   341  		Offset: state.Offset,
   342  		Whence: 0,
   343  	}
   344  }
   345  
   346  // getTailer configures the *tail.Tail correctly to begin actually tailing the
   347  // specified file.
   348  func getTailer(conf Config, file string, stateFile string) (*tail.Tail, error) {
   349  	// tail a real file
   350  	var loc *tail.SeekInfo // 0 value means start at beginning
   351  	var reOpen, follow bool = true, true
   352  	var readFrom = conf.Options.ReadFrom
   353  	if readFrom == "" {
   354  		readFrom = "last"
   355  	}
   356  	switch readFrom {
   357  	case "start", "beginning":
   358  		// 0 value for tail.SeekInfo means start at beginning
   359  	case "end":
   360  		loc = &tail.SeekInfo{
   361  			Offset: 0,
   362  			Whence: 2,
   363  		}
   364  	case "last":
   365  		loc = getStartLocation(stateFile, file)
   366  	default:
   367  		errMsg := fmt.Sprintf("unknown option to --read_from: %s", readFrom)
   368  		return nil, errors.New(errMsg)
   369  	}
   370  	if conf.Options.Stop {
   371  		reOpen = false
   372  		follow = false
   373  	}
   374  	tailConf := tail.Config{
   375  		Location:  loc,
   376  		ReOpen:    reOpen, // keep reading on rotation, aka tail -F
   377  		MustExist: true,   // fail if log file doesn't exist
   378  		Follow:    follow, // don't stop at EOF, aka tail -f
   379  		Logger:    tail.DiscardingLogger,
   380  		Poll:      conf.Options.Poll, // use poll instead of inotify
   381  	}
   382  	logrus.WithFields(logrus.Fields{
   383  		"tailConf":  tailConf,
   384  		"conf":      conf,
   385  		"statefile": stateFile,
   386  		"location":  loc,
   387  	}).Debug("about to call tail.TailFile")
   388  	return tail.TailFile(file, tailConf)
   389  }
   390  
   391  // getStateFile returns the filename to use to track honeytail state.
   392  //
   393  // If a --tail.statefile parameter is provided, we try to respect it.
   394  // It might describe an existing file, an existing directory, or a new path.
   395  //
   396  // If tailing a single logfile, we will use the specified --tail.statefile:
   397  //   - if it points to an existing file, that statefile will be used directly
   398  //   - if it points to a new path, that path will be written to directly
   399  //   - if it points to an existing directory, the statefile will be placed inside
   400  //     the directory (and the statefile's name will be derived from the logfile).
   401  //
   402  // If honeytail is asked to tail multiple files, we will only respect the
   403  // third case, where --tail.statefile describes an existing directory.
   404  //
   405  // The default behavior (or if --tail.statefile isn't respected) will be to
   406  // write to the system's $TMPDIR/ and write to a statefile (where the name will
   407  // be derived from the logfile).
   408  func getStateFile(conf Config, filename string, numFiles int) string {
   409  	confStateFile := os.TempDir()
   410  	if conf.Options.StateFile != "" {
   411  		info, err := os.Stat(conf.Options.StateFile)
   412  		if numFiles == 1 && (os.IsNotExist(err) || (err == nil && !info.IsDir())) {
   413  			return conf.Options.StateFile
   414  		} else if err == nil && info.IsDir() {
   415  			// If the --tail.statefile is a directory, write statefile inside the specified directory
   416  			confStateFile = conf.Options.StateFile
   417  		} else {
   418  			logrus.Debugf("Couldn't write to --tail.statefile=%s, writing honeytail state for %s to $TMPDIR (%s) instead.",
   419  				conf.Options.StateFile, filename, confStateFile)
   420  		}
   421  	}
   422  
   423  	stateFileName := strings.TrimSuffix(filepath.Base(filename), ".log") + ".leash.state"
   424  	if conf.Options.HashStateFileDirPaths {
   425  		// generate hash based on filepath, in format 'filename.leash.state-{hash}'
   426  		stateFileName += fmt.Sprintf("-%x", sha1.Sum([]byte(filename)))
   427  	}
   428  	return filepath.Join(confStateFile, stateFileName)
   429  }
   430  
   431  // updateStateFile updates the state file once per second with the current
   432  // values for the logfile's inode number and offset
   433  func updateStateFile(state *State, t *tail.Tail, file string, stateFh *os.File) {
   434  	logStat := unix.Stat_t{}
   435  	unix.Stat(file, &logStat)
   436  	currentPos, err := t.Tell()
   437  	if err != nil {
   438  		return
   439  	}
   440  	state.INode = uint64(logStat.Ino)
   441  	state.Offset = currentPos
   442  	out, err := json.Marshal(state)
   443  	if err != nil {
   444  		return
   445  	}
   446  	stateFh.Truncate(0)
   447  	out = append(out, '\n')
   448  	stateFh.WriteAt(out, 0)
   449  	stateFh.Sync()
   450  }