github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/clients/pkg/promtail/targets/file/tailer.go (about)

     1  package file
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/go-kit/log"
    10  	"github.com/go-kit/log/level"
    11  	"github.com/hpcloud/tail"
    12  	"github.com/pkg/errors"
    13  	"github.com/prometheus/common/model"
    14  	"go.uber.org/atomic"
    15  	"golang.org/x/text/encoding"
    16  	"golang.org/x/text/encoding/ianaindex"
    17  	"golang.org/x/text/transform"
    18  
    19  	"github.com/grafana/loki/clients/pkg/promtail/api"
    20  	"github.com/grafana/loki/clients/pkg/promtail/positions"
    21  
    22  	"github.com/grafana/loki/pkg/logproto"
    23  	"github.com/grafana/loki/pkg/util"
    24  )
    25  
    26  type tailer struct {
    27  	metrics   *Metrics
    28  	logger    log.Logger
    29  	handler   api.EntryHandler
    30  	positions positions.Positions
    31  
    32  	path string
    33  	tail *tail.Tail
    34  
    35  	posAndSizeMtx sync.Mutex
    36  	stopOnce      sync.Once
    37  
    38  	running *atomic.Bool
    39  	posquit chan struct{}
    40  	posdone chan struct{}
    41  	done    chan struct{}
    42  
    43  	decoder *encoding.Decoder
    44  }
    45  
    46  func newTailer(metrics *Metrics, logger log.Logger, handler api.EntryHandler, positions positions.Positions, path string, encoding string) (*tailer, error) {
    47  	// Simple check to make sure the file we are tailing doesn't
    48  	// have a position already saved which is past the end of the file.
    49  	fi, err := os.Stat(path)
    50  	if err != nil {
    51  		return nil, err
    52  	}
    53  	pos, err := positions.Get(path)
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	if fi.Size() < pos {
    59  		positions.Remove(path)
    60  	}
    61  
    62  	tail, err := tail.TailFile(path, tail.Config{
    63  		Follow:    true,
    64  		Poll:      true,
    65  		ReOpen:    true,
    66  		MustExist: true,
    67  		Location: &tail.SeekInfo{
    68  			Offset: pos,
    69  			Whence: 0,
    70  		},
    71  		Logger: util.NewLogAdapter(logger),
    72  	})
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  
    77  	logger = log.With(logger, "component", "tailer")
    78  	tailer := &tailer{
    79  		metrics:   metrics,
    80  		logger:    logger,
    81  		handler:   api.AddLabelsMiddleware(model.LabelSet{FilenameLabel: model.LabelValue(path)}).Wrap(handler),
    82  		positions: positions,
    83  		path:      path,
    84  		tail:      tail,
    85  		running:   atomic.NewBool(false),
    86  		posquit:   make(chan struct{}),
    87  		posdone:   make(chan struct{}),
    88  		done:      make(chan struct{}),
    89  	}
    90  
    91  	if encoding != "" {
    92  		level.Info(tailer.logger).Log("msg", "Will decode messages", "from", encoding, "to", "UTF8")
    93  		encoder, err := ianaindex.IANA.Encoding(encoding)
    94  		if err != nil {
    95  			return nil, errors.Wrap(err, "error doing IANA encoding")
    96  		}
    97  		decoder := encoder.NewDecoder()
    98  		tailer.decoder = decoder
    99  	}
   100  
   101  	go tailer.readLines()
   102  	go tailer.updatePosition()
   103  	metrics.filesActive.Add(1.)
   104  	return tailer, nil
   105  }
   106  
   107  // updatePosition is run in a goroutine and checks the current size of the file and saves it to the positions file
   108  // at a regular interval. If there is ever an error it stops the tailer and exits, the tailer will be re-opened
   109  // by the filetarget sync method if it still exists and will start reading from the last successful entry in the
   110  // positions file.
   111  func (t *tailer) updatePosition() {
   112  	positionSyncPeriod := t.positions.SyncPeriod()
   113  	positionWait := time.NewTicker(positionSyncPeriod)
   114  	defer func() {
   115  		positionWait.Stop()
   116  		level.Info(t.logger).Log("msg", "position timer: exited", "path", t.path)
   117  		close(t.posdone)
   118  	}()
   119  
   120  	for {
   121  		select {
   122  		case <-positionWait.C:
   123  			err := t.markPositionAndSize()
   124  			if err != nil {
   125  				level.Error(t.logger).Log("msg", "position timer: error getting tail position and/or size, stopping tailer", "path", t.path, "error", err)
   126  				err := t.tail.Stop()
   127  				if err != nil {
   128  					level.Error(t.logger).Log("msg", "position timer: error stopping tailer", "path", t.path, "error", err)
   129  				}
   130  				return
   131  			}
   132  		case <-t.posquit:
   133  			return
   134  		}
   135  	}
   136  }
   137  
   138  // readLines runs in a goroutine and consumes the t.tail.Lines channel from the underlying tailer.
   139  // it will only exit when that channel is closed. This is important to avoid a deadlock in the underlying
   140  // tailer which can happen if there are unread lines in this channel and the Stop method on the tailer
   141  // is called, the underlying tailer will never exit if there are unread lines in the t.tail.Lines channel
   142  func (t *tailer) readLines() {
   143  	level.Info(t.logger).Log("msg", "tail routine: started", "path", t.path)
   144  
   145  	t.running.Store(true)
   146  
   147  	// This function runs in a goroutine, if it exits this tailer will never do any more tailing.
   148  	// Clean everything up.
   149  	defer func() {
   150  		t.cleanupMetrics()
   151  		t.running.Store(false)
   152  		level.Info(t.logger).Log("msg", "tail routine: exited", "path", t.path)
   153  		close(t.done)
   154  	}()
   155  	entries := t.handler.Chan()
   156  	for {
   157  		line, ok := <-t.tail.Lines
   158  		if !ok {
   159  			level.Info(t.logger).Log("msg", "tail routine: tail channel closed, stopping tailer", "path", t.path, "reason", t.tail.Tomb.Err())
   160  			return
   161  		}
   162  
   163  		// Note currently the tail implementation hardcodes Err to nil, this should never hit.
   164  		if line.Err != nil {
   165  			level.Error(t.logger).Log("msg", "tail routine: error reading line", "path", t.path, "error", line.Err)
   166  			continue
   167  		}
   168  
   169  		var text string
   170  		if t.decoder != nil {
   171  			var err error
   172  			text, err = t.convertToUTF8(line.Text)
   173  			if err != nil {
   174  				level.Debug(t.logger).Log("msg", "failed to convert encoding", "error", err)
   175  				t.metrics.encodingFailures.WithLabelValues(t.path).Inc()
   176  				text = fmt.Sprintf("the requested encoding conversion for this line failed in Promtail/Grafana Agent: %s", err.Error())
   177  			}
   178  		} else {
   179  			text = line.Text
   180  		}
   181  
   182  		t.metrics.readLines.WithLabelValues(t.path).Inc()
   183  		entries <- api.Entry{
   184  			Labels: model.LabelSet{},
   185  			Entry: logproto.Entry{
   186  				Timestamp: line.Time,
   187  				Line:      text,
   188  			},
   189  		}
   190  	}
   191  }
   192  
   193  func (t *tailer) markPositionAndSize() error {
   194  	// Lock this update as there are 2 timers calling this routine, the sync in filetarget and the positions sync in this file.
   195  	t.posAndSizeMtx.Lock()
   196  	defer t.posAndSizeMtx.Unlock()
   197  
   198  	size, err := t.tail.Size()
   199  	if err != nil {
   200  		// If the file no longer exists, no need to save position information
   201  		if err == os.ErrNotExist {
   202  			level.Info(t.logger).Log("msg", "skipping update of position for a file which does not currently exist", "path", t.path)
   203  			return nil
   204  		}
   205  		return err
   206  	}
   207  	t.metrics.totalBytes.WithLabelValues(t.path).Set(float64(size))
   208  
   209  	pos, err := t.tail.Tell()
   210  	if err != nil {
   211  		return err
   212  	}
   213  	t.metrics.readBytes.WithLabelValues(t.path).Set(float64(pos))
   214  	t.positions.Put(t.path, pos)
   215  
   216  	return nil
   217  }
   218  
   219  func (t *tailer) stop() {
   220  	// stop can be called by two separate threads in filetarget, to avoid a panic closing channels more than once
   221  	// we wrap the stop in a sync.Once.
   222  	t.stopOnce.Do(func() {
   223  		// Shut down the position marker thread
   224  		close(t.posquit)
   225  		<-t.posdone
   226  
   227  		// Save the current position before shutting down tailer
   228  		err := t.markPositionAndSize()
   229  		if err != nil {
   230  			level.Error(t.logger).Log("msg", "error marking file position when stopping tailer", "path", t.path, "error", err)
   231  		}
   232  
   233  		// Stop the underlying tailer
   234  		err = t.tail.Stop()
   235  		if err != nil {
   236  			level.Error(t.logger).Log("msg", "error stopping tailer", "path", t.path, "error", err)
   237  		}
   238  		// Wait for readLines() to consume all the remaining messages and exit when the channel is closed
   239  		<-t.done
   240  		level.Info(t.logger).Log("msg", "stopped tailing file", "path", t.path)
   241  		t.handler.Stop()
   242  	})
   243  }
   244  
   245  func (t *tailer) isRunning() bool {
   246  	return t.running.Load()
   247  }
   248  
   249  func (t *tailer) convertToUTF8(text string) (string, error) {
   250  	res, _, err := transform.String(t.decoder, text)
   251  	if err != nil {
   252  		return "", errors.Wrap(err, "error decoding text")
   253  	}
   254  
   255  	return res, nil
   256  }
   257  
   258  // cleanupMetrics removes all metrics exported by this tailer
   259  func (t *tailer) cleanupMetrics() {
   260  	// When we stop tailing the file, also un-export metrics related to the file
   261  	t.metrics.filesActive.Add(-1.)
   262  	t.metrics.readLines.DeleteLabelValues(t.path)
   263  	t.metrics.readBytes.DeleteLabelValues(t.path)
   264  	t.metrics.totalBytes.DeleteLabelValues(t.path)
   265  }