github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/daemon/logger/journald/read.go (about)

     1  //go:build linux && cgo && !static_build && journald
     2  // +build linux,cgo,!static_build,journald
     3  
     4  package journald // import "github.com/docker/docker/daemon/logger/journald"
     5  
     6  import (
     7  	"errors"
     8  	"runtime"
     9  	"strconv"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/coreos/go-systemd/v22/journal"
    14  	"github.com/sirupsen/logrus"
    15  
    16  	"github.com/docker/docker/api/types/backend"
    17  	"github.com/docker/docker/daemon/logger"
    18  	"github.com/docker/docker/daemon/logger/journald/internal/sdjournal"
    19  )
    20  
    21  const closedDrainTimeout = 5 * time.Second
    22  
    23  // Fields which we know are not user-provided attribute fields.
    24  var wellKnownFields = map[string]bool{
    25  	"MESSAGE":             true,
    26  	"MESSAGE_ID":          true,
    27  	"PRIORITY":            true,
    28  	"CODE_FILE":           true,
    29  	"CODE_LINE":           true,
    30  	"CODE_FUNC":           true,
    31  	"ERRNO":               true,
    32  	"SYSLOG_FACILITY":     true,
    33  	fieldSyslogIdentifier: true,
    34  	"SYSLOG_PID":          true,
    35  	fieldSyslogTimestamp:  true,
    36  	fieldContainerName:    true,
    37  	fieldContainerID:      true,
    38  	fieldContainerIDFull:  true,
    39  	fieldContainerTag:     true,
    40  	fieldImageName:        true,
    41  	fieldPLogID:           true,
    42  	fieldPLogOrdinal:      true,
    43  	fieldPLogLast:         true,
    44  	fieldPartialMessage:   true,
    45  	fieldLogEpoch:         true,
    46  	fieldLogOrdinal:       true,
    47  }
    48  
    49  type reader struct {
    50  	s           *journald
    51  	j           *sdjournal.Journal
    52  	logWatcher  *logger.LogWatcher
    53  	config      logger.ReadConfig
    54  	maxOrdinal  uint64
    55  	initialized bool
    56  	ready       chan struct{}
    57  }
    58  
    59  func getMessage(d map[string]string) (line []byte, ok bool) {
    60  	m, ok := d["MESSAGE"]
    61  	if ok {
    62  		line = []byte(m)
    63  		if d[fieldPartialMessage] != "true" {
    64  			line = append(line, "\n"...)
    65  		}
    66  	}
    67  	return line, ok
    68  }
    69  
    70  func getPriority(d map[string]string) (journal.Priority, bool) {
    71  	if pri, ok := d["PRIORITY"]; ok {
    72  		i, err := strconv.Atoi(pri)
    73  		return journal.Priority(i), err == nil
    74  	}
    75  	return -1, false
    76  }
    77  
    78  // getSource recovers the stream name from the entry data by mapping from the
    79  // journal priority field back to the stream that we would have assigned that
    80  // value.
    81  func getSource(d map[string]string) string {
    82  	source := ""
    83  	if priority, ok := getPriority(d); ok {
    84  		if priority == journal.PriErr {
    85  			source = "stderr"
    86  		} else if priority == journal.PriInfo {
    87  			source = "stdout"
    88  		}
    89  	}
    90  	return source
    91  }
    92  
    93  func getAttrs(d map[string]string) []backend.LogAttr {
    94  	var attrs []backend.LogAttr
    95  	for k, v := range d {
    96  		if k[0] != '_' && !wellKnownFields[k] {
    97  			attrs = append(attrs, backend.LogAttr{Key: k, Value: v})
    98  		}
    99  	}
   100  	return attrs
   101  }
   102  
   103  // errDrainDone is the error returned by drainJournal to signal that there are
   104  // no more log entries to send to the log watcher.
   105  var errDrainDone = errors.New("journald drain done")
   106  
   107  // drainJournal reads and sends log messages from the journal.
   108  //
   109  // drainJournal returns errDrainDone when a terminal stopping condition has been
   110  // reached: the watch consumer is gone, a log entry is read which has a
   111  // timestamp after until (if until is nonzero), or the log driver is closed and
   112  // the last message logged has been sent from the journal. If the end of the
   113  // journal is reached without encountering a terminal stopping condition, a nil
   114  // error is returned.
   115  func (r *reader) drainJournal() error {
   116  	if !r.initialized {
   117  		defer func() {
   118  			r.signalReady()
   119  			r.initialized = true
   120  		}()
   121  
   122  		var (
   123  			err          error
   124  			seekedToTail bool
   125  		)
   126  		if r.config.Tail >= 0 {
   127  			if r.config.Until.IsZero() {
   128  				err = r.j.SeekTail()
   129  				seekedToTail = true
   130  			} else {
   131  				err = r.j.SeekRealtime(r.config.Until)
   132  			}
   133  		} else {
   134  			if r.config.Since.IsZero() {
   135  				err = r.j.SeekHead()
   136  			} else {
   137  				err = r.j.SeekRealtime(r.config.Since)
   138  			}
   139  		}
   140  		if err != nil {
   141  			return err
   142  		}
   143  
   144  		// SeekTail() followed by Next() behaves incorrectly, so we need
   145  		// to work around the bug by ensuring the first discrete
   146  		// movement of the read pointer is Previous() or PreviousSkip().
   147  		// PreviousSkip() is called inside the loop when config.Tail > 0
   148  		// so the only special case requiring special handling is
   149  		// config.Tail == 0.
   150  		// https://github.com/systemd/systemd/issues/9934
   151  		if seekedToTail && r.config.Tail == 0 {
   152  			// Resolve the read pointer to the last entry in the
   153  			// journal so that the call to Next() inside the loop
   154  			// advances past it.
   155  			if ok, err := r.j.Previous(); err != nil || !ok {
   156  				return err
   157  			}
   158  		}
   159  	}
   160  
   161  	for i := 0; ; i++ {
   162  		if !r.initialized && i == 0 && r.config.Tail > 0 {
   163  			if n, err := r.j.PreviousSkip(uint(r.config.Tail)); err != nil || n == 0 {
   164  				return err
   165  			}
   166  		} else if ok, err := r.j.Next(); err != nil || !ok {
   167  			return err
   168  		}
   169  
   170  		if !r.initialized && i == 0 {
   171  			// The cursor is in a position which will be unaffected
   172  			// by subsequent logging.
   173  			r.signalReady()
   174  		}
   175  
   176  		// Read the entry's timestamp.
   177  		timestamp, err := r.j.Realtime()
   178  		if err != nil {
   179  			return err
   180  		}
   181  		// Check if the PreviousSkip went too far back. Check only the
   182  		// initial position as we are comparing wall-clock timestamps,
   183  		// which may not be monotonic. We don't want to skip over
   184  		// messages sent later in time just because the clock moved
   185  		// backwards.
   186  		if !r.initialized && i == 0 && r.config.Tail > 0 && timestamp.Before(r.config.Since) {
   187  			r.j.SeekRealtime(r.config.Since)
   188  			continue
   189  		}
   190  		if !r.config.Until.IsZero() && r.config.Until.Before(timestamp) {
   191  			return errDrainDone
   192  		}
   193  
   194  		// Read and send the logged message, if there is one to read.
   195  		data, err := r.j.Data()
   196  		if err != nil {
   197  			return err
   198  		}
   199  
   200  		if data[fieldLogEpoch] == r.s.epoch {
   201  			seq, err := strconv.ParseUint(data[fieldLogOrdinal], 10, 64)
   202  			if err == nil && seq > r.maxOrdinal {
   203  				r.maxOrdinal = seq
   204  			}
   205  		}
   206  
   207  		if line, ok := getMessage(data); ok {
   208  			// Send the log message, unless the consumer is gone
   209  			msg := &logger.Message{
   210  				Line:      line,
   211  				Source:    getSource(data),
   212  				Timestamp: timestamp.In(time.UTC),
   213  				Attrs:     getAttrs(data),
   214  			}
   215  			// The daemon timestamp will differ from the "trusted"
   216  			// timestamp of when the event was received by journald.
   217  			// We can efficiently seek around the journal by the
   218  			// event timestamp, and the event timestamp is what
   219  			// journalctl displays. The daemon timestamp is just an
   220  			// application-supplied field with no special
   221  			// significance; libsystemd won't help us seek to the
   222  			// entry with the closest timestamp.
   223  			/*
   224  				if sts := data["SYSLOG_TIMESTAMP"]; sts != "" {
   225  					if tv, err := time.Parse(time.RFC3339Nano, sts); err == nil {
   226  						msg.Timestamp = tv
   227  					}
   228  				}
   229  			*/
   230  			select {
   231  			case <-r.logWatcher.WatchConsumerGone():
   232  				return errDrainDone
   233  			case r.logWatcher.Msg <- msg:
   234  			}
   235  		}
   236  
   237  		// Call sd_journal_process() periodically during the processing loop
   238  		// to close any opened file descriptors for rotated (deleted) journal files.
   239  		if i != 0 && i%1024 == 0 {
   240  			if _, err := r.j.Process(); err != nil {
   241  				// log a warning but ignore it for now
   242  				logrus.WithField("container", r.s.vars[fieldContainerIDFull]).
   243  					WithField("error", err).
   244  					Warn("journald: error processing journal")
   245  			}
   246  		}
   247  	}
   248  }
   249  
   250  func (r *reader) readJournal() error {
   251  	caughtUp := atomic.LoadUint64(&r.s.ordinal)
   252  	if err := r.drainJournal(); err != nil {
   253  		if err != errDrainDone {
   254  			return err
   255  		}
   256  		return nil
   257  	}
   258  
   259  	var drainTimeout <-chan time.Time
   260  	if !r.config.Follow {
   261  		if r.s.readSyncTimeout == 0 {
   262  			return nil
   263  		}
   264  		tmr := time.NewTimer(r.s.readSyncTimeout)
   265  		defer tmr.Stop()
   266  		drainTimeout = tmr.C
   267  	}
   268  
   269  	for {
   270  		status, err := r.j.Wait(250 * time.Millisecond)
   271  		if err != nil {
   272  			return err
   273  		}
   274  		select {
   275  		case <-r.logWatcher.WatchConsumerGone():
   276  			return nil // won't be able to write anything anymore
   277  		case <-drainTimeout:
   278  			// Container is gone but we haven't found the end of the
   279  			// logs within the timeout. Maybe it was dropped by
   280  			// journald, e.g. due to rate-limiting.
   281  			return nil
   282  		case <-r.s.closed:
   283  			// container is gone, drain journal
   284  			lastSeq := atomic.LoadUint64(&r.s.ordinal)
   285  			if r.maxOrdinal >= lastSeq {
   286  				// All caught up with the logger!
   287  				return nil
   288  			}
   289  			if drainTimeout == nil {
   290  				tmr := time.NewTimer(closedDrainTimeout)
   291  				defer tmr.Stop()
   292  				drainTimeout = tmr.C
   293  			}
   294  		default:
   295  			// container is still alive
   296  			if status == sdjournal.StatusNOP {
   297  				// no new data -- keep waiting
   298  				continue
   299  			}
   300  		}
   301  		err = r.drainJournal()
   302  		if err != nil {
   303  			if err != errDrainDone {
   304  				return err
   305  			}
   306  			return nil
   307  		}
   308  		if !r.config.Follow && r.s.readSyncTimeout > 0 && r.maxOrdinal >= caughtUp {
   309  			return nil
   310  		}
   311  	}
   312  }
   313  
   314  func (r *reader) readLogs() {
   315  	defer close(r.logWatcher.Msg)
   316  
   317  	// Make sure the ready channel is closed in the event of an early
   318  	// return.
   319  	defer r.signalReady()
   320  
   321  	// Quoting https://www.freedesktop.org/software/systemd/man/sd-journal.html:
   322  	//     Functions that operate on sd_journal objects are thread
   323  	//     agnostic — given sd_journal pointer may only be used from one
   324  	//     specific thread at all times (and it has to be the very same one
   325  	//     during the entire lifetime of the object), but multiple,
   326  	//     independent threads may use multiple, independent objects safely.
   327  	//
   328  	// sdjournal.Open returns a value which wraps an sd_journal pointer so
   329  	// we need to abide by those rules.
   330  	runtime.LockOSThread()
   331  	defer runtime.UnlockOSThread()
   332  
   333  	// Get a handle to the journal.
   334  	var err error
   335  	if r.s.journalReadDir != "" {
   336  		r.j, err = sdjournal.OpenDir(r.s.journalReadDir, 0)
   337  	} else {
   338  		r.j, err = sdjournal.Open(0)
   339  	}
   340  	if err != nil {
   341  		r.logWatcher.Err <- err
   342  		return
   343  	}
   344  	defer r.j.Close()
   345  
   346  	if r.config.Follow {
   347  		// Initialize library inotify watches early
   348  		if err := r.j.InitializeInotify(); err != nil {
   349  			r.logWatcher.Err <- err
   350  			return
   351  		}
   352  	}
   353  
   354  	// Remove limits on the size of data items that we'll retrieve.
   355  	if err := r.j.SetDataThreshold(0); err != nil {
   356  		r.logWatcher.Err <- err
   357  		return
   358  	}
   359  	// Add a match to have the library do the searching for us.
   360  	if err := r.j.AddMatch(fieldContainerIDFull, r.s.vars[fieldContainerIDFull]); err != nil {
   361  		r.logWatcher.Err <- err
   362  		return
   363  	}
   364  
   365  	if err := r.readJournal(); err != nil {
   366  		r.logWatcher.Err <- err
   367  		return
   368  	}
   369  }
   370  
   371  func (r *reader) signalReady() {
   372  	select {
   373  	case <-r.ready:
   374  	default:
   375  		close(r.ready)
   376  	}
   377  }
   378  
   379  func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher {
   380  	r := &reader{
   381  		s:          s,
   382  		logWatcher: logger.NewLogWatcher(),
   383  		config:     config,
   384  		ready:      make(chan struct{}),
   385  	}
   386  	go r.readLogs()
   387  	// Block until the reader is in position to read from the current config
   388  	// location to prevent race conditions in tests.
   389  	<-r.ready
   390  	return r.logWatcher
   391  }
   392  
   393  func waitUntilFlushedImpl(s *journald) error {
   394  	if s.readSyncTimeout == 0 {
   395  		return nil
   396  	}
   397  
   398  	ordinal := atomic.LoadUint64(&s.ordinal)
   399  	if ordinal == 0 {
   400  		return nil // No logs were sent; nothing to wait for.
   401  	}
   402  
   403  	flushed := make(chan error)
   404  	go func() {
   405  		defer close(flushed)
   406  		runtime.LockOSThread()
   407  		defer runtime.UnlockOSThread()
   408  
   409  		var (
   410  			j   *sdjournal.Journal
   411  			err error
   412  		)
   413  		if s.journalReadDir != "" {
   414  			j, err = sdjournal.OpenDir(s.journalReadDir, 0)
   415  		} else {
   416  			j, err = sdjournal.Open(0)
   417  		}
   418  		if err != nil {
   419  			flushed <- err
   420  			return
   421  		}
   422  		defer j.Close()
   423  
   424  		if err := j.AddMatch(fieldContainerIDFull, s.vars[fieldContainerIDFull]); err != nil {
   425  			flushed <- err
   426  			return
   427  		}
   428  		if err := j.AddMatch(fieldLogEpoch, s.epoch); err != nil {
   429  			flushed <- err
   430  			return
   431  		}
   432  		if err := j.AddMatch(fieldLogOrdinal, strconv.FormatUint(ordinal, 10)); err != nil {
   433  			flushed <- err
   434  			return
   435  		}
   436  
   437  		deadline := time.Now().Add(s.readSyncTimeout)
   438  		for time.Now().Before(deadline) {
   439  			if ok, err := j.Next(); ok {
   440  				// Found it!
   441  				return
   442  			} else if err != nil {
   443  				flushed <- err
   444  				return
   445  			}
   446  			if _, err := j.Wait(100 * time.Millisecond); err != nil {
   447  				flushed <- err
   448  				return
   449  			}
   450  		}
   451  		logrus.WithField("container", s.vars[fieldContainerIDFull]).
   452  			Warn("journald: deadline exceeded waiting for logs to be committed to journal")
   453  	}()
   454  	return <-flushed
   455  }
   456  
   457  func init() {
   458  	waitUntilFlushed = waitUntilFlushedImpl
   459  }