github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/daemon/logger/journald/read.go (about)

     1  //go:build linux && cgo && !static_build && journald
     2  
     3  package journald // import "github.com/Prakhar-Agarwal-byte/moby/daemon/logger/journald"
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"runtime"
     9  	"strconv"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/containerd/log"
    14  	"github.com/coreos/go-systemd/v22/journal"
    15  	"github.com/Prakhar-Agarwal-byte/moby/api/types/backend"
    16  	"github.com/Prakhar-Agarwal-byte/moby/daemon/logger"
    17  	"github.com/Prakhar-Agarwal-byte/moby/daemon/logger/journald/internal/sdjournal"
    18  )
    19  
    20  const closedDrainTimeout = 5 * time.Second
    21  
    22  // Fields which we know are not user-provided attribute fields.
    23  var wellKnownFields = map[string]bool{
    24  	"MESSAGE":             true,
    25  	"MESSAGE_ID":          true,
    26  	"PRIORITY":            true,
    27  	"CODE_FILE":           true,
    28  	"CODE_LINE":           true,
    29  	"CODE_FUNC":           true,
    30  	"ERRNO":               true,
    31  	"SYSLOG_FACILITY":     true,
    32  	fieldSyslogIdentifier: true,
    33  	"SYSLOG_PID":          true,
    34  	fieldSyslogTimestamp:  true,
    35  	fieldContainerName:    true,
    36  	fieldContainerID:      true,
    37  	fieldContainerIDFull:  true,
    38  	fieldContainerTag:     true,
    39  	fieldImageName:        true,
    40  	fieldPLogID:           true,
    41  	fieldPLogOrdinal:      true,
    42  	fieldPLogLast:         true,
    43  	fieldPartialMessage:   true,
    44  	fieldLogEpoch:         true,
    45  	fieldLogOrdinal:       true,
    46  }
    47  
    48  type reader struct {
    49  	s           *journald
    50  	j           *sdjournal.Journal
    51  	logWatcher  *logger.LogWatcher
    52  	config      logger.ReadConfig
    53  	maxOrdinal  uint64
    54  	initialized bool
    55  	ready       chan struct{}
    56  }
    57  
    58  func getMessage(d map[string]string) (line []byte, ok bool) {
    59  	m, ok := d["MESSAGE"]
    60  	if ok {
    61  		line = []byte(m)
    62  		if d[fieldPartialMessage] != "true" {
    63  			line = append(line, "\n"...)
    64  		}
    65  	}
    66  	return line, ok
    67  }
    68  
    69  func getPriority(d map[string]string) (journal.Priority, bool) {
    70  	if pri, ok := d["PRIORITY"]; ok {
    71  		i, err := strconv.Atoi(pri)
    72  		return journal.Priority(i), err == nil
    73  	}
    74  	return -1, false
    75  }
    76  
    77  // getSource recovers the stream name from the entry data by mapping from the
    78  // journal priority field back to the stream that we would have assigned that
    79  // value.
    80  func getSource(d map[string]string) string {
    81  	source := ""
    82  	if priority, ok := getPriority(d); ok {
    83  		if priority == journal.PriErr {
    84  			source = "stderr"
    85  		} else if priority == journal.PriInfo {
    86  			source = "stdout"
    87  		}
    88  	}
    89  	return source
    90  }
    91  
    92  func getAttrs(d map[string]string) []backend.LogAttr {
    93  	var attrs []backend.LogAttr
    94  	for k, v := range d {
    95  		if k[0] != '_' && !wellKnownFields[k] {
    96  			attrs = append(attrs, backend.LogAttr{Key: k, Value: v})
    97  		}
    98  	}
    99  	return attrs
   100  }
   101  
   102  // errDrainDone is the error returned by drainJournal to signal that there are
   103  // no more log entries to send to the log watcher.
   104  var errDrainDone = errors.New("journald drain done")
   105  
   106  // drainJournal reads and sends log messages from the journal.
   107  //
   108  // drainJournal returns errDrainDone when a terminal stopping condition has been
   109  // reached: the watch consumer is gone, a log entry is read which has a
   110  // timestamp after until (if until is nonzero), or the log driver is closed and
   111  // the last message logged has been sent from the journal. If the end of the
   112  // journal is reached without encountering a terminal stopping condition, a nil
   113  // error is returned.
   114  func (r *reader) drainJournal() error {
   115  	if !r.initialized {
   116  		defer func() {
   117  			r.signalReady()
   118  			r.initialized = true
   119  		}()
   120  
   121  		var (
   122  			err          error
   123  			seekedToTail bool
   124  		)
   125  		if r.config.Tail >= 0 {
   126  			if r.config.Until.IsZero() {
   127  				err = r.j.SeekTail()
   128  				seekedToTail = true
   129  			} else {
   130  				err = r.j.SeekRealtime(r.config.Until)
   131  			}
   132  		} else {
   133  			if r.config.Since.IsZero() {
   134  				err = r.j.SeekHead()
   135  			} else {
   136  				err = r.j.SeekRealtime(r.config.Since)
   137  			}
   138  		}
   139  		if err != nil {
   140  			return err
   141  		}
   142  
   143  		// SeekTail() followed by Next() behaves incorrectly, so we need
   144  		// to work around the bug by ensuring the first discrete
   145  		// movement of the read pointer is Previous() or PreviousSkip().
   146  		// PreviousSkip() is called inside the loop when config.Tail > 0
   147  		// so the only special case requiring special handling is
   148  		// config.Tail == 0.
   149  		// https://github.com/systemd/systemd/issues/9934
   150  		if seekedToTail && r.config.Tail == 0 {
   151  			// Resolve the read pointer to the last entry in the
   152  			// journal so that the call to Next() inside the loop
   153  			// advances past it.
   154  			if ok, err := r.j.Previous(); err != nil || !ok {
   155  				return err
   156  			}
   157  		}
   158  	}
   159  
   160  	for i := 0; ; i++ {
   161  		if !r.initialized && i == 0 && r.config.Tail > 0 {
   162  			if n, err := r.j.PreviousSkip(uint(r.config.Tail)); err != nil || n == 0 {
   163  				return err
   164  			}
   165  		} else if ok, err := r.j.Next(); err != nil || !ok {
   166  			return err
   167  		}
   168  
   169  		if !r.initialized && i == 0 {
   170  			// The cursor is in a position which will be unaffected
   171  			// by subsequent logging.
   172  			r.signalReady()
   173  		}
   174  
   175  		// Read the entry's timestamp.
   176  		timestamp, err := r.j.Realtime()
   177  		if err != nil {
   178  			return err
   179  		}
   180  		// Check if the PreviousSkip went too far back. Check only the
   181  		// initial position as we are comparing wall-clock timestamps,
   182  		// which may not be monotonic. We don't want to skip over
   183  		// messages sent later in time just because the clock moved
   184  		// backwards.
   185  		if !r.initialized && i == 0 && r.config.Tail > 0 && timestamp.Before(r.config.Since) {
   186  			r.j.SeekRealtime(r.config.Since)
   187  			continue
   188  		}
   189  		if !r.config.Until.IsZero() && r.config.Until.Before(timestamp) {
   190  			return errDrainDone
   191  		}
   192  
   193  		// Read and send the logged message, if there is one to read.
   194  		data, err := r.j.Data()
   195  		if err != nil {
   196  			return err
   197  		}
   198  
   199  		if data[fieldLogEpoch] == r.s.epoch {
   200  			seq, err := strconv.ParseUint(data[fieldLogOrdinal], 10, 64)
   201  			if err == nil && seq > r.maxOrdinal {
   202  				r.maxOrdinal = seq
   203  			}
   204  		}
   205  
   206  		if line, ok := getMessage(data); ok {
   207  			// Send the log message, unless the consumer is gone
   208  			msg := &logger.Message{
   209  				Line:      line,
   210  				Source:    getSource(data),
   211  				Timestamp: timestamp.In(time.UTC),
   212  				Attrs:     getAttrs(data),
   213  			}
   214  			// The daemon timestamp will differ from the "trusted"
   215  			// timestamp of when the event was received by journald.
   216  			// We can efficiently seek around the journal by the
   217  			// event timestamp, and the event timestamp is what
   218  			// journalctl displays. The daemon timestamp is just an
   219  			// application-supplied field with no special
   220  			// significance; libsystemd won't help us seek to the
   221  			// entry with the closest timestamp.
   222  			/*
   223  				if sts := data["SYSLOG_TIMESTAMP"]; sts != "" {
   224  					if tv, err := time.Parse(time.RFC3339Nano, sts); err == nil {
   225  						msg.Timestamp = tv
   226  					}
   227  				}
   228  			*/
   229  			select {
   230  			case <-r.logWatcher.WatchConsumerGone():
   231  				return errDrainDone
   232  			case r.logWatcher.Msg <- msg:
   233  			}
   234  		}
   235  
   236  		// Call sd_journal_process() periodically during the processing loop
   237  		// to close any opened file descriptors for rotated (deleted) journal files.
   238  		if i != 0 && i%1024 == 0 {
   239  			if _, err := r.j.Process(); err != nil {
   240  				// log a warning but ignore it for now
   241  				log.G(context.TODO()).WithField("container", r.s.vars[fieldContainerIDFull]).
   242  					WithField("error", err).
   243  					Warn("journald: error processing journal")
   244  			}
   245  		}
   246  	}
   247  }
   248  
   249  func (r *reader) readJournal() error {
   250  	caughtUp := atomic.LoadUint64(&r.s.ordinal)
   251  	if err := r.drainJournal(); err != nil {
   252  		if err != errDrainDone {
   253  			return err
   254  		}
   255  		return nil
   256  	}
   257  
   258  	var drainTimeout <-chan time.Time
   259  	if !r.config.Follow {
   260  		if r.s.readSyncTimeout == 0 {
   261  			return nil
   262  		}
   263  		tmr := time.NewTimer(r.s.readSyncTimeout)
   264  		defer tmr.Stop()
   265  		drainTimeout = tmr.C
   266  	}
   267  
   268  	for {
   269  		status, err := r.j.Wait(250 * time.Millisecond)
   270  		if err != nil {
   271  			return err
   272  		}
   273  		select {
   274  		case <-r.logWatcher.WatchConsumerGone():
   275  			return nil // won't be able to write anything anymore
   276  		case <-drainTimeout:
   277  			// Container is gone but we haven't found the end of the
   278  			// logs within the timeout. Maybe it was dropped by
   279  			// journald, e.g. due to rate-limiting.
   280  			return nil
   281  		case <-r.s.closed:
   282  			// container is gone, drain journal
   283  			lastSeq := atomic.LoadUint64(&r.s.ordinal)
   284  			if r.maxOrdinal >= lastSeq {
   285  				// All caught up with the logger!
   286  				return nil
   287  			}
   288  			if drainTimeout == nil {
   289  				tmr := time.NewTimer(closedDrainTimeout)
   290  				defer tmr.Stop()
   291  				drainTimeout = tmr.C
   292  			}
   293  		default:
   294  			// container is still alive
   295  			if status == sdjournal.StatusNOP {
   296  				// no new data -- keep waiting
   297  				continue
   298  			}
   299  		}
   300  		err = r.drainJournal()
   301  		if err != nil {
   302  			if err != errDrainDone {
   303  				return err
   304  			}
   305  			return nil
   306  		}
   307  		if !r.config.Follow && r.s.readSyncTimeout > 0 && r.maxOrdinal >= caughtUp {
   308  			return nil
   309  		}
   310  	}
   311  }
   312  
   313  func (r *reader) readLogs() {
   314  	defer close(r.logWatcher.Msg)
   315  
   316  	// Make sure the ready channel is closed in the event of an early
   317  	// return.
   318  	defer r.signalReady()
   319  
   320  	// Quoting https://www.freedesktop.org/software/systemd/man/sd-journal.html:
   321  	//     Functions that operate on sd_journal objects are thread
   322  	//     agnostic — given sd_journal pointer may only be used from one
   323  	//     specific thread at all times (and it has to be the very same one
   324  	//     during the entire lifetime of the object), but multiple,
   325  	//     independent threads may use multiple, independent objects safely.
   326  	//
   327  	// sdjournal.Open returns a value which wraps an sd_journal pointer so
   328  	// we need to abide by those rules.
   329  	runtime.LockOSThread()
   330  	defer runtime.UnlockOSThread()
   331  
   332  	// Get a handle to the journal.
   333  	var err error
   334  	if r.s.journalReadDir != "" {
   335  		r.j, err = sdjournal.OpenDir(r.s.journalReadDir, 0)
   336  	} else {
   337  		r.j, err = sdjournal.Open(0)
   338  	}
   339  	if err != nil {
   340  		r.logWatcher.Err <- err
   341  		return
   342  	}
   343  	defer r.j.Close()
   344  
   345  	if r.config.Follow {
   346  		// Initialize library inotify watches early
   347  		if err := r.j.InitializeInotify(); err != nil {
   348  			r.logWatcher.Err <- err
   349  			return
   350  		}
   351  	}
   352  
   353  	// Remove limits on the size of data items that we'll retrieve.
   354  	if err := r.j.SetDataThreshold(0); err != nil {
   355  		r.logWatcher.Err <- err
   356  		return
   357  	}
   358  	// Add a match to have the library do the searching for us.
   359  	if err := r.j.AddMatch(fieldContainerIDFull, r.s.vars[fieldContainerIDFull]); err != nil {
   360  		r.logWatcher.Err <- err
   361  		return
   362  	}
   363  
   364  	if err := r.readJournal(); err != nil {
   365  		r.logWatcher.Err <- err
   366  		return
   367  	}
   368  }
   369  
   370  func (r *reader) signalReady() {
   371  	select {
   372  	case <-r.ready:
   373  	default:
   374  		close(r.ready)
   375  	}
   376  }
   377  
   378  func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher {
   379  	r := &reader{
   380  		s:          s,
   381  		logWatcher: logger.NewLogWatcher(),
   382  		config:     config,
   383  		ready:      make(chan struct{}),
   384  	}
   385  	go r.readLogs()
   386  	// Block until the reader is in position to read from the current config
   387  	// location to prevent race conditions in tests.
   388  	<-r.ready
   389  	return r.logWatcher
   390  }
   391  
   392  func waitUntilFlushedImpl(s *journald) error {
   393  	if s.readSyncTimeout == 0 {
   394  		return nil
   395  	}
   396  
   397  	ordinal := atomic.LoadUint64(&s.ordinal)
   398  	if ordinal == 0 {
   399  		return nil // No logs were sent; nothing to wait for.
   400  	}
   401  
   402  	flushed := make(chan error)
   403  	go func() {
   404  		defer close(flushed)
   405  		runtime.LockOSThread()
   406  		defer runtime.UnlockOSThread()
   407  
   408  		var (
   409  			j   *sdjournal.Journal
   410  			err error
   411  		)
   412  		if s.journalReadDir != "" {
   413  			j, err = sdjournal.OpenDir(s.journalReadDir, 0)
   414  		} else {
   415  			j, err = sdjournal.Open(0)
   416  		}
   417  		if err != nil {
   418  			flushed <- err
   419  			return
   420  		}
   421  		defer j.Close()
   422  
   423  		if err := j.AddMatch(fieldContainerIDFull, s.vars[fieldContainerIDFull]); err != nil {
   424  			flushed <- err
   425  			return
   426  		}
   427  		if err := j.AddMatch(fieldLogEpoch, s.epoch); err != nil {
   428  			flushed <- err
   429  			return
   430  		}
   431  		if err := j.AddMatch(fieldLogOrdinal, strconv.FormatUint(ordinal, 10)); err != nil {
   432  			flushed <- err
   433  			return
   434  		}
   435  
   436  		deadline := time.Now().Add(s.readSyncTimeout)
   437  		for time.Now().Before(deadline) {
   438  			if ok, err := j.Next(); ok {
   439  				// Found it!
   440  				return
   441  			} else if err != nil {
   442  				flushed <- err
   443  				return
   444  			}
   445  			if _, err := j.Wait(100 * time.Millisecond); err != nil {
   446  				flushed <- err
   447  				return
   448  			}
   449  		}
   450  		log.G(context.TODO()).WithField("container", s.vars[fieldContainerIDFull]).
   451  			Warn("journald: deadline exceeded waiting for logs to be committed to journal")
   452  	}()
   453  	return <-flushed
   454  }
   455  
   456  func init() {
   457  	waitUntilFlushed = waitUntilFlushedImpl
   458  }