github.com/jwhonce/docker@v0.6.7-0.20190327063223-da823cf3a5a3/daemon/logger/journald/read.go (about)

     1  // +build linux,cgo,!static_build,journald
     2  
     3  package journald // import "github.com/docker/docker/daemon/logger/journald"
     4  
     5  // #include <sys/types.h>
     6  // #include <sys/poll.h>
     7  // #include <systemd/sd-journal.h>
     8  // #include <errno.h>
     9  // #include <stdio.h>
    10  // #include <stdlib.h>
    11  // #include <string.h>
    12  // #include <time.h>
    13  // #include <unistd.h>
    14  //
    15  //static int get_message(sd_journal *j, const char **msg, size_t *length, int *partial)
    16  //{
    17  //	int rc;
    18  //	size_t plength;
    19  //	*msg = NULL;
    20  //	*length = 0;
    21  //	plength = strlen("CONTAINER_PARTIAL_MESSAGE=true");
    22  //	rc = sd_journal_get_data(j, "CONTAINER_PARTIAL_MESSAGE", (const void **) msg, length);
    23  //	*partial = ((rc == 0) && (*length == plength) && (memcmp(*msg, "CONTAINER_PARTIAL_MESSAGE=true", plength) == 0));
    24  //	rc = sd_journal_get_data(j, "MESSAGE", (const void **) msg, length);
    25  //	if (rc == 0) {
    26  //		if (*length > 8) {
    27  //			(*msg) += 8;
    28  //			*length -= 8;
    29  //		} else {
    30  //			*msg = NULL;
    31  //			*length = 0;
    32  //			rc = -ENOENT;
    33  //		}
    34  //	}
    35  //	return rc;
    36  //}
    37  //static int get_priority(sd_journal *j, int *priority)
    38  //{
    39  //	const void *data;
    40  //	size_t i, length;
    41  //	int rc;
    42  //	*priority = -1;
    43  //	rc = sd_journal_get_data(j, "PRIORITY", &data, &length);
    44  //	if (rc == 0) {
    45  //		if ((length > 9) && (strncmp(data, "PRIORITY=", 9) == 0)) {
    46  //			*priority = 0;
    47  //			for (i = 9; i < length; i++) {
    48  //				*priority = *priority * 10 + ((const char *)data)[i] - '0';
    49  //			}
    50  //			if (length > 9) {
    51  //				rc = 0;
    52  //			}
    53  //		}
    54  //	}
    55  //	return rc;
    56  //}
    57  //static int is_attribute_field(const char *msg, size_t length)
    58  //{
    59  //	static const struct known_field {
    60  //		const char *name;
    61  //		size_t length;
    62  //	} fields[] = {
    63  //		{"MESSAGE", sizeof("MESSAGE") - 1},
    64  //		{"MESSAGE_ID", sizeof("MESSAGE_ID") - 1},
    65  //		{"PRIORITY", sizeof("PRIORITY") - 1},
    66  //		{"CODE_FILE", sizeof("CODE_FILE") - 1},
    67  //		{"CODE_LINE", sizeof("CODE_LINE") - 1},
    68  //		{"CODE_FUNC", sizeof("CODE_FUNC") - 1},
    69  //		{"ERRNO", sizeof("ERRNO") - 1},
    70  //		{"SYSLOG_FACILITY", sizeof("SYSLOG_FACILITY") - 1},
    71  //		{"SYSLOG_IDENTIFIER", sizeof("SYSLOG_IDENTIFIER") - 1},
    72  //		{"SYSLOG_PID", sizeof("SYSLOG_PID") - 1},
    73  //		{"CONTAINER_NAME", sizeof("CONTAINER_NAME") - 1},
    74  //		{"CONTAINER_ID", sizeof("CONTAINER_ID") - 1},
    75  //		{"CONTAINER_ID_FULL", sizeof("CONTAINER_ID_FULL") - 1},
    76  //		{"CONTAINER_TAG", sizeof("CONTAINER_TAG") - 1},
    77  //	};
    78  //	unsigned int i;
    79  //	void *p;
    80  //	if ((length < 1) || (msg[0] == '_') || ((p = memchr(msg, '=', length)) == NULL)) {
    81  //		return -1;
    82  //	}
    83  //	length = ((const char *) p) - msg;
    84  //	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
    85  //		if ((fields[i].length == length) && (memcmp(fields[i].name, msg, length) == 0)) {
    86  //			return -1;
    87  //		}
    88  //	}
    89  //	return 0;
    90  //}
    91  //static int get_attribute_field(sd_journal *j, const char **msg, size_t *length)
    92  //{
    93  //	int rc;
    94  //	*msg = NULL;
    95  //	*length = 0;
    96  //	while ((rc = sd_journal_enumerate_data(j, (const void **) msg, length)) > 0) {
    97  //		if (is_attribute_field(*msg, *length) == 0) {
    98  //			break;
    99  //		}
   100  //		rc = -ENOENT;
   101  //	}
   102  //	return rc;
   103  //}
   104  //static int wait_for_data_cancelable(sd_journal *j, int pipefd)
   105  //{
   106  //	struct pollfd fds[2];
   107  //	uint64_t when = 0;
   108  //	int timeout, jevents, i;
   109  //	struct timespec ts;
   110  //	uint64_t now;
   111  //
   112  //	memset(&fds, 0, sizeof(fds));
   113  //	fds[0].fd = pipefd;
   114  //	fds[0].events = POLLHUP;
   115  //	fds[1].fd = sd_journal_get_fd(j);
   116  //	if (fds[1].fd < 0) {
   117  //		return fds[1].fd;
   118  //	}
   119  //
   120  //	do {
   121  //		jevents = sd_journal_get_events(j);
   122  //		if (jevents < 0) {
   123  //			return jevents;
   124  //		}
   125  //		fds[1].events = jevents;
   126  //		sd_journal_get_timeout(j, &when);
   127  //		if (when == -1) {
   128  //			timeout = -1;
   129  //		} else {
   130  //			clock_gettime(CLOCK_MONOTONIC, &ts);
   131  //			now = (uint64_t) ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
   132  //			timeout = when > now ? (int) ((when - now + 999) / 1000) : 0;
   133  //		}
   134  //		i = poll(fds, 2, timeout);
   135  //		if ((i == -1) && (errno != EINTR)) {
   136  //			/* An unexpected error. */
   137  //			return (errno != 0) ? -errno : -EINTR;
   138  //		}
   139  //		if (fds[0].revents & POLLHUP) {
   140  //			/* The close notification pipe was closed. */
   141  //			return 0;
   142  //		}
   143  //		if (sd_journal_process(j) == SD_JOURNAL_APPEND) {
   144  //			/* Data, which we might care about, was appended. */
   145  //			return 1;
   146  //		}
   147  //	} while ((fds[0].revents & POLLHUP) == 0);
   148  //	return 0;
   149  //}
   150  import "C"
   151  
   152  import (
   153  	"fmt"
   154  	"strings"
   155  	"time"
   156  	"unsafe"
   157  
   158  	"github.com/coreos/go-systemd/journal"
   159  	"github.com/docker/docker/api/types/backend"
   160  	"github.com/docker/docker/daemon/logger"
   161  	"github.com/sirupsen/logrus"
   162  )
   163  
   164  func (s *journald) Close() error {
   165  	s.mu.Lock()
   166  	s.closed = true
   167  	for r := range s.readers {
   168  		r.ProducerGone()
   169  		delete(s.readers, r)
   170  
   171  	}
   172  	s.mu.Unlock()
   173  	return nil
   174  }
   175  
   176  func (s *journald) drainJournal(logWatcher *logger.LogWatcher, j *C.sd_journal, oldCursor *C.char, untilUnixMicro uint64) (*C.char, bool) {
   177  	var msg, data, cursor *C.char
   178  	var length C.size_t
   179  	var stamp C.uint64_t
   180  	var priority, partial C.int
   181  	var done bool
   182  
   183  	// Walk the journal from here forward until we run out of new entries
   184  	// or we reach the until value (if provided).
   185  drain:
   186  	for {
   187  		// Try not to send a given entry twice.
   188  		if oldCursor != nil {
   189  			for C.sd_journal_test_cursor(j, oldCursor) > 0 {
   190  				if C.sd_journal_next(j) <= 0 {
   191  					break drain
   192  				}
   193  			}
   194  		}
   195  		// Read and send the logged message, if there is one to read.
   196  		i := C.get_message(j, &msg, &length, &partial)
   197  		if i != -C.ENOENT && i != -C.EADDRNOTAVAIL {
   198  			// Read the entry's timestamp.
   199  			if C.sd_journal_get_realtime_usec(j, &stamp) != 0 {
   200  				break
   201  			}
   202  			// Break if the timestamp exceeds any provided until flag.
   203  			if untilUnixMicro != 0 && untilUnixMicro < uint64(stamp) {
   204  				done = true
   205  				break
   206  			}
   207  
   208  			// Set up the time and text of the entry.
   209  			timestamp := time.Unix(int64(stamp)/1000000, (int64(stamp)%1000000)*1000)
   210  			line := C.GoBytes(unsafe.Pointer(msg), C.int(length))
   211  			if partial == 0 {
   212  				line = append(line, "\n"...)
   213  			}
   214  			// Recover the stream name by mapping
   215  			// from the journal priority back to
   216  			// the stream that we would have
   217  			// assigned that value.
   218  			source := ""
   219  			if C.get_priority(j, &priority) != 0 {
   220  				source = ""
   221  			} else if priority == C.int(journal.PriErr) {
   222  				source = "stderr"
   223  			} else if priority == C.int(journal.PriInfo) {
   224  				source = "stdout"
   225  			}
   226  			// Retrieve the values of any variables we're adding to the journal.
   227  			var attrs []backend.LogAttr
   228  			C.sd_journal_restart_data(j)
   229  			for C.get_attribute_field(j, &data, &length) > C.int(0) {
   230  				kv := strings.SplitN(C.GoStringN(data, C.int(length)), "=", 2)
   231  				attrs = append(attrs, backend.LogAttr{Key: kv[0], Value: kv[1]})
   232  			}
   233  			// Send the log message.
   234  			logWatcher.Msg <- &logger.Message{
   235  				Line:      line,
   236  				Source:    source,
   237  				Timestamp: timestamp.In(time.UTC),
   238  				Attrs:     attrs,
   239  			}
   240  		}
   241  		// If we're at the end of the journal, we're done (for now).
   242  		if C.sd_journal_next(j) <= 0 {
   243  			break
   244  		}
   245  	}
   246  
   247  	// free(NULL) is safe
   248  	C.free(unsafe.Pointer(oldCursor))
   249  	if C.sd_journal_get_cursor(j, &cursor) != 0 {
   250  		// ensure that we won't be freeing an address that's invalid
   251  		cursor = nil
   252  	}
   253  	return cursor, done
   254  }
   255  
   256  func (s *journald) followJournal(logWatcher *logger.LogWatcher, j *C.sd_journal, pfd [2]C.int, cursor *C.char, untilUnixMicro uint64) *C.char {
   257  	s.mu.Lock()
   258  	s.readers[logWatcher] = struct{}{}
   259  	if s.closed {
   260  		// the journald Logger is closed, presumably because the container has been
   261  		// reset.  So we shouldn't follow, because we'll never be woken up.  But we
   262  		// should make one more drainJournal call to be sure we've got all the logs.
   263  		// Close pfd[1] so that one drainJournal happens, then cleanup, then return.
   264  		C.close(pfd[1])
   265  	}
   266  	s.mu.Unlock()
   267  
   268  	newCursor := make(chan *C.char)
   269  
   270  	go func() {
   271  		for {
   272  			// Keep copying journal data out until we're notified to stop
   273  			// or we hit an error.
   274  			status := C.wait_for_data_cancelable(j, pfd[0])
   275  			if status < 0 {
   276  				cerrstr := C.strerror(C.int(-status))
   277  				errstr := C.GoString(cerrstr)
   278  				fmtstr := "error %q while attempting to follow journal for container %q"
   279  				logrus.Errorf(fmtstr, errstr, s.vars["CONTAINER_ID_FULL"])
   280  				break
   281  			}
   282  
   283  			var done bool
   284  			cursor, done = s.drainJournal(logWatcher, j, cursor, untilUnixMicro)
   285  
   286  			if status != 1 || done {
   287  				// We were notified to stop
   288  				break
   289  			}
   290  		}
   291  
   292  		// Clean up.
   293  		C.close(pfd[0])
   294  		s.mu.Lock()
   295  		delete(s.readers, logWatcher)
   296  		s.mu.Unlock()
   297  		close(logWatcher.Msg)
   298  		newCursor <- cursor
   299  	}()
   300  
   301  	// Wait until we're told to stop.
   302  	select {
   303  	case cursor = <-newCursor:
   304  	case <-logWatcher.WatchConsumerGone():
   305  		// Notify the other goroutine that its work is done.
   306  		C.close(pfd[1])
   307  		cursor = <-newCursor
   308  	}
   309  
   310  	return cursor
   311  }
   312  
   313  func (s *journald) readLogs(logWatcher *logger.LogWatcher, config logger.ReadConfig) {
   314  	var j *C.sd_journal
   315  	var cmatch, cursor *C.char
   316  	var stamp C.uint64_t
   317  	var sinceUnixMicro uint64
   318  	var untilUnixMicro uint64
   319  	var pipes [2]C.int
   320  
   321  	// Get a handle to the journal.
   322  	rc := C.sd_journal_open(&j, C.int(0))
   323  	if rc != 0 {
   324  		logWatcher.Err <- fmt.Errorf("error opening journal")
   325  		close(logWatcher.Msg)
   326  		return
   327  	}
   328  	// If we end up following the log, we can set the journal context
   329  	// pointer and the channel pointer to nil so that we won't close them
   330  	// here, potentially while the goroutine that uses them is still
   331  	// running.  Otherwise, close them when we return from this function.
   332  	following := false
   333  	defer func(pfollowing *bool) {
   334  		if !*pfollowing {
   335  			close(logWatcher.Msg)
   336  		}
   337  		C.sd_journal_close(j)
   338  	}(&following)
   339  	// Remove limits on the size of data items that we'll retrieve.
   340  	rc = C.sd_journal_set_data_threshold(j, C.size_t(0))
   341  	if rc != 0 {
   342  		logWatcher.Err <- fmt.Errorf("error setting journal data threshold")
   343  		return
   344  	}
   345  	// Add a match to have the library do the searching for us.
   346  	cmatch = C.CString("CONTAINER_ID_FULL=" + s.vars["CONTAINER_ID_FULL"])
   347  	defer C.free(unsafe.Pointer(cmatch))
   348  	rc = C.sd_journal_add_match(j, unsafe.Pointer(cmatch), C.strlen(cmatch))
   349  	if rc != 0 {
   350  		logWatcher.Err <- fmt.Errorf("error setting journal match")
   351  		return
   352  	}
   353  	// If we have a cutoff time, convert it to Unix time once.
   354  	if !config.Since.IsZero() {
   355  		nano := config.Since.UnixNano()
   356  		sinceUnixMicro = uint64(nano / 1000)
   357  	}
   358  	// If we have an until value, convert it too
   359  	if !config.Until.IsZero() {
   360  		nano := config.Until.UnixNano()
   361  		untilUnixMicro = uint64(nano / 1000)
   362  	}
   363  	if config.Tail > 0 {
   364  		lines := config.Tail
   365  		// If until time provided, start from there.
   366  		// Otherwise start at the end of the journal.
   367  		if untilUnixMicro != 0 && C.sd_journal_seek_realtime_usec(j, C.uint64_t(untilUnixMicro)) < 0 {
   368  			logWatcher.Err <- fmt.Errorf("error seeking provided until value")
   369  			return
   370  		} else if C.sd_journal_seek_tail(j) < 0 {
   371  			logWatcher.Err <- fmt.Errorf("error seeking to end of journal")
   372  			return
   373  		}
   374  		if C.sd_journal_previous(j) < 0 {
   375  			logWatcher.Err <- fmt.Errorf("error backtracking to previous journal entry")
   376  			return
   377  		}
   378  		// Walk backward.
   379  		for lines > 0 {
   380  			// Stop if the entry time is before our cutoff.
   381  			// We'll need the entry time if it isn't, so go
   382  			// ahead and parse it now.
   383  			if C.sd_journal_get_realtime_usec(j, &stamp) != 0 {
   384  				break
   385  			} else {
   386  				// Compare the timestamp on the entry to our threshold value.
   387  				if sinceUnixMicro != 0 && sinceUnixMicro > uint64(stamp) {
   388  					break
   389  				}
   390  			}
   391  			lines--
   392  			// If we're at the start of the journal, or
   393  			// don't need to back up past any more entries,
   394  			// stop.
   395  			if lines == 0 || C.sd_journal_previous(j) <= 0 {
   396  				break
   397  			}
   398  		}
   399  	} else {
   400  		// Start at the beginning of the journal.
   401  		if C.sd_journal_seek_head(j) < 0 {
   402  			logWatcher.Err <- fmt.Errorf("error seeking to start of journal")
   403  			return
   404  		}
   405  		// If we have a cutoff date, fast-forward to it.
   406  		if sinceUnixMicro != 0 && C.sd_journal_seek_realtime_usec(j, C.uint64_t(sinceUnixMicro)) != 0 {
   407  			logWatcher.Err <- fmt.Errorf("error seeking to start time in journal")
   408  			return
   409  		}
   410  		if C.sd_journal_next(j) < 0 {
   411  			logWatcher.Err <- fmt.Errorf("error skipping to next journal entry")
   412  			return
   413  		}
   414  	}
   415  	cursor, _ = s.drainJournal(logWatcher, j, nil, untilUnixMicro)
   416  	if config.Follow {
   417  		// Allocate a descriptor for following the journal, if we'll
   418  		// need one.  Do it here so that we can report if it fails.
   419  		if fd := C.sd_journal_get_fd(j); fd < C.int(0) {
   420  			logWatcher.Err <- fmt.Errorf("error opening journald follow descriptor: %q", C.GoString(C.strerror(-fd)))
   421  		} else {
   422  			// Create a pipe that we can poll at the same time as
   423  			// the journald descriptor.
   424  			if C.pipe(&pipes[0]) == C.int(-1) {
   425  				logWatcher.Err <- fmt.Errorf("error opening journald close notification pipe")
   426  			} else {
   427  				cursor = s.followJournal(logWatcher, j, pipes, cursor, untilUnixMicro)
   428  				// Let followJournal handle freeing the journal context
   429  				// object and closing the channel.
   430  				following = true
   431  			}
   432  		}
   433  	}
   434  
   435  	C.free(unsafe.Pointer(cursor))
   436  	return
   437  }
   438  
   439  func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher {
   440  	logWatcher := logger.NewLogWatcher()
   441  	go s.readLogs(logWatcher, config)
   442  	return logWatcher
   443  }