github.com/jwhonce/docker@v0.6.7-0.20190327063223-da823cf3a5a3/daemon/logger/journald/read.go (about) 1 // +build linux,cgo,!static_build,journald 2 3 package journald // import "github.com/docker/docker/daemon/logger/journald" 4 5 // #include <sys/types.h> 6 // #include <sys/poll.h> 7 // #include <systemd/sd-journal.h> 8 // #include <errno.h> 9 // #include <stdio.h> 10 // #include <stdlib.h> 11 // #include <string.h> 12 // #include <time.h> 13 // #include <unistd.h> 14 // 15 //static int get_message(sd_journal *j, const char **msg, size_t *length, int *partial) 16 //{ 17 // int rc; 18 // size_t plength; 19 // *msg = NULL; 20 // *length = 0; 21 // plength = strlen("CONTAINER_PARTIAL_MESSAGE=true"); 22 // rc = sd_journal_get_data(j, "CONTAINER_PARTIAL_MESSAGE", (const void **) msg, length); 23 // *partial = ((rc == 0) && (*length == plength) && (memcmp(*msg, "CONTAINER_PARTIAL_MESSAGE=true", plength) == 0)); 24 // rc = sd_journal_get_data(j, "MESSAGE", (const void **) msg, length); 25 // if (rc == 0) { 26 // if (*length > 8) { 27 // (*msg) += 8; 28 // *length -= 8; 29 // } else { 30 // *msg = NULL; 31 // *length = 0; 32 // rc = -ENOENT; 33 // } 34 // } 35 // return rc; 36 //} 37 //static int get_priority(sd_journal *j, int *priority) 38 //{ 39 // const void *data; 40 // size_t i, length; 41 // int rc; 42 // *priority = -1; 43 // rc = sd_journal_get_data(j, "PRIORITY", &data, &length); 44 // if (rc == 0) { 45 // if ((length > 9) && (strncmp(data, "PRIORITY=", 9) == 0)) { 46 // *priority = 0; 47 // for (i = 9; i < length; i++) { 48 // *priority = *priority * 10 + ((const char *)data)[i] - '0'; 49 // } 50 // if (length > 9) { 51 // rc = 0; 52 // } 53 // } 54 // } 55 // return rc; 56 //} 57 //static int is_attribute_field(const char *msg, size_t length) 58 //{ 59 // static const struct known_field { 60 // const char *name; 61 // size_t length; 62 // } fields[] = { 63 // {"MESSAGE", sizeof("MESSAGE") - 1}, 64 // {"MESSAGE_ID", sizeof("MESSAGE_ID") - 1}, 65 // {"PRIORITY", sizeof("PRIORITY") - 1}, 66 // {"CODE_FILE", sizeof("CODE_FILE") - 1}, 67 // {"CODE_LINE", sizeof("CODE_LINE") - 1}, 68 // {"CODE_FUNC", sizeof("CODE_FUNC") - 1}, 69 // {"ERRNO", sizeof("ERRNO") - 1}, 70 // {"SYSLOG_FACILITY", sizeof("SYSLOG_FACILITY") - 1}, 71 // {"SYSLOG_IDENTIFIER", sizeof("SYSLOG_IDENTIFIER") - 1}, 72 // {"SYSLOG_PID", sizeof("SYSLOG_PID") - 1}, 73 // {"CONTAINER_NAME", sizeof("CONTAINER_NAME") - 1}, 74 // {"CONTAINER_ID", sizeof("CONTAINER_ID") - 1}, 75 // {"CONTAINER_ID_FULL", sizeof("CONTAINER_ID_FULL") - 1}, 76 // {"CONTAINER_TAG", sizeof("CONTAINER_TAG") - 1}, 77 // }; 78 // unsigned int i; 79 // void *p; 80 // if ((length < 1) || (msg[0] == '_') || ((p = memchr(msg, '=', length)) == NULL)) { 81 // return -1; 82 // } 83 // length = ((const char *) p) - msg; 84 // for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) { 85 // if ((fields[i].length == length) && (memcmp(fields[i].name, msg, length) == 0)) { 86 // return -1; 87 // } 88 // } 89 // return 0; 90 //} 91 //static int get_attribute_field(sd_journal *j, const char **msg, size_t *length) 92 //{ 93 // int rc; 94 // *msg = NULL; 95 // *length = 0; 96 // while ((rc = sd_journal_enumerate_data(j, (const void **) msg, length)) > 0) { 97 // if (is_attribute_field(*msg, *length) == 0) { 98 // break; 99 // } 100 // rc = -ENOENT; 101 // } 102 // return rc; 103 //} 104 //static int wait_for_data_cancelable(sd_journal *j, int pipefd) 105 //{ 106 // struct pollfd fds[2]; 107 // uint64_t when = 0; 108 // int timeout, jevents, i; 109 // struct timespec ts; 110 // uint64_t now; 111 // 112 // memset(&fds, 0, sizeof(fds)); 113 // fds[0].fd = pipefd; 114 // fds[0].events = POLLHUP; 115 // fds[1].fd = sd_journal_get_fd(j); 116 // if (fds[1].fd < 0) { 117 // return fds[1].fd; 118 // } 119 // 120 // do { 121 // jevents = sd_journal_get_events(j); 122 // if (jevents < 0) { 123 // return jevents; 124 // } 125 // fds[1].events = jevents; 126 // sd_journal_get_timeout(j, &when); 127 // if (when == -1) { 128 // timeout = -1; 129 // } else { 130 // clock_gettime(CLOCK_MONOTONIC, &ts); 131 // now = (uint64_t) ts.tv_sec * 1000000 + ts.tv_nsec / 1000; 132 // timeout = when > now ? (int) ((when - now + 999) / 1000) : 0; 133 // } 134 // i = poll(fds, 2, timeout); 135 // if ((i == -1) && (errno != EINTR)) { 136 // /* An unexpected error. */ 137 // return (errno != 0) ? -errno : -EINTR; 138 // } 139 // if (fds[0].revents & POLLHUP) { 140 // /* The close notification pipe was closed. */ 141 // return 0; 142 // } 143 // if (sd_journal_process(j) == SD_JOURNAL_APPEND) { 144 // /* Data, which we might care about, was appended. */ 145 // return 1; 146 // } 147 // } while ((fds[0].revents & POLLHUP) == 0); 148 // return 0; 149 //} 150 import "C" 151 152 import ( 153 "fmt" 154 "strings" 155 "time" 156 "unsafe" 157 158 "github.com/coreos/go-systemd/journal" 159 "github.com/docker/docker/api/types/backend" 160 "github.com/docker/docker/daemon/logger" 161 "github.com/sirupsen/logrus" 162 ) 163 164 func (s *journald) Close() error { 165 s.mu.Lock() 166 s.closed = true 167 for r := range s.readers { 168 r.ProducerGone() 169 delete(s.readers, r) 170 171 } 172 s.mu.Unlock() 173 return nil 174 } 175 176 func (s *journald) drainJournal(logWatcher *logger.LogWatcher, j *C.sd_journal, oldCursor *C.char, untilUnixMicro uint64) (*C.char, bool) { 177 var msg, data, cursor *C.char 178 var length C.size_t 179 var stamp C.uint64_t 180 var priority, partial C.int 181 var done bool 182 183 // Walk the journal from here forward until we run out of new entries 184 // or we reach the until value (if provided). 185 drain: 186 for { 187 // Try not to send a given entry twice. 188 if oldCursor != nil { 189 for C.sd_journal_test_cursor(j, oldCursor) > 0 { 190 if C.sd_journal_next(j) <= 0 { 191 break drain 192 } 193 } 194 } 195 // Read and send the logged message, if there is one to read. 196 i := C.get_message(j, &msg, &length, &partial) 197 if i != -C.ENOENT && i != -C.EADDRNOTAVAIL { 198 // Read the entry's timestamp. 199 if C.sd_journal_get_realtime_usec(j, &stamp) != 0 { 200 break 201 } 202 // Break if the timestamp exceeds any provided until flag. 203 if untilUnixMicro != 0 && untilUnixMicro < uint64(stamp) { 204 done = true 205 break 206 } 207 208 // Set up the time and text of the entry. 209 timestamp := time.Unix(int64(stamp)/1000000, (int64(stamp)%1000000)*1000) 210 line := C.GoBytes(unsafe.Pointer(msg), C.int(length)) 211 if partial == 0 { 212 line = append(line, "\n"...) 213 } 214 // Recover the stream name by mapping 215 // from the journal priority back to 216 // the stream that we would have 217 // assigned that value. 218 source := "" 219 if C.get_priority(j, &priority) != 0 { 220 source = "" 221 } else if priority == C.int(journal.PriErr) { 222 source = "stderr" 223 } else if priority == C.int(journal.PriInfo) { 224 source = "stdout" 225 } 226 // Retrieve the values of any variables we're adding to the journal. 227 var attrs []backend.LogAttr 228 C.sd_journal_restart_data(j) 229 for C.get_attribute_field(j, &data, &length) > C.int(0) { 230 kv := strings.SplitN(C.GoStringN(data, C.int(length)), "=", 2) 231 attrs = append(attrs, backend.LogAttr{Key: kv[0], Value: kv[1]}) 232 } 233 // Send the log message. 234 logWatcher.Msg <- &logger.Message{ 235 Line: line, 236 Source: source, 237 Timestamp: timestamp.In(time.UTC), 238 Attrs: attrs, 239 } 240 } 241 // If we're at the end of the journal, we're done (for now). 242 if C.sd_journal_next(j) <= 0 { 243 break 244 } 245 } 246 247 // free(NULL) is safe 248 C.free(unsafe.Pointer(oldCursor)) 249 if C.sd_journal_get_cursor(j, &cursor) != 0 { 250 // ensure that we won't be freeing an address that's invalid 251 cursor = nil 252 } 253 return cursor, done 254 } 255 256 func (s *journald) followJournal(logWatcher *logger.LogWatcher, j *C.sd_journal, pfd [2]C.int, cursor *C.char, untilUnixMicro uint64) *C.char { 257 s.mu.Lock() 258 s.readers[logWatcher] = struct{}{} 259 if s.closed { 260 // the journald Logger is closed, presumably because the container has been 261 // reset. So we shouldn't follow, because we'll never be woken up. But we 262 // should make one more drainJournal call to be sure we've got all the logs. 263 // Close pfd[1] so that one drainJournal happens, then cleanup, then return. 264 C.close(pfd[1]) 265 } 266 s.mu.Unlock() 267 268 newCursor := make(chan *C.char) 269 270 go func() { 271 for { 272 // Keep copying journal data out until we're notified to stop 273 // or we hit an error. 274 status := C.wait_for_data_cancelable(j, pfd[0]) 275 if status < 0 { 276 cerrstr := C.strerror(C.int(-status)) 277 errstr := C.GoString(cerrstr) 278 fmtstr := "error %q while attempting to follow journal for container %q" 279 logrus.Errorf(fmtstr, errstr, s.vars["CONTAINER_ID_FULL"]) 280 break 281 } 282 283 var done bool 284 cursor, done = s.drainJournal(logWatcher, j, cursor, untilUnixMicro) 285 286 if status != 1 || done { 287 // We were notified to stop 288 break 289 } 290 } 291 292 // Clean up. 293 C.close(pfd[0]) 294 s.mu.Lock() 295 delete(s.readers, logWatcher) 296 s.mu.Unlock() 297 close(logWatcher.Msg) 298 newCursor <- cursor 299 }() 300 301 // Wait until we're told to stop. 302 select { 303 case cursor = <-newCursor: 304 case <-logWatcher.WatchConsumerGone(): 305 // Notify the other goroutine that its work is done. 306 C.close(pfd[1]) 307 cursor = <-newCursor 308 } 309 310 return cursor 311 } 312 313 func (s *journald) readLogs(logWatcher *logger.LogWatcher, config logger.ReadConfig) { 314 var j *C.sd_journal 315 var cmatch, cursor *C.char 316 var stamp C.uint64_t 317 var sinceUnixMicro uint64 318 var untilUnixMicro uint64 319 var pipes [2]C.int 320 321 // Get a handle to the journal. 322 rc := C.sd_journal_open(&j, C.int(0)) 323 if rc != 0 { 324 logWatcher.Err <- fmt.Errorf("error opening journal") 325 close(logWatcher.Msg) 326 return 327 } 328 // If we end up following the log, we can set the journal context 329 // pointer and the channel pointer to nil so that we won't close them 330 // here, potentially while the goroutine that uses them is still 331 // running. Otherwise, close them when we return from this function. 332 following := false 333 defer func(pfollowing *bool) { 334 if !*pfollowing { 335 close(logWatcher.Msg) 336 } 337 C.sd_journal_close(j) 338 }(&following) 339 // Remove limits on the size of data items that we'll retrieve. 340 rc = C.sd_journal_set_data_threshold(j, C.size_t(0)) 341 if rc != 0 { 342 logWatcher.Err <- fmt.Errorf("error setting journal data threshold") 343 return 344 } 345 // Add a match to have the library do the searching for us. 346 cmatch = C.CString("CONTAINER_ID_FULL=" + s.vars["CONTAINER_ID_FULL"]) 347 defer C.free(unsafe.Pointer(cmatch)) 348 rc = C.sd_journal_add_match(j, unsafe.Pointer(cmatch), C.strlen(cmatch)) 349 if rc != 0 { 350 logWatcher.Err <- fmt.Errorf("error setting journal match") 351 return 352 } 353 // If we have a cutoff time, convert it to Unix time once. 354 if !config.Since.IsZero() { 355 nano := config.Since.UnixNano() 356 sinceUnixMicro = uint64(nano / 1000) 357 } 358 // If we have an until value, convert it too 359 if !config.Until.IsZero() { 360 nano := config.Until.UnixNano() 361 untilUnixMicro = uint64(nano / 1000) 362 } 363 if config.Tail > 0 { 364 lines := config.Tail 365 // If until time provided, start from there. 366 // Otherwise start at the end of the journal. 367 if untilUnixMicro != 0 && C.sd_journal_seek_realtime_usec(j, C.uint64_t(untilUnixMicro)) < 0 { 368 logWatcher.Err <- fmt.Errorf("error seeking provided until value") 369 return 370 } else if C.sd_journal_seek_tail(j) < 0 { 371 logWatcher.Err <- fmt.Errorf("error seeking to end of journal") 372 return 373 } 374 if C.sd_journal_previous(j) < 0 { 375 logWatcher.Err <- fmt.Errorf("error backtracking to previous journal entry") 376 return 377 } 378 // Walk backward. 379 for lines > 0 { 380 // Stop if the entry time is before our cutoff. 381 // We'll need the entry time if it isn't, so go 382 // ahead and parse it now. 383 if C.sd_journal_get_realtime_usec(j, &stamp) != 0 { 384 break 385 } else { 386 // Compare the timestamp on the entry to our threshold value. 387 if sinceUnixMicro != 0 && sinceUnixMicro > uint64(stamp) { 388 break 389 } 390 } 391 lines-- 392 // If we're at the start of the journal, or 393 // don't need to back up past any more entries, 394 // stop. 395 if lines == 0 || C.sd_journal_previous(j) <= 0 { 396 break 397 } 398 } 399 } else { 400 // Start at the beginning of the journal. 401 if C.sd_journal_seek_head(j) < 0 { 402 logWatcher.Err <- fmt.Errorf("error seeking to start of journal") 403 return 404 } 405 // If we have a cutoff date, fast-forward to it. 406 if sinceUnixMicro != 0 && C.sd_journal_seek_realtime_usec(j, C.uint64_t(sinceUnixMicro)) != 0 { 407 logWatcher.Err <- fmt.Errorf("error seeking to start time in journal") 408 return 409 } 410 if C.sd_journal_next(j) < 0 { 411 logWatcher.Err <- fmt.Errorf("error skipping to next journal entry") 412 return 413 } 414 } 415 cursor, _ = s.drainJournal(logWatcher, j, nil, untilUnixMicro) 416 if config.Follow { 417 // Allocate a descriptor for following the journal, if we'll 418 // need one. Do it here so that we can report if it fails. 419 if fd := C.sd_journal_get_fd(j); fd < C.int(0) { 420 logWatcher.Err <- fmt.Errorf("error opening journald follow descriptor: %q", C.GoString(C.strerror(-fd))) 421 } else { 422 // Create a pipe that we can poll at the same time as 423 // the journald descriptor. 424 if C.pipe(&pipes[0]) == C.int(-1) { 425 logWatcher.Err <- fmt.Errorf("error opening journald close notification pipe") 426 } else { 427 cursor = s.followJournal(logWatcher, j, pipes, cursor, untilUnixMicro) 428 // Let followJournal handle freeing the journal context 429 // object and closing the channel. 430 following = true 431 } 432 } 433 } 434 435 C.free(unsafe.Pointer(cursor)) 436 return 437 } 438 439 func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher { 440 logWatcher := logger.NewLogWatcher() 441 go s.readLogs(logWatcher, config) 442 return logWatcher 443 }