github.com/dpiddy/docker@v1.12.2-rc1/daemon/logger/journald/read.go (about) 1 // +build linux,cgo,!static_build,journald 2 3 package journald 4 5 // #include <sys/types.h> 6 // #include <sys/poll.h> 7 // #include <systemd/sd-journal.h> 8 // #include <errno.h> 9 // #include <stdio.h> 10 // #include <stdlib.h> 11 // #include <string.h> 12 // #include <time.h> 13 // #include <unistd.h> 14 // 15 //static int get_message(sd_journal *j, const char **msg, size_t *length) 16 //{ 17 // int rc; 18 // *msg = NULL; 19 // *length = 0; 20 // rc = sd_journal_get_data(j, "MESSAGE", (const void **) msg, length); 21 // if (rc == 0) { 22 // if (*length > 8) { 23 // (*msg) += 8; 24 // *length -= 8; 25 // } else { 26 // *msg = NULL; 27 // *length = 0; 28 // rc = -ENOENT; 29 // } 30 // } 31 // return rc; 32 //} 33 //static int get_priority(sd_journal *j, int *priority) 34 //{ 35 // const void *data; 36 // size_t i, length; 37 // int rc; 38 // *priority = -1; 39 // rc = sd_journal_get_data(j, "PRIORITY", &data, &length); 40 // if (rc == 0) { 41 // if ((length > 9) && (strncmp(data, "PRIORITY=", 9) == 0)) { 42 // *priority = 0; 43 // for (i = 9; i < length; i++) { 44 // *priority = *priority * 10 + ((const char *)data)[i] - '0'; 45 // } 46 // if (length > 9) { 47 // rc = 0; 48 // } 49 // } 50 // } 51 // return rc; 52 //} 53 //static int is_attribute_field(const char *msg, size_t length) 54 //{ 55 // const struct known_field { 56 // const char *name; 57 // size_t length; 58 // } fields[] = { 59 // {"MESSAGE", sizeof("MESSAGE") - 1}, 60 // {"MESSAGE_ID", sizeof("MESSAGE_ID") - 1}, 61 // {"PRIORITY", sizeof("PRIORITY") - 1}, 62 // {"CODE_FILE", sizeof("CODE_FILE") - 1}, 63 // {"CODE_LINE", sizeof("CODE_LINE") - 1}, 64 // {"CODE_FUNC", sizeof("CODE_FUNC") - 1}, 65 // {"ERRNO", sizeof("ERRNO") - 1}, 66 // {"SYSLOG_FACILITY", sizeof("SYSLOG_FACILITY") - 1}, 67 // {"SYSLOG_IDENTIFIER", sizeof("SYSLOG_IDENTIFIER") - 1}, 68 // {"SYSLOG_PID", sizeof("SYSLOG_PID") - 1}, 69 // {"CONTAINER_NAME", sizeof("CONTAINER_NAME") - 1}, 70 // {"CONTAINER_ID", sizeof("CONTAINER_ID") - 1}, 71 // {"CONTAINER_ID_FULL", sizeof("CONTAINER_ID_FULL") - 1}, 72 // {"CONTAINER_TAG", sizeof("CONTAINER_TAG") - 1}, 73 // }; 74 // unsigned int i; 75 // void *p; 76 // if ((length < 1) || (msg[0] == '_') || ((p = memchr(msg, '=', length)) == NULL)) { 77 // return -1; 78 // } 79 // length = ((const char *) p) - msg; 80 // for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) { 81 // if ((fields[i].length == length) && (memcmp(fields[i].name, msg, length) == 0)) { 82 // return -1; 83 // } 84 // } 85 // return 0; 86 //} 87 //static int get_attribute_field(sd_journal *j, const char **msg, size_t *length) 88 //{ 89 // int rc; 90 // *msg = NULL; 91 // *length = 0; 92 // while ((rc = sd_journal_enumerate_data(j, (const void **) msg, length)) > 0) { 93 // if (is_attribute_field(*msg, *length) == 0) { 94 // break; 95 // } 96 // rc = -ENOENT; 97 // } 98 // return rc; 99 //} 100 //static int wait_for_data_or_close(sd_journal *j, int pipefd) 101 //{ 102 // struct pollfd fds[2]; 103 // uint64_t when = 0; 104 // int timeout, jevents, i; 105 // struct timespec ts; 106 // uint64_t now; 107 // do { 108 // memset(&fds, 0, sizeof(fds)); 109 // fds[0].fd = pipefd; 110 // fds[0].events = POLLHUP; 111 // fds[1].fd = sd_journal_get_fd(j); 112 // if (fds[1].fd < 0) { 113 // return fds[1].fd; 114 // } 115 // jevents = sd_journal_get_events(j); 116 // if (jevents < 0) { 117 // return jevents; 118 // } 119 // fds[1].events = jevents; 120 // sd_journal_get_timeout(j, &when); 121 // if (when == -1) { 122 // timeout = -1; 123 // } else { 124 // clock_gettime(CLOCK_MONOTONIC, &ts); 125 // now = (uint64_t) ts.tv_sec * 1000000 + ts.tv_nsec / 1000; 126 // timeout = when > now ? (int) ((when - now + 999) / 1000) : 0; 127 // } 128 // i = poll(fds, 2, timeout); 129 // if ((i == -1) && (errno != EINTR)) { 130 // /* An unexpected error. */ 131 // return (errno != 0) ? -errno : -EINTR; 132 // } 133 // if (fds[0].revents & POLLHUP) { 134 // /* The close notification pipe was closed. */ 135 // return 0; 136 // } 137 // if (sd_journal_process(j) == SD_JOURNAL_APPEND) { 138 // /* Data, which we might care about, was appended. */ 139 // return 1; 140 // } 141 // } while ((fds[0].revents & POLLHUP) == 0); 142 // return 0; 143 //} 144 import "C" 145 146 import ( 147 "fmt" 148 "strings" 149 "time" 150 "unsafe" 151 152 "github.com/Sirupsen/logrus" 153 "github.com/coreos/go-systemd/journal" 154 "github.com/docker/docker/daemon/logger" 155 ) 156 157 func (s *journald) Close() error { 158 s.readers.mu.Lock() 159 for reader := range s.readers.readers { 160 reader.Close() 161 } 162 s.readers.mu.Unlock() 163 return nil 164 } 165 166 func (s *journald) drainJournal(logWatcher *logger.LogWatcher, config logger.ReadConfig, j *C.sd_journal, oldCursor string) string { 167 var msg, data, cursor *C.char 168 var length C.size_t 169 var stamp C.uint64_t 170 var priority C.int 171 172 // Walk the journal from here forward until we run out of new entries. 173 drain: 174 for { 175 // Try not to send a given entry twice. 176 if oldCursor != "" { 177 ccursor := C.CString(oldCursor) 178 defer C.free(unsafe.Pointer(ccursor)) 179 for C.sd_journal_test_cursor(j, ccursor) > 0 { 180 if C.sd_journal_next(j) <= 0 { 181 break drain 182 } 183 } 184 } 185 // Read and send the logged message, if there is one to read. 186 i := C.get_message(j, &msg, &length) 187 if i != -C.ENOENT && i != -C.EADDRNOTAVAIL { 188 // Read the entry's timestamp. 189 if C.sd_journal_get_realtime_usec(j, &stamp) != 0 { 190 break 191 } 192 // Set up the time and text of the entry. 193 timestamp := time.Unix(int64(stamp)/1000000, (int64(stamp)%1000000)*1000) 194 line := append(C.GoBytes(unsafe.Pointer(msg), C.int(length)), "\n"...) 195 // Recover the stream name by mapping 196 // from the journal priority back to 197 // the stream that we would have 198 // assigned that value. 199 source := "" 200 if C.get_priority(j, &priority) != 0 { 201 source = "" 202 } else if priority == C.int(journal.PriErr) { 203 source = "stderr" 204 } else if priority == C.int(journal.PriInfo) { 205 source = "stdout" 206 } 207 // Retrieve the values of any variables we're adding to the journal. 208 attrs := make(map[string]string) 209 C.sd_journal_restart_data(j) 210 for C.get_attribute_field(j, &data, &length) > C.int(0) { 211 kv := strings.SplitN(C.GoStringN(data, C.int(length)), "=", 2) 212 attrs[kv[0]] = kv[1] 213 } 214 if len(attrs) == 0 { 215 attrs = nil 216 } 217 // Send the log message. 218 logWatcher.Msg <- &logger.Message{ 219 Line: line, 220 Source: source, 221 Timestamp: timestamp.In(time.UTC), 222 Attrs: attrs, 223 } 224 } 225 // If we're at the end of the journal, we're done (for now). 226 if C.sd_journal_next(j) <= 0 { 227 break 228 } 229 } 230 retCursor := "" 231 if C.sd_journal_get_cursor(j, &cursor) == 0 { 232 retCursor = C.GoString(cursor) 233 C.free(unsafe.Pointer(cursor)) 234 } 235 return retCursor 236 } 237 238 func (s *journald) followJournal(logWatcher *logger.LogWatcher, config logger.ReadConfig, j *C.sd_journal, pfd [2]C.int, cursor string) { 239 s.readers.mu.Lock() 240 s.readers.readers[logWatcher] = logWatcher 241 s.readers.mu.Unlock() 242 go func() { 243 // Keep copying journal data out until we're notified to stop 244 // or we hit an error. 245 status := C.wait_for_data_or_close(j, pfd[0]) 246 for status == 1 { 247 cursor = s.drainJournal(logWatcher, config, j, cursor) 248 status = C.wait_for_data_or_close(j, pfd[0]) 249 } 250 if status < 0 { 251 cerrstr := C.strerror(C.int(-status)) 252 errstr := C.GoString(cerrstr) 253 fmtstr := "error %q while attempting to follow journal for container %q" 254 logrus.Errorf(fmtstr, errstr, s.vars["CONTAINER_ID_FULL"]) 255 } 256 // Clean up. 257 C.close(pfd[0]) 258 s.readers.mu.Lock() 259 delete(s.readers.readers, logWatcher) 260 s.readers.mu.Unlock() 261 C.sd_journal_close(j) 262 close(logWatcher.Msg) 263 }() 264 // Wait until we're told to stop. 265 select { 266 case <-logWatcher.WatchClose(): 267 // Notify the other goroutine that its work is done. 268 C.close(pfd[1]) 269 } 270 } 271 272 func (s *journald) readLogs(logWatcher *logger.LogWatcher, config logger.ReadConfig) { 273 var j *C.sd_journal 274 var cmatch *C.char 275 var stamp C.uint64_t 276 var sinceUnixMicro uint64 277 var pipes [2]C.int 278 cursor := "" 279 280 // Get a handle to the journal. 281 rc := C.sd_journal_open(&j, C.int(0)) 282 if rc != 0 { 283 logWatcher.Err <- fmt.Errorf("error opening journal") 284 close(logWatcher.Msg) 285 return 286 } 287 // If we end up following the log, we can set the journal context 288 // pointer and the channel pointer to nil so that we won't close them 289 // here, potentially while the goroutine that uses them is still 290 // running. Otherwise, close them when we return from this function. 291 following := false 292 defer func(pfollowing *bool) { 293 if !*pfollowing { 294 C.sd_journal_close(j) 295 close(logWatcher.Msg) 296 } 297 }(&following) 298 // Remove limits on the size of data items that we'll retrieve. 299 rc = C.sd_journal_set_data_threshold(j, C.size_t(0)) 300 if rc != 0 { 301 logWatcher.Err <- fmt.Errorf("error setting journal data threshold") 302 return 303 } 304 // Add a match to have the library do the searching for us. 305 cmatch = C.CString("CONTAINER_ID_FULL=" + s.vars["CONTAINER_ID_FULL"]) 306 defer C.free(unsafe.Pointer(cmatch)) 307 rc = C.sd_journal_add_match(j, unsafe.Pointer(cmatch), C.strlen(cmatch)) 308 if rc != 0 { 309 logWatcher.Err <- fmt.Errorf("error setting journal match") 310 return 311 } 312 // If we have a cutoff time, convert it to Unix time once. 313 if !config.Since.IsZero() { 314 nano := config.Since.UnixNano() 315 sinceUnixMicro = uint64(nano / 1000) 316 } 317 if config.Tail > 0 { 318 lines := config.Tail 319 // Start at the end of the journal. 320 if C.sd_journal_seek_tail(j) < 0 { 321 logWatcher.Err <- fmt.Errorf("error seeking to end of journal") 322 return 323 } 324 if C.sd_journal_previous(j) < 0 { 325 logWatcher.Err <- fmt.Errorf("error backtracking to previous journal entry") 326 return 327 } 328 // Walk backward. 329 for lines > 0 { 330 // Stop if the entry time is before our cutoff. 331 // We'll need the entry time if it isn't, so go 332 // ahead and parse it now. 333 if C.sd_journal_get_realtime_usec(j, &stamp) != 0 { 334 break 335 } else { 336 // Compare the timestamp on the entry 337 // to our threshold value. 338 if sinceUnixMicro != 0 && sinceUnixMicro > uint64(stamp) { 339 break 340 } 341 } 342 lines-- 343 // If we're at the start of the journal, or 344 // don't need to back up past any more entries, 345 // stop. 346 if lines == 0 || C.sd_journal_previous(j) <= 0 { 347 break 348 } 349 } 350 } else { 351 // Start at the beginning of the journal. 352 if C.sd_journal_seek_head(j) < 0 { 353 logWatcher.Err <- fmt.Errorf("error seeking to start of journal") 354 return 355 } 356 // If we have a cutoff date, fast-forward to it. 357 if sinceUnixMicro != 0 && C.sd_journal_seek_realtime_usec(j, C.uint64_t(sinceUnixMicro)) != 0 { 358 logWatcher.Err <- fmt.Errorf("error seeking to start time in journal") 359 return 360 } 361 if C.sd_journal_next(j) < 0 { 362 logWatcher.Err <- fmt.Errorf("error skipping to next journal entry") 363 return 364 } 365 } 366 cursor = s.drainJournal(logWatcher, config, j, "") 367 if config.Follow { 368 // Allocate a descriptor for following the journal, if we'll 369 // need one. Do it here so that we can report if it fails. 370 if fd := C.sd_journal_get_fd(j); fd < C.int(0) { 371 logWatcher.Err <- fmt.Errorf("error opening journald follow descriptor: %q", C.GoString(C.strerror(-fd))) 372 } else { 373 // Create a pipe that we can poll at the same time as 374 // the journald descriptor. 375 if C.pipe(&pipes[0]) == C.int(-1) { 376 logWatcher.Err <- fmt.Errorf("error opening journald close notification pipe") 377 } else { 378 s.followJournal(logWatcher, config, j, pipes, cursor) 379 // Let followJournal handle freeing the journal context 380 // object and closing the channel. 381 following = true 382 } 383 } 384 } 385 return 386 } 387 388 func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher { 389 logWatcher := logger.NewLogWatcher() 390 go s.readLogs(logWatcher, config) 391 return logWatcher 392 }