github.com/moby/docker@v26.1.3+incompatible/daemon/logger/journald/read.go (about) 1 //go:build linux && cgo && !static_build && journald 2 3 package journald // import "github.com/docker/docker/daemon/logger/journald" 4 5 import ( 6 "context" 7 "runtime" 8 "strconv" 9 "sync/atomic" 10 "time" 11 12 "github.com/containerd/log" 13 "github.com/coreos/go-systemd/v22/journal" 14 "github.com/docker/docker/api/types/backend" 15 "github.com/docker/docker/daemon/logger" 16 "github.com/docker/docker/daemon/logger/journald/internal/sdjournal" 17 ) 18 19 const ( 20 closedDrainTimeout = 5 * time.Second 21 waitInterval = 250 * time.Millisecond 22 ) 23 24 // Fields which we know are not user-provided attribute fields. 25 var wellKnownFields = map[string]bool{ 26 "MESSAGE": true, 27 "MESSAGE_ID": true, 28 "PRIORITY": true, 29 "CODE_FILE": true, 30 "CODE_LINE": true, 31 "CODE_FUNC": true, 32 "ERRNO": true, 33 "SYSLOG_FACILITY": true, 34 fieldSyslogIdentifier: true, 35 "SYSLOG_PID": true, 36 fieldSyslogTimestamp: true, 37 fieldContainerName: true, 38 fieldContainerID: true, 39 fieldContainerIDFull: true, 40 fieldContainerTag: true, 41 fieldImageName: true, 42 fieldPLogID: true, 43 fieldPLogOrdinal: true, 44 fieldPLogLast: true, 45 fieldPartialMessage: true, 46 fieldLogEpoch: true, 47 fieldLogOrdinal: true, 48 } 49 50 type reader struct { 51 s *journald 52 j *sdjournal.Journal 53 logWatcher *logger.LogWatcher 54 config logger.ReadConfig 55 maxOrdinal uint64 56 ready chan struct{} 57 drainDeadline time.Time 58 } 59 60 func getMessage(d map[string]string) (line []byte, ok bool) { 61 m, ok := d["MESSAGE"] 62 if ok { 63 line = []byte(m) 64 if d[fieldPartialMessage] != "true" { 65 line = append(line, "\n"...) 66 } 67 } 68 return line, ok 69 } 70 71 func getPriority(d map[string]string) (journal.Priority, bool) { 72 if pri, ok := d["PRIORITY"]; ok { 73 i, err := strconv.Atoi(pri) 74 return journal.Priority(i), err == nil 75 } 76 return -1, false 77 } 78 79 // getSource recovers the stream name from the entry data by mapping from the 80 // journal priority field back to the stream that we would have assigned that 81 // value. 82 func getSource(d map[string]string) string { 83 source := "" 84 if priority, ok := getPriority(d); ok { 85 if priority == journal.PriErr { 86 source = "stderr" 87 } else if priority == journal.PriInfo { 88 source = "stdout" 89 } 90 } 91 return source 92 } 93 94 func getAttrs(d map[string]string) []backend.LogAttr { 95 var attrs []backend.LogAttr 96 for k, v := range d { 97 if k[0] != '_' && !wellKnownFields[k] { 98 attrs = append(attrs, backend.LogAttr{Key: k, Value: v}) 99 } 100 } 101 return attrs 102 } 103 104 // The SeekXYZ() methods all move the journal read pointer to a "conceptual" 105 // position which does not correspond to any journal entry. A subsequent call to 106 // Next(), Previous() or similar is necessary to resolve the read pointer to a 107 // discrete entry. 108 // https://github.com/systemd/systemd/pull/5930#issuecomment-300878104 109 // But that's not all! If there is no discrete entry to resolve the position to, 110 // the call to Next() or Previous() will just leave the read pointer in a 111 // conceptual position, or do something even more bizarre. 112 // https://github.com/systemd/systemd/issues/9934 113 114 // initialSeekHead positions the journal read pointer at the earliest journal 115 // entry with a timestamp of at least r.config.Since. It returns true if there 116 // is an entry to read at the read pointer. 117 func (r *reader) initialSeekHead() (bool, error) { 118 var err error 119 if r.config.Since.IsZero() { 120 err = r.j.SeekHead() 121 } else { 122 err = r.j.SeekRealtime(r.config.Since) 123 } 124 if err != nil { 125 return false, err 126 } 127 return r.j.Next() 128 } 129 130 // initialSeekTail positions the journal read pointer at a journal entry 131 // relative to the tail of the journal at the time of the call based on the 132 // specification in r.config. It returns true if there is an entry to read at 133 // the read pointer. Otherwise the read pointer is set to a conceptual position 134 // which will be resolved to the desired entry (once written) by advancing 135 // forward with r.j.Next() or similar. 136 func (r *reader) initialSeekTail() (bool, error) { 137 var err error 138 if r.config.Until.IsZero() { 139 err = r.j.SeekTail() 140 } else { 141 err = r.j.SeekRealtime(r.config.Until) 142 } 143 if err != nil { 144 return false, err 145 } 146 147 var ok bool 148 if r.config.Tail == 0 { 149 ok, err = r.j.Previous() 150 } else { 151 var n int 152 n, err = r.j.PreviousSkip(uint(r.config.Tail)) 153 ok = n > 0 154 } 155 if err != nil { 156 return ok, err 157 } 158 if !ok { 159 // The (filtered) journal has no entries. The tail is the head: all new 160 // entries which get written into the journal from this point forward 161 // should be read from the journal. However the read pointer is 162 // positioned at a conceptual position which is not condusive to reading 163 // those entries. The tail of the journal is resolved to the last entry 164 // in the journal _at the time of the first successful Previous() call_, 165 // which means that an arbitrary number of journal entries added in the 166 // interim may be skipped: race condition. While the realtime conceptual 167 // position is not so racy, it is also unhelpful: it is the timestamp 168 // past where reading should stop, so all logs that should be followed 169 // would be skipped over. 170 // Reset the read pointer position to avoid these problems. 171 return r.initialSeekHead() 172 } else if r.config.Tail == 0 { 173 // The journal read pointer is positioned at the discrete position of 174 // the journal entry _before_ the entry to send. 175 return r.j.Next() 176 } 177 178 // Check if the PreviousSkip went too far back. 179 timestamp, err := r.j.Realtime() 180 if err != nil { 181 return false, err 182 } 183 if timestamp.Before(r.config.Since) { 184 if err := r.j.SeekRealtime(r.config.Since); err != nil { 185 return false, err 186 } 187 return r.j.Next() 188 } 189 return true, nil 190 } 191 192 // wait blocks until the journal has new data to read, the reader's drain 193 // deadline is exceeded, or the log reading consumer is gone. 194 func (r *reader) wait() (bool, error) { 195 for { 196 dur := waitInterval 197 if !r.drainDeadline.IsZero() { 198 dur = time.Until(r.drainDeadline) 199 if dur < 0 { 200 // Container is gone but we haven't found the end of the 201 // logs before the deadline. Maybe it was dropped by 202 // journald, e.g. due to rate-limiting. 203 return false, nil 204 } else if dur > waitInterval { 205 dur = waitInterval 206 } 207 } 208 status, err := r.j.Wait(dur) 209 if err != nil { 210 return false, err 211 } else if status != sdjournal.StatusNOP { 212 return true, nil 213 } 214 select { 215 case <-r.logWatcher.WatchConsumerGone(): 216 return false, nil 217 case <-r.s.closed: 218 // Container is gone; don't wait indefinitely for journal entries that will never arrive. 219 if r.maxOrdinal >= atomic.LoadUint64(&r.s.ordinal) { 220 return false, nil 221 } 222 if r.drainDeadline.IsZero() { 223 r.drainDeadline = time.Now().Add(closedDrainTimeout) 224 } 225 default: 226 } 227 } 228 } 229 230 // nextWait blocks until there is a new journal entry to read, and advances the 231 // journal read pointer to it. 232 func (r *reader) nextWait() (bool, error) { 233 for { 234 if ok, err := r.j.Next(); err != nil || ok { 235 return ok, err 236 } 237 if ok, err := r.wait(); err != nil || !ok { 238 return false, err 239 } 240 } 241 } 242 243 // drainJournal reads and sends log messages from the journal, starting from the 244 // current read pointer, until the end of the journal or a terminal stopping 245 // condition is reached. 246 // 247 // It returns false when a terminal stopping condition has been reached: 248 // - the watch consumer is gone, or 249 // - (if until is nonzero) a log entry is read which has a timestamp after 250 // until 251 func (r *reader) drainJournal() (bool, error) { 252 for i := 0; ; i++ { 253 // Read the entry's timestamp. 254 timestamp, err := r.j.Realtime() 255 if err != nil { 256 return true, err 257 } 258 if !r.config.Until.IsZero() && r.config.Until.Before(timestamp) { 259 return false, nil 260 } 261 262 // Read and send the logged message, if there is one to read. 263 data, err := r.j.Data() 264 if err != nil { 265 return true, err 266 } 267 268 if data[fieldLogEpoch] == r.s.epoch { 269 seq, err := strconv.ParseUint(data[fieldLogOrdinal], 10, 64) 270 if err == nil && seq > r.maxOrdinal { 271 r.maxOrdinal = seq 272 } 273 } 274 275 if line, ok := getMessage(data); ok { 276 // Send the log message, unless the consumer is gone 277 msg := &logger.Message{ 278 Line: line, 279 Source: getSource(data), 280 Timestamp: timestamp.In(time.UTC), 281 Attrs: getAttrs(data), 282 } 283 // The daemon timestamp will differ from the "trusted" 284 // timestamp of when the event was received by journald. 285 // We can efficiently seek around the journal by the 286 // event timestamp, and the event timestamp is what 287 // journalctl displays. The daemon timestamp is just an 288 // application-supplied field with no special 289 // significance; libsystemd won't help us seek to the 290 // entry with the closest timestamp. 291 /* 292 if sts := data["SYSLOG_TIMESTAMP"]; sts != "" { 293 if tv, err := time.Parse(time.RFC3339Nano, sts); err == nil { 294 msg.Timestamp = tv 295 } 296 } 297 */ 298 select { 299 case <-r.logWatcher.WatchConsumerGone(): 300 return false, nil 301 case r.logWatcher.Msg <- msg: 302 } 303 } 304 305 // Call sd_journal_process() periodically during the processing loop 306 // to close any opened file descriptors for rotated (deleted) journal files. 307 if i != 0 && i%1024 == 0 { 308 if _, err := r.j.Process(); err != nil { 309 // log a warning but ignore it for now 310 log.G(context.TODO()).WithField("container", r.s.vars[fieldContainerIDFull]). 311 WithField("error", err). 312 Warn("journald: error processing journal") 313 } 314 } 315 316 if ok, err := r.j.Next(); err != nil || !ok { 317 return true, err 318 } 319 } 320 } 321 322 func (r *reader) readJournal() error { 323 caughtUp := atomic.LoadUint64(&r.s.ordinal) 324 if more, err := r.drainJournal(); err != nil || !more { 325 return err 326 } 327 328 if !r.config.Follow { 329 if r.s.readSyncTimeout == 0 { 330 return nil 331 } 332 r.drainDeadline = time.Now().Add(r.s.readSyncTimeout) 333 } 334 335 for { 336 select { 337 case <-r.s.closed: 338 // container is gone, drain journal 339 lastSeq := atomic.LoadUint64(&r.s.ordinal) 340 if r.maxOrdinal >= lastSeq { 341 // All caught up with the logger! 342 return nil 343 } 344 default: 345 } 346 347 if more, err := r.nextWait(); err != nil || !more { 348 return err 349 } 350 if more, err := r.drainJournal(); err != nil || !more { 351 return err 352 } 353 if !r.config.Follow && r.s.readSyncTimeout > 0 && r.maxOrdinal >= caughtUp { 354 return nil 355 } 356 } 357 } 358 359 func (r *reader) readLogs() { 360 defer close(r.logWatcher.Msg) 361 362 // Make sure the ready channel is closed in the event of an early 363 // return. 364 defer r.signalReady() 365 366 // Quoting https://www.freedesktop.org/software/systemd/man/sd-journal.html: 367 // Functions that operate on sd_journal objects are thread 368 // agnostic — given sd_journal pointer may only be used from one 369 // specific thread at all times (and it has to be the very same one 370 // during the entire lifetime of the object), but multiple, 371 // independent threads may use multiple, independent objects safely. 372 // 373 // sdjournal.Open returns a value which wraps an sd_journal pointer so 374 // we need to abide by those rules. 375 runtime.LockOSThread() 376 defer runtime.UnlockOSThread() 377 378 // Get a handle to the journal. 379 var err error 380 if r.s.journalReadDir != "" { 381 r.j, err = sdjournal.OpenDir(r.s.journalReadDir, 0) 382 } else { 383 r.j, err = sdjournal.Open(0) 384 } 385 if err != nil { 386 r.logWatcher.Err <- err 387 return 388 } 389 defer r.j.Close() 390 391 if r.config.Follow { 392 // Initialize library inotify watches early 393 if err := r.j.InitializeInotify(); err != nil { 394 r.logWatcher.Err <- err 395 return 396 } 397 } 398 399 // Remove limits on the size of data items that we'll retrieve. 400 if err := r.j.SetDataThreshold(0); err != nil { 401 r.logWatcher.Err <- err 402 return 403 } 404 // Add a match to have the library do the searching for us. 405 if err := r.j.AddMatch(fieldContainerIDFull, r.s.vars[fieldContainerIDFull]); err != nil { 406 r.logWatcher.Err <- err 407 return 408 } 409 410 var ok bool 411 if r.config.Tail >= 0 { 412 ok, err = r.initialSeekTail() 413 } else { 414 ok, err = r.initialSeekHead() 415 } 416 if err != nil { 417 r.logWatcher.Err <- err 418 return 419 } 420 r.signalReady() 421 if !ok { 422 if !r.config.Follow { 423 return 424 } 425 // Either the read pointer is positioned at a discrete journal entry, in 426 // which case the position will be unaffected by subsequent logging, or 427 // the read pointer is in the conceptual position corresponding to the 428 // first journal entry to send once it is logged in the future. 429 if more, err := r.nextWait(); err != nil || !more { 430 if err != nil { 431 r.logWatcher.Err <- err 432 } 433 return 434 } 435 } 436 437 if err := r.readJournal(); err != nil { 438 r.logWatcher.Err <- err 439 return 440 } 441 } 442 443 func (r *reader) signalReady() { 444 select { 445 case <-r.ready: 446 default: 447 close(r.ready) 448 } 449 } 450 451 func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher { 452 r := &reader{ 453 s: s, 454 logWatcher: logger.NewLogWatcher(), 455 config: config, 456 ready: make(chan struct{}), 457 } 458 go r.readLogs() 459 // Block until the reader is in position to read from the current config 460 // location to prevent race conditions in tests. 461 <-r.ready 462 return r.logWatcher 463 } 464 465 func waitUntilFlushedImpl(s *journald) error { 466 if s.readSyncTimeout == 0 { 467 return nil 468 } 469 470 ordinal := atomic.LoadUint64(&s.ordinal) 471 if ordinal == 0 { 472 return nil // No logs were sent; nothing to wait for. 473 } 474 475 flushed := make(chan error) 476 go func() { 477 defer close(flushed) 478 runtime.LockOSThread() 479 defer runtime.UnlockOSThread() 480 481 var ( 482 j *sdjournal.Journal 483 err error 484 ) 485 if s.journalReadDir != "" { 486 j, err = sdjournal.OpenDir(s.journalReadDir, 0) 487 } else { 488 j, err = sdjournal.Open(0) 489 } 490 if err != nil { 491 flushed <- err 492 return 493 } 494 defer j.Close() 495 496 if err := j.AddMatch(fieldContainerIDFull, s.vars[fieldContainerIDFull]); err != nil { 497 flushed <- err 498 return 499 } 500 if err := j.AddMatch(fieldLogEpoch, s.epoch); err != nil { 501 flushed <- err 502 return 503 } 504 if err := j.AddMatch(fieldLogOrdinal, strconv.FormatUint(ordinal, 10)); err != nil { 505 flushed <- err 506 return 507 } 508 509 deadline := time.Now().Add(s.readSyncTimeout) 510 for time.Now().Before(deadline) { 511 if ok, err := j.Next(); ok { 512 // Found it! 513 return 514 } else if err != nil { 515 flushed <- err 516 return 517 } 518 if _, err := j.Wait(100 * time.Millisecond); err != nil { 519 flushed <- err 520 return 521 } 522 } 523 log.G(context.TODO()).WithField("container", s.vars[fieldContainerIDFull]). 524 Warn("journald: deadline exceeded waiting for logs to be committed to journal") 525 }() 526 return <-flushed 527 } 528 529 func init() { 530 waitUntilFlushed = waitUntilFlushedImpl 531 }