github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/daemon/logger/journald/read.go

//go:build linux && cgo && !static_build && journald

package journald // import "github.com/Prakhar-Agarwal-byte/moby/daemon/logger/journald"

import (
	"context"
	"errors"
	"runtime"
	"strconv"
	"sync/atomic"
	"time"

	"github.com/containerd/log"
	"github.com/coreos/go-systemd/v22/journal"
	"github.com/Prakhar-Agarwal-byte/moby/api/types/backend"
	"github.com/Prakhar-Agarwal-byte/moby/daemon/logger"
	"github.com/Prakhar-Agarwal-byte/moby/daemon/logger/journald/internal/sdjournal"
)

const closedDrainTimeout = 5 * time.Second

// Fields which we know are not user-provided attribute fields.
var wellKnownFields = map[string]bool{
	"MESSAGE":             true,
	"MESSAGE_ID":          true,
	"PRIORITY":            true,
	"CODE_FILE":           true,
	"CODE_LINE":           true,
	"CODE_FUNC":           true,
	"ERRNO":               true,
	"SYSLOG_FACILITY":     true,
	fieldSyslogIdentifier: true,
	"SYSLOG_PID":          true,
	fieldSyslogTimestamp:  true,
	fieldContainerName:    true,
	fieldContainerID:      true,
	fieldContainerIDFull:  true,
	fieldContainerTag:     true,
	fieldImageName:        true,
	fieldPLogID:           true,
	fieldPLogOrdinal:      true,
	fieldPLogLast:         true,
	fieldPartialMessage:   true,
	fieldLogEpoch:         true,
	fieldLogOrdinal:       true,
}

type reader struct {
	s           *journald
	j           *sdjournal.Journal
	logWatcher  *logger.LogWatcher
	config      logger.ReadConfig
	maxOrdinal  uint64
	initialized bool
	ready       chan struct{}
}

func getMessage(d map[string]string) (line []byte, ok bool) {
	m, ok := d["MESSAGE"]
	if ok {
		line = []byte(m)
		if d[fieldPartialMessage] != "true" {
			line = append(line, "\n"...)
		}
	}
	return line, ok
}

func getPriority(d map[string]string) (journal.Priority, bool) {
	if pri, ok := d["PRIORITY"]; ok {
		i, err := strconv.Atoi(pri)
		return journal.Priority(i), err == nil
	}
	return -1, false
}

// getSource recovers the stream name from the entry data by mapping from the
// journal priority field back to the stream that we would have assigned that
// value.
func getSource(d map[string]string) string {
	source := ""
	if priority, ok := getPriority(d); ok {
		if priority == journal.PriErr {
			source = "stderr"
		} else if priority == journal.PriInfo {
			source = "stdout"
		}
	}
	return source
}

func getAttrs(d map[string]string) []backend.LogAttr {
	var attrs []backend.LogAttr
	for k, v := range d {
		if k[0] != '_' && !wellKnownFields[k] {
			attrs = append(attrs, backend.LogAttr{Key: k, Value: v})
		}
	}
	return attrs
}
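
// Illustrative sketch (not part of the upstream file): given a decoded journal
// entry map, the helpers above split it into message, stream name, and
// user-defined attributes roughly as follows. The entry contents are invented
// for the example.
//
//	d := map[string]string{
//		"MESSAGE":  "hello",
//		"PRIORITY": "6",     // journal.PriInfo, so the stream is "stdout"
//		"MY_LABEL": "value", // not well-known and not "_"-prefixed
//		"_PID":     "42",    // trusted journal field, skipped by getAttrs
//	}
//	line, ok := getMessage(d) // ok == true, line == []byte("hello\n")
//	src := getSource(d)       // "stdout"
//	attrs := getAttrs(d)      // []backend.LogAttr{{Key: "MY_LABEL", Value: "value"}}
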
// errDrainDone is the error returned by drainJournal to signal that there are
// no more log entries to send to the log watcher.
var errDrainDone = errors.New("journald drain done")

// drainJournal reads and sends log messages from the journal.
//
// drainJournal returns errDrainDone when a terminal stopping condition has
// been reached: the watch consumer is gone, a log entry is read which has a
// timestamp after until (if until is nonzero), or the log driver is closed and
// the last message logged has been sent from the journal. If the end of the
// journal is reached without encountering a terminal stopping condition, a nil
// error is returned.
func (r *reader) drainJournal() error {
	if !r.initialized {
		defer func() {
			r.signalReady()
			r.initialized = true
		}()

		var (
			err          error
			seekedToTail bool
		)
		if r.config.Tail >= 0 {
			if r.config.Until.IsZero() {
				err = r.j.SeekTail()
				seekedToTail = true
			} else {
				err = r.j.SeekRealtime(r.config.Until)
			}
		} else {
			if r.config.Since.IsZero() {
				err = r.j.SeekHead()
			} else {
				err = r.j.SeekRealtime(r.config.Since)
			}
		}
		if err != nil {
			return err
		}

		// SeekTail() followed by Next() behaves incorrectly, so we need
		// to work around the bug by ensuring the first discrete
		// movement of the read pointer is Previous() or PreviousSkip().
		// PreviousSkip() is called inside the loop when config.Tail > 0,
		// so the only case requiring special handling here is
		// config.Tail == 0.
		// https://github.com/systemd/systemd/issues/9934
		if seekedToTail && r.config.Tail == 0 {
			// Resolve the read pointer to the last entry in the
			// journal so that the call to Next() inside the loop
			// advances past it.
			if ok, err := r.j.Previous(); err != nil || !ok {
				return err
			}
		}
	}

	for i := 0; ; i++ {
		if !r.initialized && i == 0 && r.config.Tail > 0 {
			if n, err := r.j.PreviousSkip(uint(r.config.Tail)); err != nil || n == 0 {
				return err
			}
		} else if ok, err := r.j.Next(); err != nil || !ok {
			return err
		}

		if !r.initialized && i == 0 {
			// The cursor is in a position which will be unaffected
			// by subsequent logging.
			r.signalReady()
		}

		// Read the entry's timestamp.
		timestamp, err := r.j.Realtime()
		if err != nil {
			return err
		}
		// Check if the PreviousSkip went too far back. Check only the
		// initial position as we are comparing wall-clock timestamps,
		// which may not be monotonic. We don't want to skip over
		// messages sent later in time just because the clock moved
		// backwards.
		if !r.initialized && i == 0 && r.config.Tail > 0 && timestamp.Before(r.config.Since) {
			r.j.SeekRealtime(r.config.Since)
			continue
		}
		if !r.config.Until.IsZero() && r.config.Until.Before(timestamp) {
			return errDrainDone
		}

		// Read and send the logged message, if there is one to read.
		data, err := r.j.Data()
		if err != nil {
			return err
		}

		if data[fieldLogEpoch] == r.s.epoch {
			seq, err := strconv.ParseUint(data[fieldLogOrdinal], 10, 64)
			if err == nil && seq > r.maxOrdinal {
				r.maxOrdinal = seq
			}
		}

		if line, ok := getMessage(data); ok {
			// Send the log message, unless the consumer is gone.
			msg := &logger.Message{
				Line:      line,
				Source:    getSource(data),
				Timestamp: timestamp.In(time.UTC),
				Attrs:     getAttrs(data),
			}
			// The daemon timestamp will differ from the "trusted"
			// timestamp of when the event was received by journald.
			// We can efficiently seek around the journal by the
			// event timestamp, and the event timestamp is what
			// journalctl displays. The daemon timestamp is just an
			// application-supplied field with no special
			// significance; libsystemd won't help us seek to the
			// entry with the closest timestamp.
			/*
				if sts := data["SYSLOG_TIMESTAMP"]; sts != "" {
					if tv, err := time.Parse(time.RFC3339Nano, sts); err == nil {
						msg.Timestamp = tv
					}
				}
			*/
			select {
			case <-r.logWatcher.WatchConsumerGone():
				return errDrainDone
			case r.logWatcher.Msg <- msg:
			}
		}

		// Call sd_journal_process() periodically during the processing
		// loop to close any opened file descriptors for rotated
		// (deleted) journal files.
		if i != 0 && i%1024 == 0 {
			if _, err := r.j.Process(); err != nil {
				// log a warning but ignore it for now
				log.G(context.TODO()).WithField("container", r.s.vars[fieldContainerIDFull]).
					WithField("error", err).
					Warn("journald: error processing journal")
			}
		}
	}
}
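
// Hypothetical restatement (not in the upstream file) of the initial seek
// decision drainJournal makes before its first read, written as a standalone
// helper for readability. The name initialSeek is invented for illustration.
//
//	func initialSeek(j *sdjournal.Journal, cfg logger.ReadConfig) (seekedToTail bool, err error) {
//		if cfg.Tail >= 0 {
//			// Tailing a fixed number of lines: start from the end of the
//			// journal, or from --until when one was given.
//			if cfg.Until.IsZero() {
//				return true, j.SeekTail()
//			}
//			return false, j.SeekRealtime(cfg.Until)
//		}
//		// Tail < 0 (all lines requested): start from the beginning, or
//		// from --since when one was given.
//		if cfg.Since.IsZero() {
//			return false, j.SeekHead()
//		}
//		return false, j.SeekRealtime(cfg.Since)
//	}
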
func (r *reader) readJournal() error {
	caughtUp := atomic.LoadUint64(&r.s.ordinal)
	if err := r.drainJournal(); err != nil {
		if err != errDrainDone {
			return err
		}
		return nil
	}

	var drainTimeout <-chan time.Time
	if !r.config.Follow {
		if r.s.readSyncTimeout == 0 {
			return nil
		}
		tmr := time.NewTimer(r.s.readSyncTimeout)
		defer tmr.Stop()
		drainTimeout = tmr.C
	}

	for {
		status, err := r.j.Wait(250 * time.Millisecond)
		if err != nil {
			return err
		}
		select {
		case <-r.logWatcher.WatchConsumerGone():
			return nil // won't be able to write anything anymore
		case <-drainTimeout:
			// Container is gone but we haven't found the end of the
			// logs within the timeout. Maybe it was dropped by
			// journald, e.g. due to rate-limiting.
			return nil
		case <-r.s.closed:
			// container is gone, drain journal
			lastSeq := atomic.LoadUint64(&r.s.ordinal)
			if r.maxOrdinal >= lastSeq {
				// All caught up with the logger!
				return nil
			}
			if drainTimeout == nil {
				tmr := time.NewTimer(closedDrainTimeout)
				defer tmr.Stop()
				drainTimeout = tmr.C
			}
		default:
			// container is still alive
			if status == sdjournal.StatusNOP {
				// no new data -- keep waiting
				continue
			}
		}
		err = r.drainJournal()
		if err != nil {
			if err != errDrainDone {
				return err
			}
			return nil
		}
		if !r.config.Follow && r.s.readSyncTimeout > 0 && r.maxOrdinal >= caughtUp {
			return nil
		}
	}
}
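
// Hypothetical standalone sketch (not in the upstream file) of the journal
// setup that readLogs performs below: open a handle, remove the data-size
// limit, and restrict results to a single container. openContainerJournal is
// an invented name; it only uses calls already present in this file.
//
//	func openContainerJournal(readDir, containerID string) (*sdjournal.Journal, error) {
//		var (
//			j   *sdjournal.Journal
//			err error
//		)
//		if readDir != "" {
//			j, err = sdjournal.OpenDir(readDir, 0)
//		} else {
//			j, err = sdjournal.Open(0)
//		}
//		if err != nil {
//			return nil, err
//		}
//		if err := j.SetDataThreshold(0); err != nil {
//			j.Close()
//			return nil, err
//		}
//		if err := j.AddMatch(fieldContainerIDFull, containerID); err != nil {
//			j.Close()
//			return nil, err
//		}
//		return j, nil
//	}
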
func (r *reader) readLogs() {
	defer close(r.logWatcher.Msg)

	// Make sure the ready channel is closed in the event of an early
	// return.
	defer r.signalReady()

	// Quoting https://www.freedesktop.org/software/systemd/man/sd-journal.html:
	//     Functions that operate on sd_journal objects are thread
	//     agnostic — given sd_journal pointer may only be used from one
	//     specific thread at all times (and it has to be the very same one
	//     during the entire lifetime of the object), but multiple,
	//     independent threads may use multiple, independent objects safely.
	//
	// sdjournal.Open returns a value which wraps an sd_journal pointer so
	// we need to abide by those rules.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Get a handle to the journal.
	var err error
	if r.s.journalReadDir != "" {
		r.j, err = sdjournal.OpenDir(r.s.journalReadDir, 0)
	} else {
		r.j, err = sdjournal.Open(0)
	}
	if err != nil {
		r.logWatcher.Err <- err
		return
	}
	defer r.j.Close()

	if r.config.Follow {
		// Initialize library inotify watches early.
		if err := r.j.InitializeInotify(); err != nil {
			r.logWatcher.Err <- err
			return
		}
	}

	// Remove limits on the size of data items that we'll retrieve.
	if err := r.j.SetDataThreshold(0); err != nil {
		r.logWatcher.Err <- err
		return
	}
	// Add a match to have the library do the searching for us.
	if err := r.j.AddMatch(fieldContainerIDFull, r.s.vars[fieldContainerIDFull]); err != nil {
		r.logWatcher.Err <- err
		return
	}

	if err := r.readJournal(); err != nil {
		r.logWatcher.Err <- err
		return
	}
}

func (r *reader) signalReady() {
	select {
	case <-r.ready:
	default:
		close(r.ready)
	}
}

func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher {
	r := &reader{
		s:          s,
		logWatcher: logger.NewLogWatcher(),
		config:     config,
		ready:      make(chan struct{}),
	}
	go r.readLogs()
	// Block until the reader is in position to read from the current config
	// location to prevent race conditions in tests.
	<-r.ready
	return r.logWatcher
}

func waitUntilFlushedImpl(s *journald) error {
	if s.readSyncTimeout == 0 {
		return nil
	}

	ordinal := atomic.LoadUint64(&s.ordinal)
	if ordinal == 0 {
		return nil // No logs were sent; nothing to wait for.
	}

	flushed := make(chan error)
	go func() {
		defer close(flushed)
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()

		var (
			j   *sdjournal.Journal
			err error
		)
		if s.journalReadDir != "" {
			j, err = sdjournal.OpenDir(s.journalReadDir, 0)
		} else {
			j, err = sdjournal.Open(0)
		}
		if err != nil {
			flushed <- err
			return
		}
		defer j.Close()

		if err := j.AddMatch(fieldContainerIDFull, s.vars[fieldContainerIDFull]); err != nil {
			flushed <- err
			return
		}
		if err := j.AddMatch(fieldLogEpoch, s.epoch); err != nil {
			flushed <- err
			return
		}
		if err := j.AddMatch(fieldLogOrdinal, strconv.FormatUint(ordinal, 10)); err != nil {
			flushed <- err
			return
		}

		deadline := time.Now().Add(s.readSyncTimeout)
		for time.Now().Before(deadline) {
			if ok, err := j.Next(); ok {
				// Found it!
				return
			} else if err != nil {
				flushed <- err
				return
			}
			if _, err := j.Wait(100 * time.Millisecond); err != nil {
				flushed <- err
				return
			}
		}
		log.G(context.TODO()).WithField("container", s.vars[fieldContainerIDFull]).
			Warn("journald: deadline exceeded waiting for logs to be committed to journal")
	}()
	return <-flushed
}

func init() {
	waitUntilFlushed = waitUntilFlushedImpl
}
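
// Hypothetical usage sketch (not part of the upstream file): a consumer might
// drain the LogWatcher returned by ReadLogs roughly as follows. drainWatcher
// is an invented name, and ConsumerGone is assumed to be the consumer-side
// shutdown method on logger.LogWatcher (WatchConsumerGone, used above, is its
// producer-side counterpart).
//
//	func drainWatcher(s *journald, cfg logger.ReadConfig) error {
//		lw := s.ReadLogs(cfg)
//		defer lw.ConsumerGone() // tell the reader goroutine we are done
//		for {
//			select {
//			case msg, ok := <-lw.Msg:
//				if !ok {
//					return nil // reader closed the channel
//				}
//				_ = msg // handle the log message
//			case err := <-lw.Err:
//				return err
//			}
//		}
//	}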