github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/daemon/logger/journald/read.go (about) 1 //go:build linux && cgo && !static_build && journald 2 // +build linux,cgo,!static_build,journald 3 4 package journald // import "github.com/docker/docker/daemon/logger/journald" 5 6 import ( 7 "errors" 8 "runtime" 9 "strconv" 10 "sync/atomic" 11 "time" 12 13 "github.com/coreos/go-systemd/v22/journal" 14 "github.com/sirupsen/logrus" 15 16 "github.com/docker/docker/api/types/backend" 17 "github.com/docker/docker/daemon/logger" 18 "github.com/docker/docker/daemon/logger/journald/internal/sdjournal" 19 ) 20 21 const closedDrainTimeout = 5 * time.Second 22 23 // Fields which we know are not user-provided attribute fields. 24 var wellKnownFields = map[string]bool{ 25 "MESSAGE": true, 26 "MESSAGE_ID": true, 27 "PRIORITY": true, 28 "CODE_FILE": true, 29 "CODE_LINE": true, 30 "CODE_FUNC": true, 31 "ERRNO": true, 32 "SYSLOG_FACILITY": true, 33 fieldSyslogIdentifier: true, 34 "SYSLOG_PID": true, 35 fieldSyslogTimestamp: true, 36 fieldContainerName: true, 37 fieldContainerID: true, 38 fieldContainerIDFull: true, 39 fieldContainerTag: true, 40 fieldImageName: true, 41 fieldPLogID: true, 42 fieldPLogOrdinal: true, 43 fieldPLogLast: true, 44 fieldPartialMessage: true, 45 fieldLogEpoch: true, 46 fieldLogOrdinal: true, 47 } 48 49 type reader struct { 50 s *journald 51 j *sdjournal.Journal 52 logWatcher *logger.LogWatcher 53 config logger.ReadConfig 54 maxOrdinal uint64 55 initialized bool 56 ready chan struct{} 57 } 58 59 func getMessage(d map[string]string) (line []byte, ok bool) { 60 m, ok := d["MESSAGE"] 61 if ok { 62 line = []byte(m) 63 if d[fieldPartialMessage] != "true" { 64 line = append(line, "\n"...) 65 } 66 } 67 return line, ok 68 } 69 70 func getPriority(d map[string]string) (journal.Priority, bool) { 71 if pri, ok := d["PRIORITY"]; ok { 72 i, err := strconv.Atoi(pri) 73 return journal.Priority(i), err == nil 74 } 75 return -1, false 76 } 77 78 // getSource recovers the stream name from the entry data by mapping from the 79 // journal priority field back to the stream that we would have assigned that 80 // value. 81 func getSource(d map[string]string) string { 82 source := "" 83 if priority, ok := getPriority(d); ok { 84 if priority == journal.PriErr { 85 source = "stderr" 86 } else if priority == journal.PriInfo { 87 source = "stdout" 88 } 89 } 90 return source 91 } 92 93 func getAttrs(d map[string]string) []backend.LogAttr { 94 var attrs []backend.LogAttr 95 for k, v := range d { 96 if k[0] != '_' && !wellKnownFields[k] { 97 attrs = append(attrs, backend.LogAttr{Key: k, Value: v}) 98 } 99 } 100 return attrs 101 } 102 103 // errDrainDone is the error returned by drainJournal to signal that there are 104 // no more log entries to send to the log watcher. 105 var errDrainDone = errors.New("journald drain done") 106 107 // drainJournal reads and sends log messages from the journal. 108 // 109 // drainJournal returns errDrainDone when a terminal stopping condition has been 110 // reached: the watch consumer is gone, a log entry is read which has a 111 // timestamp after until (if until is nonzero), or the log driver is closed and 112 // the last message logged has been sent from the journal. If the end of the 113 // journal is reached without encountering a terminal stopping condition, a nil 114 // error is returned. 115 func (r *reader) drainJournal() error { 116 if !r.initialized { 117 defer func() { 118 r.signalReady() 119 r.initialized = true 120 }() 121 122 var ( 123 err error 124 seekedToTail bool 125 ) 126 if r.config.Tail >= 0 { 127 if r.config.Until.IsZero() { 128 err = r.j.SeekTail() 129 seekedToTail = true 130 } else { 131 err = r.j.SeekRealtime(r.config.Until) 132 } 133 } else { 134 if r.config.Since.IsZero() { 135 err = r.j.SeekHead() 136 } else { 137 err = r.j.SeekRealtime(r.config.Since) 138 } 139 } 140 if err != nil { 141 return err 142 } 143 144 // SeekTail() followed by Next() behaves incorrectly, so we need 145 // to work around the bug by ensuring the first discrete 146 // movement of the read pointer is Previous() or PreviousSkip(). 147 // PreviousSkip() is called inside the loop when config.Tail > 0 148 // so the only special case requiring special handling is 149 // config.Tail == 0. 150 // https://github.com/systemd/systemd/issues/9934 151 if seekedToTail && r.config.Tail == 0 { 152 // Resolve the read pointer to the last entry in the 153 // journal so that the call to Next() inside the loop 154 // advances past it. 155 if ok, err := r.j.Previous(); err != nil || !ok { 156 return err 157 } 158 } 159 } 160 161 for i := 0; ; i++ { 162 if !r.initialized && i == 0 && r.config.Tail > 0 { 163 if n, err := r.j.PreviousSkip(uint(r.config.Tail)); err != nil || n == 0 { 164 return err 165 } 166 } else if ok, err := r.j.Next(); err != nil || !ok { 167 return err 168 } 169 170 if !r.initialized && i == 0 { 171 // The cursor is in a position which will be unaffected 172 // by subsequent logging. 173 r.signalReady() 174 } 175 176 // Read the entry's timestamp. 177 timestamp, err := r.j.Realtime() 178 if err != nil { 179 return err 180 } 181 // Check if the PreviousSkip went too far back. Check only the 182 // initial position as we are comparing wall-clock timestamps, 183 // which may not be monotonic. We don't want to skip over 184 // messages sent later in time just because the clock moved 185 // backwards. 186 if !r.initialized && i == 0 && r.config.Tail > 0 && timestamp.Before(r.config.Since) { 187 r.j.SeekRealtime(r.config.Since) 188 continue 189 } 190 if !r.config.Until.IsZero() && r.config.Until.Before(timestamp) { 191 return errDrainDone 192 } 193 194 // Read and send the logged message, if there is one to read. 195 data, err := r.j.Data() 196 if err != nil { 197 return err 198 } 199 200 if data[fieldLogEpoch] == r.s.epoch { 201 seq, err := strconv.ParseUint(data[fieldLogOrdinal], 10, 64) 202 if err == nil && seq > r.maxOrdinal { 203 r.maxOrdinal = seq 204 } 205 } 206 207 if line, ok := getMessage(data); ok { 208 // Send the log message, unless the consumer is gone 209 msg := &logger.Message{ 210 Line: line, 211 Source: getSource(data), 212 Timestamp: timestamp.In(time.UTC), 213 Attrs: getAttrs(data), 214 } 215 // The daemon timestamp will differ from the "trusted" 216 // timestamp of when the event was received by journald. 217 // We can efficiently seek around the journal by the 218 // event timestamp, and the event timestamp is what 219 // journalctl displays. The daemon timestamp is just an 220 // application-supplied field with no special 221 // significance; libsystemd won't help us seek to the 222 // entry with the closest timestamp. 223 /* 224 if sts := data["SYSLOG_TIMESTAMP"]; sts != "" { 225 if tv, err := time.Parse(time.RFC3339Nano, sts); err == nil { 226 msg.Timestamp = tv 227 } 228 } 229 */ 230 select { 231 case <-r.logWatcher.WatchConsumerGone(): 232 return errDrainDone 233 case r.logWatcher.Msg <- msg: 234 } 235 } 236 237 // Call sd_journal_process() periodically during the processing loop 238 // to close any opened file descriptors for rotated (deleted) journal files. 239 if i != 0 && i%1024 == 0 { 240 if _, err := r.j.Process(); err != nil { 241 // log a warning but ignore it for now 242 logrus.WithField("container", r.s.vars[fieldContainerIDFull]). 243 WithField("error", err). 244 Warn("journald: error processing journal") 245 } 246 } 247 } 248 } 249 250 func (r *reader) readJournal() error { 251 caughtUp := atomic.LoadUint64(&r.s.ordinal) 252 if err := r.drainJournal(); err != nil { 253 if err != errDrainDone { 254 return err 255 } 256 return nil 257 } 258 259 var drainTimeout <-chan time.Time 260 if !r.config.Follow { 261 if r.s.readSyncTimeout == 0 { 262 return nil 263 } 264 tmr := time.NewTimer(r.s.readSyncTimeout) 265 defer tmr.Stop() 266 drainTimeout = tmr.C 267 } 268 269 for { 270 status, err := r.j.Wait(250 * time.Millisecond) 271 if err != nil { 272 return err 273 } 274 select { 275 case <-r.logWatcher.WatchConsumerGone(): 276 return nil // won't be able to write anything anymore 277 case <-drainTimeout: 278 // Container is gone but we haven't found the end of the 279 // logs within the timeout. Maybe it was dropped by 280 // journald, e.g. due to rate-limiting. 281 return nil 282 case <-r.s.closed: 283 // container is gone, drain journal 284 lastSeq := atomic.LoadUint64(&r.s.ordinal) 285 if r.maxOrdinal >= lastSeq { 286 // All caught up with the logger! 287 return nil 288 } 289 if drainTimeout == nil { 290 tmr := time.NewTimer(closedDrainTimeout) 291 defer tmr.Stop() 292 drainTimeout = tmr.C 293 } 294 default: 295 // container is still alive 296 if status == sdjournal.StatusNOP { 297 // no new data -- keep waiting 298 continue 299 } 300 } 301 err = r.drainJournal() 302 if err != nil { 303 if err != errDrainDone { 304 return err 305 } 306 return nil 307 } 308 if !r.config.Follow && r.s.readSyncTimeout > 0 && r.maxOrdinal >= caughtUp { 309 return nil 310 } 311 } 312 } 313 314 func (r *reader) readLogs() { 315 defer close(r.logWatcher.Msg) 316 317 // Make sure the ready channel is closed in the event of an early 318 // return. 319 defer r.signalReady() 320 321 // Quoting https://www.freedesktop.org/software/systemd/man/sd-journal.html: 322 // Functions that operate on sd_journal objects are thread 323 // agnostic — given sd_journal pointer may only be used from one 324 // specific thread at all times (and it has to be the very same one 325 // during the entire lifetime of the object), but multiple, 326 // independent threads may use multiple, independent objects safely. 327 // 328 // sdjournal.Open returns a value which wraps an sd_journal pointer so 329 // we need to abide by those rules. 330 runtime.LockOSThread() 331 defer runtime.UnlockOSThread() 332 333 // Get a handle to the journal. 334 var err error 335 if r.s.journalReadDir != "" { 336 r.j, err = sdjournal.OpenDir(r.s.journalReadDir, 0) 337 } else { 338 r.j, err = sdjournal.Open(0) 339 } 340 if err != nil { 341 r.logWatcher.Err <- err 342 return 343 } 344 defer r.j.Close() 345 346 if r.config.Follow { 347 // Initialize library inotify watches early 348 if err := r.j.InitializeInotify(); err != nil { 349 r.logWatcher.Err <- err 350 return 351 } 352 } 353 354 // Remove limits on the size of data items that we'll retrieve. 355 if err := r.j.SetDataThreshold(0); err != nil { 356 r.logWatcher.Err <- err 357 return 358 } 359 // Add a match to have the library do the searching for us. 360 if err := r.j.AddMatch(fieldContainerIDFull, r.s.vars[fieldContainerIDFull]); err != nil { 361 r.logWatcher.Err <- err 362 return 363 } 364 365 if err := r.readJournal(); err != nil { 366 r.logWatcher.Err <- err 367 return 368 } 369 } 370 371 func (r *reader) signalReady() { 372 select { 373 case <-r.ready: 374 default: 375 close(r.ready) 376 } 377 } 378 379 func (s *journald) ReadLogs(config logger.ReadConfig) *logger.LogWatcher { 380 r := &reader{ 381 s: s, 382 logWatcher: logger.NewLogWatcher(), 383 config: config, 384 ready: make(chan struct{}), 385 } 386 go r.readLogs() 387 // Block until the reader is in position to read from the current config 388 // location to prevent race conditions in tests. 389 <-r.ready 390 return r.logWatcher 391 } 392 393 func waitUntilFlushedImpl(s *journald) error { 394 if s.readSyncTimeout == 0 { 395 return nil 396 } 397 398 ordinal := atomic.LoadUint64(&s.ordinal) 399 if ordinal == 0 { 400 return nil // No logs were sent; nothing to wait for. 401 } 402 403 flushed := make(chan error) 404 go func() { 405 defer close(flushed) 406 runtime.LockOSThread() 407 defer runtime.UnlockOSThread() 408 409 var ( 410 j *sdjournal.Journal 411 err error 412 ) 413 if s.journalReadDir != "" { 414 j, err = sdjournal.OpenDir(s.journalReadDir, 0) 415 } else { 416 j, err = sdjournal.Open(0) 417 } 418 if err != nil { 419 flushed <- err 420 return 421 } 422 defer j.Close() 423 424 if err := j.AddMatch(fieldContainerIDFull, s.vars[fieldContainerIDFull]); err != nil { 425 flushed <- err 426 return 427 } 428 if err := j.AddMatch(fieldLogEpoch, s.epoch); err != nil { 429 flushed <- err 430 return 431 } 432 if err := j.AddMatch(fieldLogOrdinal, strconv.FormatUint(ordinal, 10)); err != nil { 433 flushed <- err 434 return 435 } 436 437 deadline := time.Now().Add(s.readSyncTimeout) 438 for time.Now().Before(deadline) { 439 if ok, err := j.Next(); ok { 440 // Found it! 441 return 442 } else if err != nil { 443 flushed <- err 444 return 445 } 446 if _, err := j.Wait(100 * time.Millisecond); err != nil { 447 flushed <- err 448 return 449 } 450 } 451 logrus.WithField("container", s.vars[fieldContainerIDFull]). 452 Warn("journald: deadline exceeded waiting for logs to be committed to journal") 453 }() 454 return <-flushed 455 } 456 457 func init() { 458 waitUntilFlushed = waitUntilFlushedImpl 459 }