k8s.io/kubernetes@v1.29.3/pkg/kubelet/logs/container_log_manager.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package logs 18 19 import ( 20 "compress/gzip" 21 "context" 22 "fmt" 23 "io" 24 "os" 25 "path/filepath" 26 "sort" 27 "strings" 28 "sync" 29 "time" 30 31 "k8s.io/klog/v2" 32 33 "k8s.io/apimachinery/pkg/api/resource" 34 "k8s.io/apimachinery/pkg/util/wait" 35 internalapi "k8s.io/cri-api/pkg/apis" 36 runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" 37 kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" 38 "k8s.io/utils/clock" 39 ) 40 41 const ( 42 // logMonitorPeriod is the period container log manager monitors 43 // container logs and performs log rotation. 44 logMonitorPeriod = 10 * time.Second 45 // timestampFormat is format of the timestamp suffix for rotated log. 46 // See https://golang.org/pkg/time/#Time.Format. 47 timestampFormat = "20060102-150405" 48 // compressSuffix is the suffix for compressed log. 49 compressSuffix = ".gz" 50 // tmpSuffix is the suffix for temporary file. 51 tmpSuffix = ".tmp" 52 ) 53 54 // ContainerLogManager manages lifecycle of all container logs. 55 // 56 // Implementation is thread-safe. 57 type ContainerLogManager interface { 58 // TODO(random-liu): Add RotateLogs function and call it under disk pressure. 59 // Start container log manager. 60 Start() 61 // Clean removes all logs of specified container. 62 Clean(ctx context.Context, containerID string) error 63 } 64 65 // LogRotatePolicy is a policy for container log rotation. The policy applies to all 66 // containers managed by kubelet. 67 type LogRotatePolicy struct { 68 // MaxSize in bytes of the container log file before it is rotated. Negative 69 // number means to disable container log rotation. 70 MaxSize int64 71 // MaxFiles is the maximum number of log files that can be present. 72 // If rotating the logs creates excess files, the oldest file is removed. 73 MaxFiles int 74 } 75 76 // GetAllLogs gets all inuse (rotated/compressed) logs for a specific container log. 77 // Returned logs are sorted in oldest to newest order. 78 // TODO(#59902): Leverage this function to support log rotation in `kubectl logs`. 79 func GetAllLogs(log string) ([]string, error) { 80 // pattern is used to match all rotated files. 81 pattern := fmt.Sprintf("%s.*", log) 82 logs, err := filepath.Glob(pattern) 83 if err != nil { 84 return nil, fmt.Errorf("failed to list all log files with pattern %q: %v", pattern, err) 85 } 86 inuse, _ := filterUnusedLogs(logs) 87 sort.Strings(inuse) 88 return append(inuse, log), nil 89 } 90 91 // compressReadCloser wraps gzip.Reader with a function to close file handler. 92 type compressReadCloser struct { 93 f *os.File 94 *gzip.Reader 95 } 96 97 func (rc *compressReadCloser) Close() error { 98 ferr := rc.f.Close() 99 rerr := rc.Reader.Close() 100 if ferr != nil { 101 return ferr 102 } 103 if rerr != nil { 104 return rerr 105 } 106 return nil 107 } 108 109 // UncompressLog compresses a compressed log and return a readcloser for the 110 // stream of the uncompressed content. 111 // TODO(#59902): Leverage this function to support log rotation in `kubectl logs`. 112 func UncompressLog(log string) (_ io.ReadCloser, retErr error) { 113 if !strings.HasSuffix(log, compressSuffix) { 114 return nil, fmt.Errorf("log is not compressed") 115 } 116 f, err := os.Open(log) 117 if err != nil { 118 return nil, fmt.Errorf("failed to open log: %v", err) 119 } 120 defer func() { 121 if retErr != nil { 122 f.Close() 123 } 124 }() 125 r, err := gzip.NewReader(f) 126 if err != nil { 127 return nil, fmt.Errorf("failed to create gzip reader: %v", err) 128 } 129 return &compressReadCloser{f: f, Reader: r}, nil 130 } 131 132 // parseMaxSize parses quantity string to int64 max size in bytes. 133 func parseMaxSize(size string) (int64, error) { 134 quantity, err := resource.ParseQuantity(size) 135 if err != nil { 136 return 0, err 137 } 138 maxSize, ok := quantity.AsInt64() 139 if !ok { 140 return 0, fmt.Errorf("invalid max log size") 141 } 142 return maxSize, nil 143 } 144 145 type containerLogManager struct { 146 runtimeService internalapi.RuntimeService 147 osInterface kubecontainer.OSInterface 148 policy LogRotatePolicy 149 clock clock.Clock 150 mutex sync.Mutex 151 } 152 153 // NewContainerLogManager creates a new container log manager. 154 func NewContainerLogManager(runtimeService internalapi.RuntimeService, osInterface kubecontainer.OSInterface, maxSize string, maxFiles int) (ContainerLogManager, error) { 155 if maxFiles <= 1 { 156 return nil, fmt.Errorf("invalid MaxFiles %d, must be > 1", maxFiles) 157 } 158 parsedMaxSize, err := parseMaxSize(maxSize) 159 if err != nil { 160 return nil, fmt.Errorf("failed to parse container log max size %q: %v", maxSize, err) 161 } 162 // Negative number means to disable container log rotation 163 if parsedMaxSize < 0 { 164 return NewStubContainerLogManager(), nil 165 } 166 // policy LogRotatePolicy 167 return &containerLogManager{ 168 osInterface: osInterface, 169 runtimeService: runtimeService, 170 policy: LogRotatePolicy{ 171 MaxSize: parsedMaxSize, 172 MaxFiles: maxFiles, 173 }, 174 clock: clock.RealClock{}, 175 mutex: sync.Mutex{}, 176 }, nil 177 } 178 179 // Start the container log manager. 180 func (c *containerLogManager) Start() { 181 ctx := context.Background() 182 // Start a goroutine periodically does container log rotation. 183 go wait.Forever(func() { 184 if err := c.rotateLogs(ctx); err != nil { 185 klog.ErrorS(err, "Failed to rotate container logs") 186 } 187 }, logMonitorPeriod) 188 } 189 190 // Clean removes all logs of specified container (including rotated one). 191 func (c *containerLogManager) Clean(ctx context.Context, containerID string) error { 192 c.mutex.Lock() 193 defer c.mutex.Unlock() 194 resp, err := c.runtimeService.ContainerStatus(ctx, containerID, false) 195 if err != nil { 196 return fmt.Errorf("failed to get container status %q: %v", containerID, err) 197 } 198 if resp.GetStatus() == nil { 199 return fmt.Errorf("container status is nil for %q", containerID) 200 } 201 pattern := fmt.Sprintf("%s*", resp.GetStatus().GetLogPath()) 202 logs, err := c.osInterface.Glob(pattern) 203 if err != nil { 204 return fmt.Errorf("failed to list all log files with pattern %q: %v", pattern, err) 205 } 206 207 for _, l := range logs { 208 if err := c.osInterface.Remove(l); err != nil && !os.IsNotExist(err) { 209 return fmt.Errorf("failed to remove container %q log %q: %v", containerID, l, err) 210 } 211 } 212 213 return nil 214 } 215 216 func (c *containerLogManager) rotateLogs(ctx context.Context) error { 217 c.mutex.Lock() 218 defer c.mutex.Unlock() 219 // TODO(#59998): Use kubelet pod cache. 220 containers, err := c.runtimeService.ListContainers(ctx, &runtimeapi.ContainerFilter{}) 221 if err != nil { 222 return fmt.Errorf("failed to list containers: %v", err) 223 } 224 // NOTE(random-liu): Figure out whether we need to rotate container logs in parallel. 225 for _, container := range containers { 226 // Only rotate logs for running containers. Non-running containers won't 227 // generate new output, it doesn't make sense to keep an empty latest log. 228 if container.GetState() != runtimeapi.ContainerState_CONTAINER_RUNNING { 229 continue 230 } 231 id := container.GetId() 232 // Note that we should not block log rotate for an error of a single container. 233 resp, err := c.runtimeService.ContainerStatus(ctx, id, false) 234 if err != nil { 235 klog.ErrorS(err, "Failed to get container status", "containerID", id) 236 continue 237 } 238 if resp.GetStatus() == nil { 239 klog.ErrorS(err, "Container status is nil", "containerID", id) 240 continue 241 } 242 path := resp.GetStatus().GetLogPath() 243 info, err := c.osInterface.Stat(path) 244 if err != nil { 245 if !os.IsNotExist(err) { 246 klog.ErrorS(err, "Failed to stat container log", "path", path) 247 continue 248 } 249 // In rotateLatestLog, there are several cases that we may 250 // lose original container log after ReopenContainerLog fails. 251 // We try to recover it by reopening container log. 252 if err := c.runtimeService.ReopenContainerLog(ctx, id); err != nil { 253 klog.ErrorS(err, "Container log doesn't exist, reopen container log failed", "containerID", id, "path", path) 254 continue 255 } 256 // The container log should be recovered. 257 info, err = c.osInterface.Stat(path) 258 if err != nil { 259 klog.ErrorS(err, "Failed to stat container log after reopen", "path", path) 260 continue 261 } 262 } 263 if info.Size() < c.policy.MaxSize { 264 continue 265 } 266 // Perform log rotation. 267 if err := c.rotateLog(ctx, id, path); err != nil { 268 klog.ErrorS(err, "Failed to rotate log for container", "path", path, "containerID", id) 269 continue 270 } 271 } 272 return nil 273 } 274 275 func (c *containerLogManager) rotateLog(ctx context.Context, id, log string) error { 276 // pattern is used to match all rotated files. 277 pattern := fmt.Sprintf("%s.*", log) 278 logs, err := filepath.Glob(pattern) 279 if err != nil { 280 return fmt.Errorf("failed to list all log files with pattern %q: %v", pattern, err) 281 } 282 283 logs, err = c.cleanupUnusedLogs(logs) 284 if err != nil { 285 return fmt.Errorf("failed to cleanup logs: %v", err) 286 } 287 288 logs, err = c.removeExcessLogs(logs) 289 if err != nil { 290 return fmt.Errorf("failed to remove excess logs: %v", err) 291 } 292 293 // Compress uncompressed log files. 294 for _, l := range logs { 295 if strings.HasSuffix(l, compressSuffix) { 296 continue 297 } 298 if err := c.compressLog(l); err != nil { 299 return fmt.Errorf("failed to compress log %q: %v", l, err) 300 } 301 } 302 303 if err := c.rotateLatestLog(ctx, id, log); err != nil { 304 return fmt.Errorf("failed to rotate log %q: %v", log, err) 305 } 306 307 return nil 308 } 309 310 // cleanupUnusedLogs cleans up temporary or unused log files generated by previous log rotation 311 // failure. 312 func (c *containerLogManager) cleanupUnusedLogs(logs []string) ([]string, error) { 313 inuse, unused := filterUnusedLogs(logs) 314 for _, l := range unused { 315 if err := c.osInterface.Remove(l); err != nil { 316 return nil, fmt.Errorf("failed to remove unused log %q: %v", l, err) 317 } 318 } 319 return inuse, nil 320 } 321 322 // filterUnusedLogs splits logs into 2 groups, the 1st group is in used logs, 323 // the second group is unused logs. 324 func filterUnusedLogs(logs []string) (inuse []string, unused []string) { 325 for _, l := range logs { 326 if isInUse(l, logs) { 327 inuse = append(inuse, l) 328 } else { 329 unused = append(unused, l) 330 } 331 } 332 return inuse, unused 333 } 334 335 // isInUse checks whether a container log file is still inuse. 336 func isInUse(l string, logs []string) bool { 337 // All temporary files are not in use. 338 if strings.HasSuffix(l, tmpSuffix) { 339 return false 340 } 341 // All compressed logs are in use. 342 if strings.HasSuffix(l, compressSuffix) { 343 return true 344 } 345 // Files has already been compressed are not in use. 346 for _, another := range logs { 347 if l+compressSuffix == another { 348 return false 349 } 350 } 351 return true 352 } 353 354 // removeExcessLogs removes old logs to make sure there are only at most MaxFiles log files. 355 func (c *containerLogManager) removeExcessLogs(logs []string) ([]string, error) { 356 // Sort log files in oldest to newest order. 357 sort.Strings(logs) 358 // Container will create a new log file, and we'll rotate the latest log file. 359 // Other than those 2 files, we can have at most MaxFiles-2 rotated log files. 360 // Keep MaxFiles-2 files by removing old files. 361 // We should remove from oldest to newest, so as not to break ongoing `kubectl logs`. 362 maxRotatedFiles := c.policy.MaxFiles - 2 363 if maxRotatedFiles < 0 { 364 maxRotatedFiles = 0 365 } 366 i := 0 367 for ; i < len(logs)-maxRotatedFiles; i++ { 368 if err := c.osInterface.Remove(logs[i]); err != nil { 369 return nil, fmt.Errorf("failed to remove old log %q: %v", logs[i], err) 370 } 371 } 372 logs = logs[i:] 373 return logs, nil 374 } 375 376 // compressLog compresses a log to log.gz with gzip. 377 func (c *containerLogManager) compressLog(log string) error { 378 r, err := c.osInterface.Open(log) 379 if err != nil { 380 return fmt.Errorf("failed to open log %q: %v", log, err) 381 } 382 defer r.Close() 383 tmpLog := log + tmpSuffix 384 f, err := c.osInterface.OpenFile(tmpLog, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) 385 if err != nil { 386 return fmt.Errorf("failed to create temporary log %q: %v", tmpLog, err) 387 } 388 defer func() { 389 // Best effort cleanup of tmpLog. 390 c.osInterface.Remove(tmpLog) 391 }() 392 defer f.Close() 393 w := gzip.NewWriter(f) 394 defer w.Close() 395 if _, err := io.Copy(w, r); err != nil { 396 return fmt.Errorf("failed to compress %q to %q: %v", log, tmpLog, err) 397 } 398 // The archive needs to be closed before renaming, otherwise an error will occur on Windows. 399 w.Close() 400 f.Close() 401 compressedLog := log + compressSuffix 402 if err := c.osInterface.Rename(tmpLog, compressedLog); err != nil { 403 return fmt.Errorf("failed to rename %q to %q: %v", tmpLog, compressedLog, err) 404 } 405 // Remove old log file. 406 r.Close() 407 if err := c.osInterface.Remove(log); err != nil { 408 return fmt.Errorf("failed to remove log %q after compress: %v", log, err) 409 } 410 return nil 411 } 412 413 // rotateLatestLog rotates latest log without compression, so that container can still write 414 // and fluentd can finish reading. 415 func (c *containerLogManager) rotateLatestLog(ctx context.Context, id, log string) error { 416 timestamp := c.clock.Now().Format(timestampFormat) 417 rotated := fmt.Sprintf("%s.%s", log, timestamp) 418 if err := c.osInterface.Rename(log, rotated); err != nil { 419 return fmt.Errorf("failed to rotate log %q to %q: %v", log, rotated, err) 420 } 421 if err := c.runtimeService.ReopenContainerLog(ctx, id); err != nil { 422 // Rename the rotated log back, so that we can try rotating it again 423 // next round. 424 // If kubelet gets restarted at this point, we'll lose original log. 425 if renameErr := c.osInterface.Rename(rotated, log); renameErr != nil { 426 // This shouldn't happen. 427 // Report an error if this happens, because we will lose original 428 // log. 429 klog.ErrorS(renameErr, "Failed to rename rotated log", "rotatedLog", rotated, "newLog", log, "containerID", id) 430 } 431 return fmt.Errorf("failed to reopen container log %q: %v", id, err) 432 } 433 return nil 434 }