github.com/google/cadvisor@v0.49.1/manager/container.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"flag"
	"fmt"
	"math"
	"math/rand"
	"os"
	"os/exec"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/cadvisor/cache/memory"
	"github.com/google/cadvisor/collector"
	"github.com/google/cadvisor/container"
	info "github.com/google/cadvisor/info/v1"
	v2 "github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/stats"
	"github.com/google/cadvisor/summary"
	"github.com/google/cadvisor/utils/cpuload"

	"github.com/docker/go-units"

	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
)

// Housekeeping interval.
var enableLoadReader = flag.Bool("enable_load_reader", false, "Whether to enable cpu load reader")
var HousekeepingInterval = flag.Duration("housekeeping_interval", 1*time.Second, "Interval between container housekeepings")

// TODO: replace regular expressions with something simpler, such as strings.Split().
// cgroup type chosen to fetch the cgroup path of a process.
// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers...
var cgroupMemoryPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)

// ... but there are systems (e.g. Raspberry Pi 4) where memory cgroup controller is disabled by default.
// We should check cpu cgroup then.
var cgroupCPUPathRegExp = regexp.MustCompile(`cpu[^:]*:(.*?)[,;$]`)

type containerInfo struct {
	info.ContainerReference
	Subcontainers []info.ContainerReference
	Spec          info.ContainerSpec
}

type containerData struct {
	oomEvents                uint64
	handler                  container.ContainerHandler
	info                     containerInfo
	memoryCache              *memory.InMemoryCache
	lock                     sync.Mutex
	loadReader               cpuload.CpuLoadReader
	summaryReader            *summary.StatsSummary
	loadAvg                  float64 // smoothed load average seen so far.
	housekeepingInterval     time.Duration
	maxHousekeepingInterval  time.Duration
	allowDynamicHousekeeping bool
	infoLastUpdatedTime      time.Time
	statsLastUpdatedTime     time.Time
	lastErrorTime            time.Time
	// used to track time
	clock clock.Clock

	// Decay value used for load average smoothing. Interval length of 10 seconds is used.
	loadDecay float64

	// Whether to log the usage of this container when it is updated.
	logUsage bool

	// Tells the container to stop.
	stop chan struct{}

	// Tells the container to immediately collect stats
	onDemandChan chan chan struct{}

	// Runs custom metric collectors.
	collectorManager collector.CollectorManager

	// perfCollector updates stats for perf_event cgroup controller.
	perfCollector stats.Collector

	// resctrlCollector updates stats for resctrl controller.
	resctrlCollector stats.Collector
}

// jitter returns a time.Duration between duration and duration + maxFactor * duration,
// to allow clients to avoid converging on periodic behavior. If maxFactor is 0.0, a
// suggested default value will be chosen.
func jitter(duration time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		maxFactor = 1.0
	}
	wait := duration + time.Duration(rand.Float64()*maxFactor*float64(duration))
	return wait
}

func (cd *containerData) Start() error {
	go cd.housekeeping()
	return nil
}

func (cd *containerData) Stop() error {
	err := cd.memoryCache.RemoveContainer(cd.info.Name)
	if err != nil {
		return err
	}
	close(cd.stop)
	cd.perfCollector.Destroy()
	cd.resctrlCollector.Destroy()
	return nil
}

func (cd *containerData) allowErrorLogging() bool {
	if cd.clock.Since(cd.lastErrorTime) > time.Minute {
		cd.lastErrorTime = cd.clock.Now()
		return true
	}
	return false
}

// OnDemandHousekeeping performs housekeeping on the container and blocks until it has completed.
// It is designed to be used in conjunction with periodic housekeeping, and will cause the timer for
// periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently
// can have serious performance costs.
func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) {
	cd.lock.Lock()
	timeSinceStatsLastUpdate := cd.clock.Since(cd.statsLastUpdatedTime)
	cd.lock.Unlock()
	if timeSinceStatsLastUpdate > maxAge {
		housekeepingFinishedChan := make(chan struct{})
		cd.onDemandChan <- housekeepingFinishedChan
		select {
		case <-cd.stop:
		case <-housekeepingFinishedChan:
		}
	}
}

// notifyOnDemand notifies all calls to OnDemandHousekeeping that housekeeping is finished
func (cd *containerData) notifyOnDemand() {
	for {
		select {
		case finishedChan := <-cd.onDemandChan:
			close(finishedChan)
		default:
			return
		}
	}
}
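
// Illustrative usage sketch (not part of the upstream source): a caller that needs
// fresh stats bounds their staleness instead of reading the memory cache directly, e.g.
//
//	// Trigger a housekeeping pass unless stats were refreshed within the last 2s.
//	cd.OnDemandHousekeeping(2 * time.Second)
//
// The call blocks until the next housekeeping pass closes the channel it queued on
// onDemandChan (via notifyOnDemand), or until the container is stopped.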

func (cd *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
	// Get spec and subcontainers.
	if cd.clock.Since(cd.infoLastUpdatedTime) > 5*time.Second || shouldUpdateSubcontainers {
		err := cd.updateSpec()
		if err != nil {
			return nil, err
		}
		if shouldUpdateSubcontainers {
			err = cd.updateSubcontainers()
			if err != nil {
				return nil, err
			}
		}
		cd.infoLastUpdatedTime = cd.clock.Now()
	}
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cInfo := containerInfo{
		Subcontainers: cd.info.Subcontainers,
		Spec:          cd.info.Spec,
	}
	cInfo.Id = cd.info.Id
	cInfo.Name = cd.info.Name
	cInfo.Aliases = cd.info.Aliases
	cInfo.Namespace = cd.info.Namespace
	return &cInfo, nil
}

func (cd *containerData) DerivedStats() (v2.DerivedStats, error) {
	if cd.summaryReader == nil {
		return v2.DerivedStats{}, fmt.Errorf("derived stats not enabled for container %q", cd.info.Name)
	}
	return cd.summaryReader.DerivedStats()
}

func (cd *containerData) getCgroupPath(cgroups string) string {
	if cgroups == "-" {
		return "/"
	}
	if strings.HasPrefix(cgroups, "0::") {
		return cgroups[3:]
	}
	matches := cgroupMemoryPathRegExp.FindSubmatch([]byte(cgroups))
	if len(matches) != 2 {
		klog.V(3).Infof(
			"failed to get memory cgroup path from %q, will try to get cpu cgroup path",
			cgroups,
		)
		// On some systems (e.g. Raspberry Pi 4) the cgroup memory controller is disabled by default.
		matches = cgroupCPUPathRegExp.FindSubmatch([]byte(cgroups))
		if len(matches) != 2 {
			klog.V(3).Infof("failed to get cpu cgroup path from %q; assuming root cgroup", cgroups)
			// return root in case of failures - memory hierarchy might not be enabled.
			return "/"
		}
	}
	return string(matches[1])
}
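
// Illustrative examples (not part of the upstream source) of the cgroup column this
// function accepts, as printed by `ps -o cgroup`:
//
//	"-"                                 -> "/"                    (no cgroup information)
//	"0::/kubepods/pod-x/abc"            -> "/kubepods/pod-x/abc"  (cgroup v2 unified hierarchy)
//	"4:memory:/kubepods/pod-x/abc,..."  -> "/kubepods/pod-x/abc"  (cgroup v1; memory hierarchy, cpu as fallback)
//
// The "/kubepods/pod-x/abc" paths are hypothetical placeholders.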

// Returns contents of a file inside the container root.
// Takes in a path relative to container root.
func (cd *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte, error) {
	pids, err := cd.getContainerPids(inHostNamespace)
	if err != nil {
		return nil, err
	}
	// TODO(rjnagal): Optimize by just reading container's cgroup.proc file when in host namespace.
	rootfs := "/"
	if !inHostNamespace {
		rootfs = "/rootfs"
	}
	for _, pid := range pids {
		filePath := path.Join(rootfs, "/proc", pid, "/root", filepath)
		klog.V(3).Infof("Trying path %q", filePath)
		data, err := os.ReadFile(filePath)
		if err == nil {
			return data, err
		}
	}
	// No process paths could be found. Declare config non-existent.
	return nil, fmt.Errorf("file %q does not exist", filepath)
}

// Return output for ps command in host /proc with specified format
func (cd *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte, error) {
	args := []string{}
	command := "ps"
	if !inHostNamespace {
		command = "/usr/sbin/chroot"
		args = append(args, "/rootfs", "ps")
	}
	args = append(args, "-e", "-o", format)
	out, err := exec.Command(command, args...).Output()
	if err != nil {
		return nil, fmt.Errorf("failed to execute %q command: %v", command, err)
	}
	return out, err
}

// Get pids of processes in this container.
// A slightly lighter-weight call than GetProcessList if other details are not required.
func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
	format := "pid,cgroup"
	out, err := cd.getPsOutput(inHostNamespace, format)
	if err != nil {
		return nil, err
	}
	expectedFields := 2
	lines := strings.Split(string(out), "\n")
	pids := []string{}
	for _, line := range lines[1:] {
		if len(line) == 0 {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < expectedFields {
			return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
		}
		pid := fields[0]
		cgroup := cd.getCgroupPath(fields[1])
		if cd.info.Name == cgroup {
			pids = append(pids, pid)
		}
	}
	return pids, nil
}

func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
	format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup"
	out, err := cd.getPsOutput(inHostNamespace, format)
	if err != nil {
		return nil, err
	}
	return cd.parseProcessList(cadvisorContainer, inHostNamespace, out)
}

func (cd *containerData) parseProcessList(cadvisorContainer string, inHostNamespace bool, out []byte) ([]v2.ProcessInfo, error) {
	rootfs := "/"
	if !inHostNamespace {
		rootfs = "/rootfs"
	}
	processes := []v2.ProcessInfo{}
	lines := strings.Split(string(out), "\n")
	for _, line := range lines[1:] {
		processInfo, err := cd.parsePsLine(line, cadvisorContainer, inHostNamespace)
		if err != nil {
			return nil, fmt.Errorf("could not parse line %s: %v", line, err)
		}
		if processInfo == nil {
			continue
		}

		var fdCount int
		dirPath := path.Join(rootfs, "/proc", strconv.Itoa(processInfo.Pid), "fd")
		fds, err := os.ReadDir(dirPath)
		if err != nil {
			klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
			continue
		}
		fdCount = len(fds)
		processInfo.FdCount = fdCount

		processes = append(processes, *processInfo)
	}
	return processes, nil
}

func (cd *containerData) isRoot() bool {
	return cd.info.Name == "/"
}

func (cd *containerData) parsePsLine(line, cadvisorContainer string, inHostNamespace bool) (*v2.ProcessInfo, error) {
	const expectedFields = 13
	if len(line) == 0 {
		return nil, nil
	}

	info := v2.ProcessInfo{}
	var err error

	fields := strings.Fields(line)
	if len(fields) < expectedFields {
		return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
	}
	info.User = fields[0]
	info.StartTime = fields[3]
	info.Status = fields[8]
	info.RunningTime = fields[9]

	info.Pid, err = strconv.Atoi(fields[1])
	if err != nil {
		return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
	}
	info.Ppid, err = strconv.Atoi(fields[2])
	if err != nil {
		return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
	}

	percentCPU, err := strconv.ParseFloat(fields[4], 32)
	if err != nil {
		return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
	}
	info.PercentCpu = float32(percentCPU)
	percentMem, err := strconv.ParseFloat(fields[5], 32)
	if err != nil {
		return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err)
	}
	info.PercentMemory = float32(percentMem)

	info.RSS, err = strconv.ParseUint(fields[6], 0, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err)
	}
	info.VirtualSize, err = strconv.ParseUint(fields[7], 0, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err)
	}
	// convert to bytes
	info.RSS *= 1024
	info.VirtualSize *= 1024

	// According to `man ps`: The following user-defined format specifiers may contain spaces: args, cmd, comm, command,
	// fname, ucmd, ucomm, lstart, bsdstart, start.
	// Therefore we need to be able to parse comm that consists of multiple space-separated parts.
	info.Cmd = strings.Join(fields[10:len(fields)-2], " ")

	// These are the last two parts of the line. We create a subslice of `fields` to handle comm that includes spaces.
	lastTwoFields := fields[len(fields)-2:]
	info.Psr, err = strconv.Atoi(lastTwoFields[0])
	if err != nil {
		return nil, fmt.Errorf("invalid psr %q: %v", lastTwoFields[0], err)
	}
	info.CgroupPath = cd.getCgroupPath(lastTwoFields[1])

	// Remove the ps command we just ran from the cadvisor container.
	// Not necessary, but makes the cadvisor page look cleaner.
	if !inHostNamespace && cadvisorContainer == info.CgroupPath && info.Cmd == "ps" {
		return nil, nil
	}

	// Do not report processes from other containers when a non-root container is requested.
	if !cd.isRoot() && info.CgroupPath != cd.info.Name {
		return nil, nil
	}

	// Remove cgroup information when a non-root container is requested.
	if !cd.isRoot() {
		info.CgroupPath = ""
	}
	return &info, nil
}
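
// Illustrative example (not part of the upstream source): for the GetProcessList format
// "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup", a line such as
//
//	root 1234 1 10:02 0.5 1.2 20480 102400 Ssl 00:01:02 my server 3 4:memory:/kubepods/pod-x/abc,...
//
// parses to Pid 1234, Cmd "my server" (comm may contain spaces, so everything between
// the ten fixed leading fields and the trailing psr/cgroup pair is joined), Psr 3,
// RSS 20480 KiB -> 20971520 bytes, VirtualSize 102400 KiB -> 104857600 bytes, and
// CgroupPath "/kubepods/pod-x/abc". The pid, names and paths are hypothetical.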
fmt.Errorf("invalid rss %q: %v", fields[6], err) 384 } 385 info.VirtualSize, err = strconv.ParseUint(fields[7], 0, 64) 386 if err != nil { 387 return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err) 388 } 389 // convert to bytes 390 info.RSS *= 1024 391 info.VirtualSize *= 1024 392 393 // According to `man ps`: The following user-defined format specifiers may contain spaces: args, cmd, comm, command, 394 // fname, ucmd, ucomm, lstart, bsdstart, start. 395 // Therefore we need to be able to parse comm that consists of multiple space-separated parts. 396 info.Cmd = strings.Join(fields[10:len(fields)-2], " ") 397 398 // These are last two parts of the line. We create a subslice of `fields` to handle comm that includes spaces. 399 lastTwoFields := fields[len(fields)-2:] 400 info.Psr, err = strconv.Atoi(lastTwoFields[0]) 401 if err != nil { 402 return nil, fmt.Errorf("invalid psr %q: %v", lastTwoFields[0], err) 403 } 404 info.CgroupPath = cd.getCgroupPath(lastTwoFields[1]) 405 406 // Remove the ps command we just ran from cadvisor container. 407 // Not necessary, but makes the cadvisor page look cleaner. 408 if !inHostNamespace && cadvisorContainer == info.CgroupPath && info.Cmd == "ps" { 409 return nil, nil 410 } 411 412 // Do not report processes from other containers when non-root container requested. 413 if !cd.isRoot() && info.CgroupPath != cd.info.Name { 414 return nil, nil 415 } 416 417 // Remove cgroup information when non-root container requested. 418 if !cd.isRoot() { 419 info.CgroupPath = "" 420 } 421 return &info, nil 422 } 423 424 func newContainerData(containerName string, memoryCache *memory.InMemoryCache, handler container.ContainerHandler, logUsage bool, collectorManager collector.CollectorManager, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, clock clock.Clock) (*containerData, error) { 425 if memoryCache == nil { 426 return nil, fmt.Errorf("nil memory storage") 427 } 428 if handler == nil { 429 return nil, fmt.Errorf("nil container handler") 430 } 431 ref, err := handler.ContainerReference() 432 if err != nil { 433 return nil, err 434 } 435 436 cont := &containerData{ 437 handler: handler, 438 memoryCache: memoryCache, 439 housekeepingInterval: *HousekeepingInterval, 440 maxHousekeepingInterval: maxHousekeepingInterval, 441 allowDynamicHousekeeping: allowDynamicHousekeeping, 442 logUsage: logUsage, 443 loadAvg: -1.0, // negative value indicates uninitialized. 444 stop: make(chan struct{}), 445 collectorManager: collectorManager, 446 onDemandChan: make(chan chan struct{}, 100), 447 clock: clock, 448 perfCollector: &stats.NoopCollector{}, 449 resctrlCollector: &stats.NoopCollector{}, 450 } 451 cont.info.ContainerReference = ref 452 453 cont.loadDecay = math.Exp(float64(-cont.housekeepingInterval.Seconds() / 10)) 454 455 if *enableLoadReader { 456 // Create cpu load reader. 457 loadReader, err := cpuload.New() 458 if err != nil { 459 klog.Warningf("Could not initialize cpu load reader for %q: %s", ref.Name, err) 460 } else { 461 cont.loadReader = loadReader 462 } 463 } 464 465 err = cont.updateSpec() 466 if err != nil { 467 return nil, err 468 } 469 cont.summaryReader, err = summary.New(cont.info.Spec) 470 if err != nil { 471 cont.summaryReader = nil 472 klog.V(5).Infof("Failed to create summary reader for %q: %v", ref.Name, err) 473 } 474 475 return cont, nil 476 } 477 478 // Determine when the next housekeeping should occur. 

// Determine when the next housekeeping should occur.
func (cd *containerData) nextHousekeepingInterval() time.Duration {
	if cd.allowDynamicHousekeeping {
		var empty time.Time
		stats, err := cd.memoryCache.RecentStats(cd.info.Name, empty, empty, 2)
		if err != nil {
			if cd.allowErrorLogging() {
				klog.V(4).Infof("Failed to get RecentStats(%q) while determining the next housekeeping: %v", cd.info.Name, err)
			}
		} else if len(stats) == 2 {
			// TODO(vishnuk): Use no processes as a signal.
			// Raise the interval if usage hasn't changed in the last housekeeping.
			if stats[0].StatsEq(stats[1]) && (cd.housekeepingInterval < cd.maxHousekeepingInterval) {
				cd.housekeepingInterval *= 2
				if cd.housekeepingInterval > cd.maxHousekeepingInterval {
					cd.housekeepingInterval = cd.maxHousekeepingInterval
				}
			} else if cd.housekeepingInterval != *HousekeepingInterval {
				// Lower interval back to the baseline.
				cd.housekeepingInterval = *HousekeepingInterval
			}
		}
	}

	return jitter(cd.housekeepingInterval, 1.0)
}
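
// Illustrative behavior (not part of the upstream source): with dynamic housekeeping
// enabled and --housekeeping_interval=1s, an idle container's interval doubles
// 1s -> 2s -> 4s -> ... until it is capped at maxHousekeepingInterval, and it drops
// back to 1s as soon as two consecutive cached samples differ. jitter(interval, 1.0)
// then spreads the actual wait uniformly over [interval, 2*interval) so containers
// do not housekeep in lockstep.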

// TODO(vmarmol): Implement stats collecting as a custom collector.
func (cd *containerData) housekeeping() {
	// Start any background goroutines - must be cleaned up in cd.handler.Cleanup().
	cd.handler.Start()
	defer cd.handler.Cleanup()

	// Initialize cpuload reader - must be cleaned up in cd.loadReader.Stop()
	if cd.loadReader != nil {
		err := cd.loadReader.Start()
		if err != nil {
			klog.Warningf("Could not start cpu load stat collector for %q: %s", cd.info.Name, err)
		}
		defer cd.loadReader.Stop()
	}

	// Long housekeeping is either 100ms or half of the housekeeping interval.
	longHousekeeping := 100 * time.Millisecond
	if *HousekeepingInterval/2 < longHousekeeping {
		longHousekeeping = *HousekeepingInterval / 2
	}

	// Housekeep every second.
	klog.V(3).Infof("Start housekeeping for container %q\n", cd.info.Name)
	houseKeepingTimer := cd.clock.NewTimer(0 * time.Second)
	defer houseKeepingTimer.Stop()
	for {
		if !cd.housekeepingTick(houseKeepingTimer.C(), longHousekeeping) {
			return
		}
		// Stop and drain the timer so that it is safe to reset it
		if !houseKeepingTimer.Stop() {
			select {
			case <-houseKeepingTimer.C():
			default:
			}
		}
		// Log usage if asked to do so.
		if cd.logUsage {
			const numSamples = 60
			var empty time.Time
			stats, err := cd.memoryCache.RecentStats(cd.info.Name, empty, empty, numSamples)
			if err != nil {
				if cd.allowErrorLogging() {
					klog.Warningf("[%s] Failed to get recent stats for logging usage: %v", cd.info.Name, err)
				}
			} else if len(stats) < numSamples {
				// Ignore, not enough stats yet.
			} else {
				usageCPUNs := uint64(0)
				for i := range stats {
					if i > 0 {
						usageCPUNs += stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total
					}
				}
				usageMemory := stats[numSamples-1].Memory.Usage

				instantUsageInCores := float64(stats[numSamples-1].Cpu.Usage.Total-stats[numSamples-2].Cpu.Usage.Total) / float64(stats[numSamples-1].Timestamp.Sub(stats[numSamples-2].Timestamp).Nanoseconds())
				usageInCores := float64(usageCPUNs) / float64(stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds())
				usageInHuman := units.HumanSize(float64(usageMemory))
				// Don't set verbosity since this is already protected by the logUsage flag.
				klog.Infof("[%s] %.3f cores (average: %.3f cores), %s of memory", cd.info.Name, instantUsageInCores, usageInCores, usageInHuman)
			}
		}
		houseKeepingTimer.Reset(cd.nextHousekeepingInterval())
	}
}

func (cd *containerData) housekeepingTick(timer <-chan time.Time, longHousekeeping time.Duration) bool {
	select {
	case <-cd.stop:
		// Stop housekeeping when signaled.
		return false
	case finishedChan := <-cd.onDemandChan:
		// notify the calling function once housekeeping has completed
		defer close(finishedChan)
	case <-timer:
	}
	start := cd.clock.Now()
	err := cd.updateStats()
	if err != nil {
		if cd.allowErrorLogging() {
			klog.Warningf("Failed to update stats for container \"%s\": %s", cd.info.Name, err)
		}
	}
	// Log if housekeeping took too long.
	duration := cd.clock.Since(start)
	if duration >= longHousekeeping {
		klog.V(3).Infof("[%s] Housekeeping took %s", cd.info.Name, duration)
	}
	cd.notifyOnDemand()
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cd.statsLastUpdatedTime = cd.clock.Now()
	return true
}

func (cd *containerData) updateSpec() error {
	spec, err := cd.handler.GetSpec()
	if err != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}
		return err
	}

	customMetrics, err := cd.collectorManager.GetSpec()
	if err != nil {
		return err
	}
	if len(customMetrics) > 0 {
		spec.HasCustomMetrics = true
		spec.CustomMetrics = customMetrics
	}
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cd.info.Spec = spec
	return nil
}

// Calculate new smoothed load average using the new sample of runnable threads.
// The decay used ensures that the load will stabilize on a new constant value within
// 10 seconds.
func (cd *containerData) updateLoad(newLoad uint64) {
	if cd.loadAvg < 0 {
		cd.loadAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
	} else {
		cd.loadAvg = cd.loadAvg*cd.loadDecay + float64(newLoad)*(1.0-cd.loadDecay)
	}
}
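
// Illustrative numbers (not part of the upstream source): if the previous smoothed
// value is 2.0 runnable threads and the new sample reports 4, then with
// loadDecay ≈ 0.905 the result is 2.0*0.905 + 4*0.095 ≈ 2.19. updateStats below
// stores this as int32(2.19 * 1000) = 2190 "milliLoad" in stats.Cpu.LoadAverage.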

func (cd *containerData) updateStats() error {
	stats, statsErr := cd.handler.GetStats()
	if statsErr != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}

		// Stats may be partially populated, push those before we return an error.
		statsErr = fmt.Errorf("%v, continuing to push stats", statsErr)
	}
	if stats == nil {
		return statsErr
	}
	if cd.loadReader != nil {
		// TODO(vmarmol): Cache this path.
		path, err := cd.handler.GetCgroupPath("cpu")
		if err == nil {
			loadStats, err := cd.loadReader.GetCpuLoad(cd.info.Name, path)
			if err != nil {
				return fmt.Errorf("failed to get load stat for %q - path %q, error %s", cd.info.Name, path, err)
			}
			stats.TaskStats = loadStats
			cd.updateLoad(loadStats.NrRunning)
			// convert to 'milliLoad' to avoid floats and preserve precision.
			stats.Cpu.LoadAverage = int32(cd.loadAvg * 1000)
		}
	}
	if cd.summaryReader != nil {
		err := cd.summaryReader.AddSample(*stats)
		if err != nil {
			// Ignore summary errors for now.
			klog.V(2).Infof("Failed to add summary stats for %q: %v", cd.info.Name, err)
		}
	}

	stats.OOMEvents = atomic.LoadUint64(&cd.oomEvents)

	var customStatsErr error
	cm := cd.collectorManager.(*collector.GenericCollectorManager)
	if len(cm.Collectors) > 0 {
		if cm.NextCollectionTime.Before(cd.clock.Now()) {
			customStats, err := cd.updateCustomStats()
			if customStats != nil {
				stats.CustomMetrics = customStats
			}
			if err != nil {
				customStatsErr = err
			}
		}
	}

	perfStatsErr := cd.perfCollector.UpdateStats(stats)

	resctrlStatsErr := cd.resctrlCollector.UpdateStats(stats)

	ref, err := cd.handler.ContainerReference()
	if err != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}
		return err
	}

	cInfo := info.ContainerInfo{
		ContainerReference: ref,
	}

	err = cd.memoryCache.AddStats(&cInfo, stats)
	if err != nil {
		return err
	}
	if statsErr != nil {
		return statsErr
	}
	if perfStatsErr != nil {
		klog.Errorf("error occurred while collecting perf stats for container %s: %s", cInfo.Name, perfStatsErr)
		return perfStatsErr
	}
	if resctrlStatsErr != nil {
		klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, resctrlStatsErr)
		return resctrlStatsErr
	}
	return customStatsErr
}

func (cd *containerData) updateCustomStats() (map[string][]info.MetricVal, error) {
	_, customStats, customStatsErr := cd.collectorManager.Collect()
	if customStatsErr != nil {
		if !cd.handler.Exists() {
			return customStats, nil
		}
		customStatsErr = fmt.Errorf("%v, continuing to push custom stats", customStatsErr)
	}
	return customStats, customStatsErr
}

func (cd *containerData) updateSubcontainers() error {
	var subcontainers info.ContainerReferenceSlice
	subcontainers, err := cd.handler.ListContainers(container.ListSelf)
	if err != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}
		return err
	}
	sort.Sort(subcontainers)
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cd.info.Subcontainers = subcontainers
	return nil
}