github.com/google/cadvisor@v0.49.1/manager/container.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"flag"
	"fmt"
	"math"
	"math/rand"
	"os"
	"os/exec"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/cadvisor/cache/memory"
	"github.com/google/cadvisor/collector"
	"github.com/google/cadvisor/container"
	info "github.com/google/cadvisor/info/v1"
	v2 "github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/stats"
	"github.com/google/cadvisor/summary"
	"github.com/google/cadvisor/utils/cpuload"

	"github.com/docker/go-units"

	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
)

var enableLoadReader = flag.Bool("enable_load_reader", false, "Whether to enable cpu load reader")

// HousekeepingInterval is the default interval between container housekeepings.
var HousekeepingInterval = flag.Duration("housekeeping_interval", 1*time.Second, "Interval between container housekeepings")

// TODO: replace regular expressions with something simpler, such as strings.Split().
// cgroup type chosen to fetch the cgroup path of a process.
// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers...
var cgroupMemoryPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)

// ... but there are systems (e.g. Raspberry Pi 4) where the memory cgroup controller is disabled by default.
// We should check the cpu cgroup then.
var cgroupCPUPathRegExp = regexp.MustCompile(`cpu[^:]*:(.*?)[,;$]`)

type containerInfo struct {
	info.ContainerReference
	Subcontainers []info.ContainerReference
	Spec          info.ContainerSpec
}

type containerData struct {
	oomEvents                uint64
	handler                  container.ContainerHandler
	info                     containerInfo
	memoryCache              *memory.InMemoryCache
	lock                     sync.Mutex
	loadReader               cpuload.CpuLoadReader
	summaryReader            *summary.StatsSummary
	loadAvg                  float64 // smoothed load average seen so far.
	housekeepingInterval     time.Duration
	maxHousekeepingInterval  time.Duration
	allowDynamicHousekeeping bool
	infoLastUpdatedTime      time.Time
	statsLastUpdatedTime     time.Time
	lastErrorTime            time.Time
	// clock is used to track time.
	clock clock.Clock

	// Decay value used for load average smoothing. Interval length of 10 seconds is used.
	loadDecay float64

	// Whether to log the usage of this container when it is updated.
	logUsage bool

	// Tells the container to stop.
	stop chan struct{}

	// Tells the container to immediately collect stats
	onDemandChan chan chan struct{}

	// Runs custom metric collectors.
	collectorManager collector.CollectorManager

	// perfCollector updates stats for perf_event cgroup controller.
	perfCollector stats.Collector

	// resctrlCollector updates stats for resctrl controller.
	resctrlCollector stats.Collector
}

// jitter returns a time.Duration between duration and duration + maxFactor * duration,
// to allow clients to avoid converging on periodic behavior. If maxFactor is 0.0, a
// suggested default value will be chosen.
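// For example, jitter(10*time.Second, 0.5) returns a duration drawn uniformly
// from [10s, 15s), since rand.Float64 yields a value in [0.0, 1.0).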
func jitter(duration time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		maxFactor = 1.0
	}
	wait := duration + time.Duration(rand.Float64()*maxFactor*float64(duration))
	return wait
}

func (cd *containerData) Start() error {
	go cd.housekeeping()
	return nil
}

func (cd *containerData) Stop() error {
	err := cd.memoryCache.RemoveContainer(cd.info.Name)
	if err != nil {
		return err
	}
	close(cd.stop)
	cd.perfCollector.Destroy()
	cd.resctrlCollector.Destroy()
	return nil
}

func (cd *containerData) allowErrorLogging() bool {
	if cd.clock.Since(cd.lastErrorTime) > time.Minute {
		cd.lastErrorTime = cd.clock.Now()
		return true
	}
	return false
}

// OnDemandHousekeeping performs housekeeping on the container and blocks until it has completed.
// It is designed to be used in conjunction with periodic housekeeping, and will cause the timer for
// periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently
// can have serious performance costs.
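// For example, OnDemandHousekeeping(2*time.Second) triggers an immediate stats
// collection only if the cached stats are more than two seconds old.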
func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) {
	cd.lock.Lock()
	timeSinceStatsLastUpdate := cd.clock.Since(cd.statsLastUpdatedTime)
	cd.lock.Unlock()
	if timeSinceStatsLastUpdate > maxAge {
		housekeepingFinishedChan := make(chan struct{})
		cd.onDemandChan <- housekeepingFinishedChan
		select {
		case <-cd.stop:
		case <-housekeepingFinishedChan:
		}
	}
}

// notifyOnDemand notifies all calls to OnDemandHousekeeping that housekeeping is finished
func (cd *containerData) notifyOnDemand() {
	for {
		select {
		case finishedChan := <-cd.onDemandChan:
			close(finishedChan)
		default:
			return
		}
	}
}

func (cd *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
	// Get spec and subcontainers.
	if cd.clock.Since(cd.infoLastUpdatedTime) > 5*time.Second || shouldUpdateSubcontainers {
		err := cd.updateSpec()
		if err != nil {
			return nil, err
		}
		if shouldUpdateSubcontainers {
			err = cd.updateSubcontainers()
			if err != nil {
				return nil, err
			}
		}
		cd.infoLastUpdatedTime = cd.clock.Now()
	}
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cInfo := containerInfo{
		Subcontainers: cd.info.Subcontainers,
		Spec:          cd.info.Spec,
	}
	cInfo.Id = cd.info.Id
	cInfo.Name = cd.info.Name
	cInfo.Aliases = cd.info.Aliases
	cInfo.Namespace = cd.info.Namespace
	return &cInfo, nil
}

func (cd *containerData) DerivedStats() (v2.DerivedStats, error) {
	if cd.summaryReader == nil {
		return v2.DerivedStats{}, fmt.Errorf("derived stats not enabled for container %q", cd.info.Name)
	}
	return cd.summaryReader.DerivedStats()
}

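// getCgroupPath extracts the container's cgroup path from the "cgroup" column
// of ps output. Illustrative inputs (exact ps formatting may vary by version):
//
//	"-"                                           -> "/" (no cgroup reported)
//	"0::/system.slice/foo"                        -> "/system.slice/foo" (cgroup v2)
//	"5:memory:/kubepods/pod1,4:cpu:/kubepods/pod1," -> "/kubepods/pod1"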
func (cd *containerData) getCgroupPath(cgroups string) string {
	if cgroups == "-" {
		return "/"
	}
	if strings.HasPrefix(cgroups, "0::") {
		return cgroups[3:]
	}
	matches := cgroupMemoryPathRegExp.FindSubmatch([]byte(cgroups))
	if len(matches) != 2 {
		klog.V(3).Infof(
			"failed to get memory cgroup path from %q, will try to get cpu cgroup path",
			cgroups,
		)
		// On some systems (e.g. Raspberry Pi 4) the cgroup memory controller is disabled by default.
		matches = cgroupCPUPathRegExp.FindSubmatch([]byte(cgroups))
		if len(matches) != 2 {
			klog.V(3).Infof("failed to get cpu cgroup path from %q; assuming root cgroup", cgroups)
			// Return root in case of failures - the memory hierarchy might not be enabled.
			return "/"
		}
	}
	return string(matches[1])
}

// Returns the contents of a file inside the container's root filesystem.
// Takes in a path relative to the container root.
func (cd *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte, error) {
	pids, err := cd.getContainerPids(inHostNamespace)
	if err != nil {
		return nil, err
	}
	// TODO(rjnagal): Optimize by just reading container's cgroup.proc file when in host namespace.
	rootfs := "/"
	if !inHostNamespace {
		rootfs = "/rootfs"
	}
	for _, pid := range pids {
		filePath := path.Join(rootfs, "/proc", pid, "/root", filepath)
		klog.V(3).Infof("Trying path %q", filePath)
		data, err := os.ReadFile(filePath)
		if err == nil {
			return data, err
		}
	}
	// No process paths could be found. Declare the file non-existent.
	return nil, fmt.Errorf("file %q does not exist", filepath)
}

// Runs ps with the specified output format against the host's /proc and returns
// its output, chrooting into the host's rootfs when cadvisor itself runs inside
// a container.
func (cd *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte, error) {
	args := []string{}
	command := "ps"
	if !inHostNamespace {
		command = "/usr/sbin/chroot"
		args = append(args, "/rootfs", "ps")
	}
	args = append(args, "-e", "-o", format)
	out, err := exec.Command(command, args...).Output()
	if err != nil {
		return nil, fmt.Errorf("failed to execute %q command: %v", command, err)
	}
	return out, err
}

// Get pids of processes in this container.
// A slightly lighter-weight call than GetProcessList if other details are not required.
func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
	format := "pid,cgroup"
	out, err := cd.getPsOutput(inHostNamespace, format)
	if err != nil {
		return nil, err
	}
	expectedFields := 2
	lines := strings.Split(string(out), "\n")
	pids := []string{}
	for _, line := range lines[1:] {
		if len(line) == 0 {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < expectedFields {
			return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
		}
		pid := fields[0]
		cgroup := cd.getCgroupPath(fields[1])
		if cd.info.Name == cgroup {
			pids = append(pids, pid)
		}
	}
	return pids, nil
}

func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
	format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup"
	out, err := cd.getPsOutput(inHostNamespace, format)
	if err != nil {
		return nil, err
	}
	return cd.parseProcessList(cadvisorContainer, inHostNamespace, out)
}

func (cd *containerData) parseProcessList(cadvisorContainer string, inHostNamespace bool, out []byte) ([]v2.ProcessInfo, error) {
	rootfs := "/"
	if !inHostNamespace {
		rootfs = "/rootfs"
	}
	processes := []v2.ProcessInfo{}
	lines := strings.Split(string(out), "\n")
	for _, line := range lines[1:] {
		processInfo, err := cd.parsePsLine(line, cadvisorContainer, inHostNamespace)
		if err != nil {
			return nil, fmt.Errorf("could not parse line %s: %v", line, err)
		}
		if processInfo == nil {
			continue
		}

		dirPath := path.Join(rootfs, "/proc", strconv.Itoa(processInfo.Pid), "fd")
		fds, err := os.ReadDir(dirPath)
		if err != nil {
			klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
			continue
		}
		processInfo.FdCount = len(fds)

		processes = append(processes, *processInfo)
	}
	return processes, nil
}

func (cd *containerData) isRoot() bool {
	return cd.info.Name == "/"
}

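// parsePsLine parses one line of ps output in the format requested by
// GetProcessList. A hypothetical input line (actual field widths and cgroup
// formatting vary by ps version):
//
//	root 1234 1 10:02 0.5 1.2 10240 204800 Ss 00:00:10 some comm 2 5:memory:/a,
//
// Because comm may contain spaces, it spans fields[10 : len(fields)-2]; psr and
// the cgroup column are always the last two fields.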
func (cd *containerData) parsePsLine(line, cadvisorContainer string, inHostNamespace bool) (*v2.ProcessInfo, error) {
	const expectedFields = 13
	if len(line) == 0 {
		return nil, nil
	}

	info := v2.ProcessInfo{}
	var err error

	fields := strings.Fields(line)
	if len(fields) < expectedFields {
		return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
	}
	info.User = fields[0]
	info.StartTime = fields[3]
	info.Status = fields[8]
	info.RunningTime = fields[9]

	info.Pid, err = strconv.Atoi(fields[1])
	if err != nil {
		return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
	}
	info.Ppid, err = strconv.Atoi(fields[2])
	if err != nil {
		return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
	}

	percentCPU, err := strconv.ParseFloat(fields[4], 32)
	if err != nil {
		return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
	}
	info.PercentCpu = float32(percentCPU)
	percentMem, err := strconv.ParseFloat(fields[5], 32)
	if err != nil {
		return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err)
	}
	info.PercentMemory = float32(percentMem)

	info.RSS, err = strconv.ParseUint(fields[6], 0, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err)
	}
	info.VirtualSize, err = strconv.ParseUint(fields[7], 0, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err)
	}
	// ps reports rss and vsz in KiB; convert both to bytes.
	info.RSS *= 1024
	info.VirtualSize *= 1024

	// According to `man ps`: The following user-defined format specifiers may contain spaces: args, cmd, comm, command,
	// fname, ucmd, ucomm, lstart, bsdstart, start.
	// Therefore we need to be able to parse comm that consists of multiple space-separated parts.
	info.Cmd = strings.Join(fields[10:len(fields)-2], " ")

	// These are the last two parts of the line. We create a subslice of `fields` to handle comm that includes spaces.
	lastTwoFields := fields[len(fields)-2:]
	info.Psr, err = strconv.Atoi(lastTwoFields[0])
	if err != nil {
		return nil, fmt.Errorf("invalid psr %q: %v", lastTwoFields[0], err)
	}
	info.CgroupPath = cd.getCgroupPath(lastTwoFields[1])

	// Remove the ps command we just ran from the cadvisor container.
	// Not necessary, but makes the cadvisor page look cleaner.
	if !inHostNamespace && cadvisorContainer == info.CgroupPath && info.Cmd == "ps" {
		return nil, nil
	}

	// Do not report processes from other containers when a non-root container is requested.
	if !cd.isRoot() && info.CgroupPath != cd.info.Name {
		return nil, nil
	}

	// Remove cgroup information when a non-root container is requested.
	if !cd.isRoot() {
		info.CgroupPath = ""
	}
	return &info, nil
}

func newContainerData(containerName string, memoryCache *memory.InMemoryCache, handler container.ContainerHandler, logUsage bool, collectorManager collector.CollectorManager, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, clock clock.Clock) (*containerData, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("nil memory storage")
	}
	if handler == nil {
		return nil, fmt.Errorf("nil container handler")
	}
	ref, err := handler.ContainerReference()
	if err != nil {
		return nil, err
	}

	cont := &containerData{
		handler:                  handler,
		memoryCache:              memoryCache,
		housekeepingInterval:     *HousekeepingInterval,
		maxHousekeepingInterval:  maxHousekeepingInterval,
		allowDynamicHousekeeping: allowDynamicHousekeeping,
		logUsage:                 logUsage,
		loadAvg:                  -1.0, // negative value indicates uninitialized.
		stop:                     make(chan struct{}),
		collectorManager:         collectorManager,
		onDemandChan:             make(chan chan struct{}, 100),
		clock:                    clock,
		perfCollector:            &stats.NoopCollector{},
		resctrlCollector:         &stats.NoopCollector{},
	}
	cont.info.ContainerReference = ref

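	// loadDecay = e^(-T/10s), where T is the housekeeping interval; with the
	// default T = 1s this is about 0.905. Over 10 seconds of samples the old
	// average's weight decays to e^-1 (about 37%).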
	cont.loadDecay = math.Exp(-cont.housekeepingInterval.Seconds() / 10)

	if *enableLoadReader {
		// Create cpu load reader.
		loadReader, err := cpuload.New()
		if err != nil {
			klog.Warningf("Could not initialize cpu load reader for %q: %s", ref.Name, err)
		} else {
			cont.loadReader = loadReader
		}
	}

	err = cont.updateSpec()
	if err != nil {
		return nil, err
	}
	cont.summaryReader, err = summary.New(cont.info.Spec)
	if err != nil {
		cont.summaryReader = nil
		klog.V(5).Infof("Failed to create summary reader for %q: %v", ref.Name, err)
	}

	return cont, nil
}

// Determine when the next housekeeping should occur.
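// With dynamic housekeeping enabled, the interval doubles whenever the two most
// recent stats samples are identical (e.g. 1s -> 2s -> 4s, capped at
// maxHousekeepingInterval) and snaps back to the baseline once usage changes.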
func (cd *containerData) nextHousekeepingInterval() time.Duration {
	if cd.allowDynamicHousekeeping {
		var empty time.Time
		stats, err := cd.memoryCache.RecentStats(cd.info.Name, empty, empty, 2)
		if err != nil {
			if cd.allowErrorLogging() {
				klog.V(4).Infof("Failed to get RecentStats(%q) while determining the next housekeeping: %v", cd.info.Name, err)
			}
		} else if len(stats) == 2 {
			// TODO(vishnuk): Use no processes as a signal.
			// Raise the interval if usage hasn't changed in the last housekeeping.
			if stats[0].StatsEq(stats[1]) && (cd.housekeepingInterval < cd.maxHousekeepingInterval) {
				cd.housekeepingInterval *= 2
				if cd.housekeepingInterval > cd.maxHousekeepingInterval {
					cd.housekeepingInterval = cd.maxHousekeepingInterval
				}
			} else if cd.housekeepingInterval != *HousekeepingInterval {
				// Lower interval back to the baseline.
				cd.housekeepingInterval = *HousekeepingInterval
			}
		}
	}

	return jitter(cd.housekeepingInterval, 1.0)
}

// TODO(vmarmol): Implement stats collecting as a custom collector.
func (cd *containerData) housekeeping() {
	// Start any background goroutines - must be cleaned up in cd.handler.Cleanup().
	cd.handler.Start()
	defer cd.handler.Cleanup()

	// Initialize cpuload reader - must be cleaned up in cd.loadReader.Stop()
	if cd.loadReader != nil {
		err := cd.loadReader.Start()
		if err != nil {
			klog.Warningf("Could not start cpu load stat collector for %q: %s", cd.info.Name, err)
		}
		defer cd.loadReader.Stop()
	}

	// "Long" housekeeping is the lesser of 100ms or half the housekeeping
	// interval (100ms at the default 1s interval).
	longHousekeeping := 100 * time.Millisecond
	if *HousekeepingInterval/2 < longHousekeeping {
		longHousekeeping = *HousekeepingInterval / 2
	}

	// Housekeep at the configured interval.
	klog.V(3).Infof("Start housekeeping for container %q\n", cd.info.Name)
	houseKeepingTimer := cd.clock.NewTimer(0 * time.Second)
	defer houseKeepingTimer.Stop()
	for {
		if !cd.housekeepingTick(houseKeepingTimer.C(), longHousekeeping) {
			return
		}
		// Stop and drain the timer so that it is safe to reset it.
		if !houseKeepingTimer.Stop() {
			select {
			case <-houseKeepingTimer.C():
			default:
			}
		}
		// Log usage if asked to do so.
		if cd.logUsage {
			const numSamples = 60
			var empty time.Time
			stats, err := cd.memoryCache.RecentStats(cd.info.Name, empty, empty, numSamples)
			if err != nil {
				if cd.allowErrorLogging() {
					klog.Warningf("[%s] Failed to get recent stats for logging usage: %v", cd.info.Name, err)
				}
			} else if len(stats) < numSamples {
				// Ignore, not enough stats yet.
			} else {
				usageCPUNs := uint64(0)
				for i := range stats {
					if i > 0 {
						usageCPUNs += stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total
					}
				}
				usageMemory := stats[numSamples-1].Memory.Usage

				instantUsageInCores := float64(stats[numSamples-1].Cpu.Usage.Total-stats[numSamples-2].Cpu.Usage.Total) / float64(stats[numSamples-1].Timestamp.Sub(stats[numSamples-2].Timestamp).Nanoseconds())
				usageInCores := float64(usageCPUNs) / float64(stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds())
				usageInHuman := units.HumanSize(float64(usageMemory))
				// Don't set verbosity since this is already protected by the logUsage flag.
				klog.Infof("[%s] %.3f cores (average: %.3f cores), %s of memory", cd.info.Name, instantUsageInCores, usageInCores, usageInHuman)
			}
		}
		houseKeepingTimer.Reset(cd.nextHousekeepingInterval())
	}
}

func (cd *containerData) housekeepingTick(timer <-chan time.Time, longHousekeeping time.Duration) bool {
	select {
	case <-cd.stop:
		// Stop housekeeping when signaled.
		return false
	case finishedChan := <-cd.onDemandChan:
		// Notify the calling function once housekeeping has completed.
		defer close(finishedChan)
	case <-timer:
	}
	start := cd.clock.Now()
	err := cd.updateStats()
	if err != nil {
		if cd.allowErrorLogging() {
			klog.Warningf("Failed to update stats for container %q: %s", cd.info.Name, err)
		}
	}
	// Log if housekeeping took too long.
	duration := cd.clock.Since(start)
	if duration >= longHousekeeping {
		klog.V(3).Infof("[%s] Housekeeping took %s", cd.info.Name, duration)
	}
	cd.notifyOnDemand()
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cd.statsLastUpdatedTime = cd.clock.Now()
	return true
}

func (cd *containerData) updateSpec() error {
	spec, err := cd.handler.GetSpec()
	if err != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}
		return err
	}

	customMetrics, err := cd.collectorManager.GetSpec()
	if err != nil {
		return err
	}
	if len(customMetrics) > 0 {
		spec.HasCustomMetrics = true
		spec.CustomMetrics = customMetrics
	}
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cd.info.Spec = spec
	return nil
}

// Calculate new smoothed load average using the new sample of runnable threads.
// The decay used ensures that the load will stabilize on a new constant value within
// 10 seconds.
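// Concretely: loadAvg' = loadAvg*loadDecay + newLoad*(1 - loadDecay).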
func (cd *containerData) updateLoad(newLoad uint64) {
	if cd.loadAvg < 0 {
		cd.loadAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
	} else {
		cd.loadAvg = cd.loadAvg*cd.loadDecay + float64(newLoad)*(1.0-cd.loadDecay)
	}
}

func (cd *containerData) updateStats() error {
	stats, statsErr := cd.handler.GetStats()
	if statsErr != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}

		// Stats may be partially populated, push those before we return an error.
		statsErr = fmt.Errorf("%v, continuing to push stats", statsErr)
	}
	if stats == nil {
		return statsErr
	}
	if cd.loadReader != nil {
		// TODO(vmarmol): Cache this path.
		path, err := cd.handler.GetCgroupPath("cpu")
		if err == nil {
			loadStats, err := cd.loadReader.GetCpuLoad(cd.info.Name, path)
			if err != nil {
				return fmt.Errorf("failed to get load stat for %q - path %q, error %s", cd.info.Name, path, err)
			}
			stats.TaskStats = loadStats
			cd.updateLoad(loadStats.NrRunning)
			// convert to 'milliLoad' to avoid floats and preserve precision.
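			// e.g. a smoothed average of 1.5 runnable threads is stored as 1500.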
			stats.Cpu.LoadAverage = int32(cd.loadAvg * 1000)
		}
	}
	if cd.summaryReader != nil {
		err := cd.summaryReader.AddSample(*stats)
		if err != nil {
			// Ignore summary errors for now.
			klog.V(2).Infof("Failed to add summary stats for %q: %v", cd.info.Name, err)
		}
	}

	stats.OOMEvents = atomic.LoadUint64(&cd.oomEvents)

	var customStatsErr error
	cm := cd.collectorManager.(*collector.GenericCollectorManager)
	if len(cm.Collectors) > 0 {
		if cm.NextCollectionTime.Before(cd.clock.Now()) {
			customStats, err := cd.updateCustomStats()
			if customStats != nil {
				stats.CustomMetrics = customStats
			}
			if err != nil {
				customStatsErr = err
			}
		}
	}

	perfStatsErr := cd.perfCollector.UpdateStats(stats)

	resctrlStatsErr := cd.resctrlCollector.UpdateStats(stats)

	ref, err := cd.handler.ContainerReference()
	if err != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}
		return err
	}

	cInfo := info.ContainerInfo{
		ContainerReference: ref,
	}

	err = cd.memoryCache.AddStats(&cInfo, stats)
	if err != nil {
		return err
	}
	if statsErr != nil {
		return statsErr
	}
	if perfStatsErr != nil {
		klog.Errorf("error occurred while collecting perf stats for container %s: %s", cInfo.Name, perfStatsErr)
		return perfStatsErr
	}
	if resctrlStatsErr != nil {
		klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, resctrlStatsErr)
		return resctrlStatsErr
	}
	return customStatsErr
}

func (cd *containerData) updateCustomStats() (map[string][]info.MetricVal, error) {
	_, customStats, customStatsErr := cd.collectorManager.Collect()
	if customStatsErr != nil {
		if !cd.handler.Exists() {
			return customStats, nil
		}
		customStatsErr = fmt.Errorf("%v, continuing to push custom stats", customStatsErr)
	}
	return customStats, customStatsErr
}

func (cd *containerData) updateSubcontainers() error {
	var subcontainers info.ContainerReferenceSlice
	subcontainers, err := cd.handler.ListContainers(container.ListSelf)
	if err != nil {
		// Ignore errors if the container is dead.
		if !cd.handler.Exists() {
			return nil
		}
		return err
	}
	sort.Sort(subcontainers)
	cd.lock.Lock()
	defer cd.lock.Unlock()
	cd.info.Subcontainers = subcontainers
	return nil
}