github.com/google/cadvisor@v0.49.1/manager/manager.go

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Manager of cAdvisor-monitored containers.
    16  package manager
    17  
    18  import (
    19  	"flag"
    20  	"fmt"
    21  	"net/http"
    22  	"os"
    23  	"path"
    24  	"strconv"
    25  	"strings"
    26  	"sync"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	"github.com/google/cadvisor/cache/memory"
    31  	"github.com/google/cadvisor/collector"
    32  	"github.com/google/cadvisor/container"
    33  	"github.com/google/cadvisor/container/raw"
    34  	"github.com/google/cadvisor/events"
    35  	"github.com/google/cadvisor/fs"
    36  	info "github.com/google/cadvisor/info/v1"
    37  	v2 "github.com/google/cadvisor/info/v2"
    38  	"github.com/google/cadvisor/machine"
    39  	"github.com/google/cadvisor/nvm"
    40  	"github.com/google/cadvisor/perf"
    41  	"github.com/google/cadvisor/resctrl"
    42  	"github.com/google/cadvisor/stats"
    43  	"github.com/google/cadvisor/utils/oomparser"
    44  	"github.com/google/cadvisor/utils/sysfs"
    45  	"github.com/google/cadvisor/version"
    46  	"github.com/google/cadvisor/watcher"
    47  
    48  	"github.com/opencontainers/runc/libcontainer/cgroups"
    49  
    50  	"k8s.io/klog/v2"
    51  	"k8s.io/utils/clock"
    52  )
    53  
    54  var globalHousekeepingInterval = flag.Duration("global_housekeeping_interval", 1*time.Minute, "Interval between global housekeepings")
    55  var updateMachineInfoInterval = flag.Duration("update_machine_info_interval", 5*time.Minute, "Interval between machine info updates.")
    56  var logCadvisorUsage = flag.Bool("log_cadvisor_usage", false, "Whether to log the usage of the cAdvisor container")
    57  var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h", "Max length of time for which to store events (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is a duration. Default is applied to all non-specified event types")
    58  var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
    59  var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")
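// For example, to keep creation events for 48 hours and cap them at 500 while
// other event types stay on the defaults, a deployment could pass
// (illustrative values):
//
//	-event_storage_age_limit=default=24h,creation=48h
//	-event_storage_event_limit=default=100000,creation=500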
    60  
    61  // The namespaces under which aliases are unique.
    62  const (
    63  	DockerNamespace = "docker"
    64  	PodmanNamespace = "podman"
    65  )
    66  
    67  var HousekeepingConfigFlags = HousekeepingConfig{
    68  	flag.Duration("max_housekeeping_interval", 60*time.Second, "Largest interval to allow between container housekeepings"),
    69  	flag.Bool("allow_dynamic_housekeeping", true, "Whether to allow the housekeeping interval to be dynamic"),
    70  }
    71  
    72  // The Manager interface defines operations for starting a manager and getting
    73  // container and machine information.
    74  type Manager interface {
    75  	// Start the manager. Calling other manager methods before this returns
    76  	// may produce undefined behavior.
    77  	Start() error
    78  
    79  	// Stops the manager.
    80  	Stop() error
    81  
    82  	// Get information about a container.
    83  	GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
    84  
    85  	// Get V2 information about a container.
    86  	// Recursive (subcontainer) requests are best-effort, and may return a partial result alongside an
    87  	// error in the partial failure case.
    88  	GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error)
    89  
    90  	// Get information about all subcontainers of the specified container (includes self).
    91  	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
    92  
    93  	// Gets all Docker containers. Returns a map from full container name to ContainerInfo.
    94  	AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error)
    95  
    96  	// Gets information about a specific Docker container. The specified name is within the Docker namespace.
    97  	DockerContainer(dockerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error)
    98  
    99  	// Gets spec for all containers based on request options.
   100  	GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error)
   101  
   102  	// Gets summary stats for all containers based on request options.
   103  	GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error)
   104  
   105  	// Get info for all requested containers based on the request options.
   106  	GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error)
   107  
   108  	// Returns true if the named container exists.
   109  	Exists(containerName string) bool
   110  
   111  	// Get information about the machine.
   112  	GetMachineInfo() (*info.MachineInfo, error)
   113  
   114  	// Get version information about different components we depend on.
   115  	GetVersionInfo() (*info.VersionInfo, error)
   116  
   117  	// GetFsInfoByFsUUID returns the information of the device having the
   118  	// specified filesystem uuid. If no such device with the UUID exists, this
   119  	// function will return the fs.ErrNoSuchDevice error.
   120  	GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error)
   121  
   122  	// Get filesystem information for the filesystem that contains the given directory
   123  	GetDirFsInfo(dir string) (v2.FsInfo, error)
   124  
   125  	// Get filesystem information for a given label.
   126  	// Returns information for all global filesystems if label is empty.
   127  	GetFsInfo(label string) ([]v2.FsInfo, error)
   128  
   129  	// Get ps output for a container.
   130  	GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error)
   131  
   132  	// Get events streamed through passedChannel that fit the request.
   133  	WatchForEvents(request *events.Request) (*events.EventChannel, error)
   134  
   135  	// Get past events that have been detected and that fit the request.
   136  	GetPastEvents(request *events.Request) ([]*info.Event, error)
   137  
   138  	CloseEventChannel(watchID int)
   139  
   140  	// Returns debugging information. Map of lines per category.
   141  	DebugInfo() map[string][]string
   142  
   143  	AllPodmanContainers(c *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error)
   144  
   145  	PodmanContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error)
   146  }
   147  
   148  // Housekeeping configuration for the manager
   149  type HousekeepingConfig = struct {
   150  	Interval     *time.Duration
   151  	AllowDynamic *bool
   152  }
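// A minimal wiring sketch for this API (hypothetical caller, not part of this
// file; the cache constructor, metric set, and intervals are illustrative and
// assume cAdvisor's memory.New(maxAge, backends) and container.AllMetrics):
//
//	memoryCache := memory.New(2*time.Minute, nil)
//	m, err := New(memoryCache, sysfs.NewRealSysFs(), HousekeepingConfigFlags,
//		container.AllMetrics, http.DefaultClient, nil, nil, "", 10*time.Second)
//	if err != nil {
//		klog.Fatal(err)
//	}
//	if err := m.Start(); err != nil { // Start must return before other calls.
//		klog.Fatal(err)
//	}
//	ci, err := m.GetContainerInfo("/", &info.ContainerInfoRequest{NumStats: 60})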
   153  
   154  // New takes a memory storage and returns a new manager.
   155  func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, HousekeepingConfig HousekeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList, containerEnvMetadataWhiteList []string, perfEventsFile string, resctrlInterval time.Duration) (Manager, error) {
   156  	if memoryCache == nil {
   157  		return nil, fmt.Errorf("manager requires memory storage")
   158  	}
   159  
   160  	// Detect the container we are running in.
   161  	selfContainer := "/"
   162  	var err error
   163  	// Avoid using GetOwnCgroupPath on cgroup v2 as it is not supported by libcontainer
   164  	if !cgroups.IsCgroup2UnifiedMode() {
   165  		selfContainer, err = cgroups.GetOwnCgroup("cpu")
   166  		if err != nil {
   167  			return nil, err
   168  		}
   169  		klog.V(2).Infof("cAdvisor running in container: %q", selfContainer)
   170  	}
   171  
   172  	context := fs.Context{}
   173  
   174  	if err := container.InitializeFSContext(&context); err != nil {
   175  		return nil, err
   176  	}
   177  
   178  	fsInfo, err := fs.NewFsInfo(context)
   179  	if err != nil {
   180  		return nil, err
   181  	}
   182  
   183  	// If cAdvisor was started with the host's rootfs mounted, assume that it is
   184  	// running in its own namespaces.
   185  	inHostNamespace := false
   186  	if _, err := os.Stat("/rootfs/proc"); os.IsNotExist(err) {
   187  		inHostNamespace = true
   188  	}
   189  
   190  	// Register for new subcontainers.
   191  	eventsChannel := make(chan watcher.ContainerEvent, 16)
   192  
   193  	newManager := &manager{
   194  		containers:                            make(map[namespacedContainerName]*containerData),
   195  		quitChannels:                          make([]chan error, 0, 2),
   196  		memoryCache:                           memoryCache,
   197  		fsInfo:                                fsInfo,
   198  		sysFs:                                 sysfs,
   199  		cadvisorContainer:                     selfContainer,
   200  		inHostNamespace:                       inHostNamespace,
   201  		startupTime:                           time.Now(),
   202  		maxHousekeepingInterval:               *HousekeepingConfig.Interval,
   203  		allowDynamicHousekeeping:              *HousekeepingConfig.AllowDynamic,
   204  		includedMetrics:                       includedMetricsSet,
   205  		containerWatchers:                     []watcher.ContainerWatcher{},
   206  		eventsChannel:                         eventsChannel,
   207  		collectorHTTPClient:                   collectorHTTPClient,
   208  		rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
   209  		containerEnvMetadataWhiteList:         containerEnvMetadataWhiteList,
   210  	}
   211  
   212  	machineInfo, err := machine.Info(sysfs, fsInfo, inHostNamespace)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	newManager.machineInfo = *machineInfo
   217  	klog.V(1).Infof("Machine: %+v", newManager.machineInfo)
   218  
   219  	newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.Topology)
   220  	if err != nil {
   221  		return nil, err
   222  	}
   223  
   224  	newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.CPUVendorID, inHostNamespace)
   225  	if err != nil {
   226  		klog.V(4).Infof("Cannot gather resctrl metrics: %v", err)
   227  	}
   228  
   229  	versionInfo, err := getVersionInfo()
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  	klog.V(1).Infof("Version: %+v", *versionInfo)
   234  
   235  	newManager.eventHandler = events.NewEventManager(parseEventsStoragePolicy())
   236  	return newManager, nil
   237  }
   238  
   239  // A namespaced container name.
   240  type namespacedContainerName struct {
   241  	// The namespace of the container. Can be empty for the root namespace.
   242  	Namespace string
   243  
   244  	// The name of the container in this namespace.
   245  	Name string
   246  }
   247  
   248  type manager struct {
   249  	containers               map[namespacedContainerName]*containerData
   250  	containersLock           sync.RWMutex
   251  	memoryCache              *memory.InMemoryCache
   252  	fsInfo                   fs.FsInfo
   253  	sysFs                    sysfs.SysFs
   254  	machineMu                sync.RWMutex // protects machineInfo
   255  	machineInfo              info.MachineInfo
   256  	quitChannels             []chan error
   257  	cadvisorContainer        string
   258  	inHostNamespace          bool
   259  	eventHandler             events.EventManager
   260  	startupTime              time.Time
   261  	maxHousekeepingInterval  time.Duration
   262  	allowDynamicHousekeeping bool
   263  	includedMetrics          container.MetricSet
   264  	containerWatchers        []watcher.ContainerWatcher
   265  	eventsChannel            chan watcher.ContainerEvent
   266  	collectorHTTPClient      *http.Client
   267  	perfManager              stats.Manager
   268  	resctrlManager           resctrl.Manager
   269  	// List of raw container cgroup path prefix whitelist.
   270  	rawContainerCgroupPathPrefixWhiteList []string
   271  	// List of container env prefix whitelist, the matched container envs would be collected into metrics as extra labels.
   272  	containerEnvMetadataWhiteList []string
   273  }
   274  
   275  func (m *manager) PodmanContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
   276  	container, err := m.namespacedContainer(containerName, PodmanNamespace)
   277  	if err != nil {
   278  		return info.ContainerInfo{}, err
   279  	}
   280  
   281  	inf, err := m.containerDataToContainerInfo(container, query)
   282  	if err != nil {
   283  		return info.ContainerInfo{}, err
   284  	}
   285  	return *inf, nil
   286  }
   287  
   288  // Start the container manager.
   289  func (m *manager) Start() error {
   290  	m.containerWatchers = container.InitializePlugins(m, m.fsInfo, m.includedMetrics)
   291  
   292  	err := raw.Register(m, m.fsInfo, m.includedMetrics, m.rawContainerCgroupPathPrefixWhiteList)
   293  	if err != nil {
   294  		klog.Errorf("Registration of the raw container factory failed: %v", err)
   295  	}
   296  
   297  	rawWatcher, err := raw.NewRawContainerWatcher(m.includedMetrics)
   298  	if err != nil {
   299  		return err
   300  	}
   301  	m.containerWatchers = append(m.containerWatchers, rawWatcher)
   302  
   303  	// Watch for OOMs.
   304  	err = m.watchForNewOoms()
   305  	if err != nil {
   306  		klog.Warningf("Could not configure a source for OOM detection, disabling OOM events: %v", err)
   307  	}
   308  
   309  	// If there are no factories, don't start any housekeeping and serve the information we do have.
   310  	if !container.HasFactories() {
   311  		return nil
   312  	}
   313  
   314  	// Create root and then recover all containers.
   315  	err = m.createContainer("/", watcher.Raw)
   316  	if err != nil {
   317  		return err
   318  	}
   319  	klog.V(2).Infof("Starting recovery of all containers")
   320  	err = m.detectSubcontainers("/")
   321  	if err != nil {
   322  		return err
   323  	}
   324  	klog.V(2).Infof("Recovery completed")
   325  
   326  	// Watch for new containers.
   327  	quitWatcher := make(chan error)
   328  	err = m.watchForNewContainers(quitWatcher)
   329  	if err != nil {
   330  		return err
   331  	}
   332  	m.quitChannels = append(m.quitChannels, quitWatcher)
   333  
   334  	// Look for new containers in the main housekeeping thread.
   335  	quitGlobalHousekeeping := make(chan error)
   336  	m.quitChannels = append(m.quitChannels, quitGlobalHousekeeping)
   337  	go m.globalHousekeeping(quitGlobalHousekeeping)
   338  
   339  	quitUpdateMachineInfo := make(chan error)
   340  	m.quitChannels = append(m.quitChannels, quitUpdateMachineInfo)
   341  	go m.updateMachineInfo(quitUpdateMachineInfo)
   342  
   343  	return nil
   344  }
   345  
   346  func (m *manager) Stop() error {
   347  	defer m.destroyCollectors()
   348  	// Stop and wait on all quit channels.
   349  	for i, c := range m.quitChannels {
   350  		// Send the exit signal and wait on the thread to exit (by closing the channel).
   351  		c <- nil
   352  		err := <-c
   353  		if err != nil {
   354  			// Remove the channels that quit successfully.
   355  			m.quitChannels = m.quitChannels[i:]
   356  			return err
   357  		}
   358  	}
   359  	m.quitChannels = make([]chan error, 0, 2)
   360  	nvm.Finalize()
   361  	perf.Finalize()
   362  	return nil
   363  }
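// Each background goroutine owns one entry in quitChannels and implements the
// handshake Stop relies on: receive the nil "please exit" value, clean up, and
// echo a result back on the same channel (updateMachineInfo and
// globalHousekeeping below follow this pattern):
//
//	case <-quit:
//		ticker.Stop()
//		quit <- nil // Unblocks the err := <-c receive in Stop.
//		return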
   364  
   365  func (m *manager) destroyCollectors() {
   366  	for _, container := range m.containers {
   367  		container.perfCollector.Destroy()
   368  		container.resctrlCollector.Destroy()
   369  	}
   370  }
   371  
   372  func (m *manager) updateMachineInfo(quit chan error) {
   373  	ticker := time.NewTicker(*updateMachineInfoInterval)
   374  	for {
   375  		select {
   376  		case <-ticker.C:
   377  			info, err := machine.Info(m.sysFs, m.fsInfo, m.inHostNamespace)
   378  			if err != nil {
   379  				klog.Errorf("Could not get machine info: %v", err)
   380  				break
   381  			}
   382  			m.machineMu.Lock()
   383  			m.machineInfo = *info
   384  			m.machineMu.Unlock()
   385  			klog.V(5).Infof("Update machine info: %+v", *info)
   386  		case <-quit:
   387  			ticker.Stop()
   388  			quit <- nil
   389  			return
   390  		}
   391  	}
   392  }
   393  
   394  func (m *manager) globalHousekeeping(quit chan error) {
   395  	// "Long" housekeeping is 100ms or half the global housekeeping interval, whichever is smaller.
   396  	longHousekeeping := 100 * time.Millisecond
   397  	if *globalHousekeepingInterval/2 < longHousekeeping {
   398  		longHousekeeping = *globalHousekeepingInterval / 2
   399  	}
   400  
   401  	ticker := time.NewTicker(*globalHousekeepingInterval)
   402  	for {
   403  		select {
   404  		case t := <-ticker.C:
   405  			start := time.Now()
   406  
   407  			// Check for new containers.
   408  			err := m.detectSubcontainers("/")
   409  			if err != nil {
   410  				klog.Errorf("Failed to detect containers: %s", err)
   411  			}
   412  
   413  			// Log if housekeeping took too long.
   414  			duration := time.Since(start)
   415  			if duration >= longHousekeeping {
   416  				klog.V(3).Infof("Global Housekeeping(%d) took %s", t.Unix(), duration)
   417  			}
   418  		case <-quit:
   419  			// Quit if asked to do so.
   420  			quit <- nil
   421  			klog.Infof("Exiting global housekeeping thread")
   422  			return
   423  		}
   424  	}
   425  }
   426  
   427  func (m *manager) getContainerData(containerName string) (*containerData, error) {
   428  	var cont *containerData
   429  	var ok bool
   430  	func() {
   431  		m.containersLock.RLock()
   432  		defer m.containersLock.RUnlock()
   433  
   434  		// Ensure we have the container.
   435  		cont, ok = m.containers[namespacedContainerName{
   436  			Name: containerName,
   437  		}]
   438  	}()
   439  	if !ok {
   440  		return nil, fmt.Errorf("unknown container %q", containerName)
   441  	}
   442  	return cont, nil
   443  }
   444  
   445  func (m *manager) GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error) {
   446  	conts, err := m.getRequestedContainers(containerName, options)
   447  	if err != nil {
   448  		return nil, err
   449  	}
   450  	var errs partialFailure
   451  	stats := make(map[string]v2.DerivedStats)
   452  	for name, cont := range conts {
   453  		d, err := cont.DerivedStats()
   454  		if err != nil {
   455  			errs.append(name, "DerivedStats", err)
   456  		}
   457  		stats[name] = d
   458  	}
   459  	return stats, errs.OrNil()
   460  }
   461  
   462  func (m *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
   463  	conts, err := m.getRequestedContainers(containerName, options)
   464  	if err != nil {
   465  		return nil, err
   466  	}
   467  	var errs partialFailure
   468  	specs := make(map[string]v2.ContainerSpec)
   469  	for name, cont := range conts {
   470  		cinfo, err := cont.GetInfo(false)
   471  		if err != nil {
   472  			errs.append(name, "GetInfo", err)
   473  		}
   474  		spec := m.getV2Spec(cinfo)
   475  		specs[name] = spec
   476  	}
   477  	return specs, errs.OrNil()
   478  }
   479  
   480  // Get V2 container spec from v1 container info.
   481  func (m *manager) getV2Spec(cinfo *containerInfo) v2.ContainerSpec {
   482  	spec := m.getAdjustedSpec(cinfo)
   483  	return v2.ContainerSpecFromV1(&spec, cinfo.Aliases, cinfo.Namespace)
   484  }
   485  
   486  func (m *manager) getAdjustedSpec(cinfo *containerInfo) info.ContainerSpec {
   487  	spec := cinfo.Spec
   488  
   489  	// Set default value to an actual value
   490  	if spec.HasMemory {
   491  		// Memory.Limit is 0 means there's no limit
   492  		if spec.Memory.Limit == 0 {
   493  			m.machineMu.RLock()
   494  			spec.Memory.Limit = uint64(m.machineInfo.MemoryCapacity)
   495  			m.machineMu.RUnlock()
   496  		}
   497  	}
   498  	return spec
   499  }
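// For example, a container with HasMemory set but Memory.Limit == 0 (which
// means "no limit" per the comment above) is reported with the machine's
// MemoryCapacity as its limit, so callers always see a usable upper bound.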
   500  
   501  func (m *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
   502  	cont, err := m.getContainerData(containerName)
   503  	if err != nil {
   504  		return nil, err
   505  	}
   506  	return m.containerDataToContainerInfo(cont, query)
   507  }
   508  
   509  func (m *manager) GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
   510  	containers, err := m.getRequestedContainers(containerName, options)
   511  	if err != nil {
   512  		return nil, err
   513  	}
   514  
   515  	var errs partialFailure
   516  	var nilTime time.Time // Ignored.
   517  
   518  	infos := make(map[string]v2.ContainerInfo, len(containers))
   519  	for name, container := range containers {
   520  		result := v2.ContainerInfo{}
   521  		cinfo, err := container.GetInfo(false)
   522  		if err != nil {
   523  			errs.append(name, "GetInfo", err)
   524  			infos[name] = result
   525  			continue
   526  		}
   527  		result.Spec = m.getV2Spec(cinfo)
   528  
   529  		stats, err := m.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
   530  		if err != nil {
   531  			errs.append(name, "RecentStats", err)
   532  			infos[name] = result
   533  			continue
   534  		}
   535  
   536  		result.Stats = v2.ContainerStatsFromV1(containerName, &cinfo.Spec, stats)
   537  		infos[name] = result
   538  	}
   539  
   540  	return infos, errs.OrNil()
   541  }
   542  
   543  func (m *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
   544  	// Get the info from the container.
   545  	cinfo, err := cont.GetInfo(true)
   546  	if err != nil {
   547  		return nil, err
   548  	}
   549  
   550  	stats, err := m.memoryCache.RecentStats(cinfo.Name, query.Start, query.End, query.NumStats)
   551  	if err != nil {
   552  		return nil, err
   553  	}
   554  
   555  	// Make a copy of the info for the user.
   556  	ret := &info.ContainerInfo{
   557  		ContainerReference: cinfo.ContainerReference,
   558  		Subcontainers:      cinfo.Subcontainers,
   559  		Spec:               m.getAdjustedSpec(cinfo),
   560  		Stats:              stats,
   561  	}
   562  	return ret, nil
   563  }
   564  
   565  func (m *manager) getContainer(containerName string) (*containerData, error) {
   566  	m.containersLock.RLock()
   567  	defer m.containersLock.RUnlock()
   568  	cont, ok := m.containers[namespacedContainerName{Name: containerName}]
   569  	if !ok {
   570  		return nil, fmt.Errorf("unknown container %q", containerName)
   571  	}
   572  	return cont, nil
   573  }
   574  
   575  func (m *manager) getSubcontainers(containerName string) map[string]*containerData {
   576  	m.containersLock.RLock()
   577  	defer m.containersLock.RUnlock()
   578  	containersMap := make(map[string]*containerData, len(m.containers))
   579  
   580  	// Get all the unique subcontainers of the specified container
   581  	matchedName := path.Join(containerName, "/")
   582  	for i := range m.containers {
   583  		if m.containers[i] == nil {
   584  			continue
   585  		}
   586  		name := m.containers[i].info.Name
   587  		if name == containerName || strings.HasPrefix(name, matchedName) {
   588  			containersMap[m.containers[i].info.Name] = m.containers[i]
   589  		}
   590  	}
   591  	return containersMap
   592  }
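// Example: with containerName "/a", the prefix match selects "/a" itself and
// descendants such as "/a/b"; with "/", every tracked container is selected.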
   593  
   594  func (m *manager) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
   595  	containersMap := m.getSubcontainers(containerName)
   596  
   597  	containers := make([]*containerData, 0, len(containersMap))
   598  	for _, cont := range containersMap {
   599  		containers = append(containers, cont)
   600  	}
   601  	return m.containerDataSliceToContainerInfoSlice(containers, query)
   602  }
   603  
   604  func (m *manager) getAllNamespacedContainers(ns string) map[string]*containerData {
   605  	m.containersLock.RLock()
   606  	defer m.containersLock.RUnlock()
   607  	containers := make(map[string]*containerData, len(m.containers))
   608  
   609  	// Get containers in a namespace.
   610  	for name, cont := range m.containers {
   611  		if name.Namespace == ns {
   612  			containers[cont.info.Name] = cont
   613  		}
   614  	}
   615  	return containers
   616  }
   617  
   618  func (m *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
   619  	containers := m.getAllNamespacedContainers(DockerNamespace)
   620  	return m.containersInfo(containers, query)
   621  }
   622  
   623  func (m *manager) namespacedContainer(containerName string, ns string) (*containerData, error) {
   624  	m.containersLock.RLock()
   625  	defer m.containersLock.RUnlock()
   626  
   627  	// Check for the container in the namespace.
   628  	cont, ok := m.containers[namespacedContainerName{
   629  		Namespace: ns,
   630  		Name:      containerName,
   631  	}]
   632  
   633  	// Look for container by short prefix name if no exact match found.
   634  	if !ok {
   635  		for contName, c := range m.containers {
   636  			if contName.Namespace == ns && strings.HasPrefix(contName.Name, containerName) {
   637  				if cont == nil {
   638  					cont = c
   639  				} else {
   640  					return nil, fmt.Errorf("unable to find container in %q namespace. Container %q is not unique", ns, containerName)
   641  				}
   642  			}
   643  		}
   644  
   645  		if cont == nil {
   646  			return nil, fmt.Errorf("unable to find container %q in %q namespace", containerName, ns)
   647  		}
   648  	}
   649  
   650  	return cont, nil
   651  }
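// For example, a Docker container whose canonical name embeds the full
// 64-character ID can be found by a short unique prefix such as "3f4e8a1b"
// (hypothetical ID); if two names in the namespace share the prefix, the
// lookup returns the "not unique" error above rather than guessing.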
   652  
   653  func (m *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
   654  	container, err := m.namespacedContainer(containerName, DockerNamespace)
   655  	if err != nil {
   656  		return info.ContainerInfo{}, err
   657  	}
   658  
   659  	inf, err := m.containerDataToContainerInfo(container, query)
   660  	if err != nil {
   661  		return info.ContainerInfo{}, err
   662  	}
   663  	return *inf, nil
   664  }
   665  
   666  func (m *manager) containerDataSliceToContainerInfoSlice(containers []*containerData, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
   667  	if len(containers) == 0 {
   668  		return nil, fmt.Errorf("no containers found")
   669  	}
   670  
   671  	// Get the info for each container.
   672  	output := make([]*info.ContainerInfo, 0, len(containers))
   673  	for i := range containers {
   674  		cinfo, err := m.containerDataToContainerInfo(containers[i], query)
   675  		if err != nil {
   676  			// Skip containers with errors; we try to degrade gracefully.
   677  			klog.V(4).Infof("convert container data to container info failed with error %s", err.Error())
   678  			continue
   679  		}
   680  		output = append(output, cinfo)
   681  	}
   682  
   683  	return output, nil
   684  }
   685  
   686  func (m *manager) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
   687  	containers, err := m.getRequestedContainers(containerName, options)
   688  	if err != nil {
   689  		return nil, err
   690  	}
   691  	var errs partialFailure
   692  	containersMap := make(map[string]*info.ContainerInfo)
   693  	query := info.ContainerInfoRequest{
   694  		NumStats: options.Count,
   695  	}
   696  	for name, data := range containers {
   697  		info, err := m.containerDataToContainerInfo(data, &query)
   698  		if err != nil {
   699  			if err == memory.ErrDataNotFound {
   700  				klog.V(4).Infof("Error getting data for container %s because of race condition", name)
   701  				continue
   702  			}
   703  			errs.append(name, "containerDataToContainerInfo", err)
   704  		}
   705  		containersMap[name] = info
   706  	}
   707  	return containersMap, errs.OrNil()
   708  }
   709  
   710  func (m *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
   711  	containersMap := make(map[string]*containerData)
   712  	switch options.IdType {
   713  	case v2.TypeName:
   714  		if !options.Recursive {
   715  			cont, err := m.getContainer(containerName)
   716  			if err != nil {
   717  				return containersMap, err
   718  			}
   719  			containersMap[cont.info.Name] = cont
   720  		} else {
   721  			containersMap = m.getSubcontainers(containerName)
   722  			if len(containersMap) == 0 {
   723  				return containersMap, fmt.Errorf("unknown container: %q", containerName)
   724  			}
   725  		}
   726  	case v2.TypeDocker, v2.TypePodman:
   727  		namespace := map[string]string{
   728  			v2.TypeDocker: DockerNamespace,
   729  			v2.TypePodman: PodmanNamespace,
   730  		}[options.IdType]
   731  		if !options.Recursive {
   732  			containerName = strings.TrimPrefix(containerName, "/")
   733  			cont, err := m.namespacedContainer(containerName, namespace)
   734  			if err != nil {
   735  				return containersMap, err
   736  			}
   737  			containersMap[cont.info.Name] = cont
   738  		} else {
   739  			if containerName != "/" {
   740  				return containersMap, fmt.Errorf("invalid request for %s container %q with subcontainers", options.IdType, containerName)
   741  			}
   742  			containersMap = m.getAllNamespacedContainers(namespace)
   743  		}
   744  	default:
   745  		return containersMap, fmt.Errorf("invalid request type %q", options.IdType)
   746  	}
   747  	if options.MaxAge != nil {
   748  		// update stats for all containers in containersMap
   749  		var waitGroup sync.WaitGroup
   750  		waitGroup.Add(len(containersMap))
   751  		for _, container := range containersMap {
   752  			go func(cont *containerData) {
   753  				cont.OnDemandHousekeeping(*options.MaxAge)
   754  				waitGroup.Done()
   755  			}(container)
   756  		}
   757  		waitGroup.Wait()
   758  	}
   759  	return containersMap, nil
   760  }
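// Sketches of typical request shapes handled above (values illustrative):
//
//	// All Docker containers, refreshing stats older than five seconds:
//	maxAge := 5 * time.Second
//	conts, err := m.getRequestedContainers("/", v2.RequestOptions{
//		IdType:    v2.TypeDocker,
//		Recursive: true,
//		MaxAge:    &maxAge,
//	})
//
//	// A single raw container addressed by its absolute name:
//	cont, err := m.getRequestedContainers("/system.slice", v2.RequestOptions{
//		IdType: v2.TypeName,
//	})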
   761  
   762  func (m *manager) GetDirFsInfo(dir string) (v2.FsInfo, error) {
   763  	device, err := m.fsInfo.GetDirFsDevice(dir)
   764  	if err != nil {
   765  		return v2.FsInfo{}, fmt.Errorf("failed to get device for dir %q: %v", dir, err)
   766  	}
   767  	return m.getFsInfoByDeviceName(device.Device)
   768  }
   769  
   770  func (m *manager) GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error) {
   771  	device, err := m.fsInfo.GetDeviceInfoByFsUUID(uuid)
   772  	if err != nil {
   773  		return v2.FsInfo{}, err
   774  	}
   775  	return m.getFsInfoByDeviceName(device.Device)
   776  }
   777  
   778  func (m *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
   779  	var empty time.Time
   780  	// Get latest data from filesystems hanging off root container.
   781  	stats, err := m.memoryCache.RecentStats("/", empty, empty, 1)
   782  	if err != nil {
   783  		return nil, err
   784  	}
   785  	dev := ""
   786  	if len(label) != 0 {
   787  		dev, err = m.fsInfo.GetDeviceForLabel(label)
   788  		if err != nil {
   789  			return nil, err
   790  		}
   791  	}
   792  	fsInfo := []v2.FsInfo{}
   793  	for i := range stats[0].Filesystem {
   794  		fs := stats[0].Filesystem[i]
   795  		if len(label) != 0 && fs.Device != dev {
   796  			continue
   797  		}
   798  		mountpoint, err := m.fsInfo.GetMountpointForDevice(fs.Device)
   799  		if err != nil {
   800  			return nil, err
   801  		}
   802  		labels, err := m.fsInfo.GetLabelsForDevice(fs.Device)
   803  		if err != nil {
   804  			return nil, err
   805  		}
   806  
   807  		fi := v2.FsInfo{
   808  			Timestamp:  stats[0].Timestamp,
   809  			Device:     fs.Device,
   810  			Mountpoint: mountpoint,
   811  			Capacity:   fs.Limit,
   812  			Usage:      fs.Usage,
   813  			Available:  fs.Available,
   814  			Labels:     labels,
   815  		}
   816  		if fs.HasInodes {
   817  			fi.Inodes = &fs.Inodes
   818  			fi.InodesFree = &fs.InodesFree
   819  		}
   820  		fsInfo = append(fsInfo, fi)
   821  	}
   822  	return fsInfo, nil
   823  }
   824  
   825  func (m *manager) GetMachineInfo() (*info.MachineInfo, error) {
   826  	m.machineMu.RLock()
   827  	defer m.machineMu.RUnlock()
   828  	return m.machineInfo.Clone(), nil
   829  }
   830  
   831  func (m *manager) GetVersionInfo() (*info.VersionInfo, error) {
   832  	// TODO: Consider caching this and periodically updating.  The VersionInfo may change if
   833  	// the docker daemon is started after the cAdvisor client is created.  Caching the value
   834  	// would be helpful so we would be able to return the last known docker version if
   835  	// docker was down at the time of a query.
   836  	return getVersionInfo()
   837  }
   838  
   839  func (m *manager) Exists(containerName string) bool {
   840  	m.containersLock.RLock()
   841  	defer m.containersLock.RUnlock()
   842  
   843  	namespacedName := namespacedContainerName{
   844  		Name: containerName,
   845  	}
   846  
   847  	_, ok := m.containers[namespacedName]
   848  	return ok
   849  }
   850  
   851  func (m *manager) GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error) {
   852  	// Override Recursive: only single-container listing is supported.
   853  	options.Recursive = false
   854  	// Override MaxAge: ProcessList does not require updated stats.
   855  	options.MaxAge = nil
   856  	conts, err := m.getRequestedContainers(containerName, options)
   857  	if err != nil {
   858  		return nil, err
   859  	}
   860  	if len(conts) != 1 {
   861  		return nil, fmt.Errorf("expected the request to match only one container")
   862  	}
   863  	// TODO(rjnagal): handle count? Only if we can do count by type (eg. top 5 cpu users)
   864  	ps := []v2.ProcessInfo{}
   865  	for _, cont := range conts {
   866  		ps, err = cont.GetProcessList(m.cadvisorContainer, m.inHostNamespace)
   867  		if err != nil {
   868  			return nil, err
   869  		}
   870  	}
   871  	return ps, nil
   872  }
   873  
   874  func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *containerData) error {
   875  	for k, v := range collectorConfigs {
   876  		configFile, err := cont.ReadFile(v, m.inHostNamespace)
   877  		if err != nil {
   878  			return fmt.Errorf("failed to read config file %q for config %q, container %q: %v", v, k, cont.info.Name, err)
   879  		}
   880  		klog.V(4).Infof("Got config from %q: %q", v, configFile)
   881  
   882  		if strings.HasPrefix(k, "prometheus") || strings.HasPrefix(k, "Prometheus") {
   883  			newCollector, err := collector.NewPrometheusCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHTTPClient)
   884  			if err != nil {
   885  				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
   886  			}
   887  			err = cont.collectorManager.RegisterCollector(newCollector)
   888  			if err != nil {
   889  				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
   890  			}
   891  		} else {
   892  			newCollector, err := collector.NewCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHTTPClient)
   893  			if err != nil {
   894  				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
   895  			}
   896  			err = cont.collectorManager.RegisterCollector(newCollector)
   897  			if err != nil {
   898  				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
   899  			}
   900  		}
   901  	}
   902  	return nil
   903  }
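// The collectorConfigs map is derived from container labels (see the call to
// GetCollectorConfigs in createContainerLocked below). As a hedged example,
// assuming the "io.cadvisor.metric." label prefix used by cAdvisor's
// application-metrics support, a label like
//
//	io.cadvisor.metric.prometheus-myapp=/etc/myapp/prom_config.json
//
// would arrive here as k="prometheus-myapp" with v naming the config file;
// the "prometheus"/"Prometheus" prefix on k selects NewPrometheusCollector,
// and any other key falls back to the generic collector.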
   904  
   905  // Create a container.
   906  func (m *manager) createContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
   907  	m.containersLock.Lock()
   908  	defer m.containersLock.Unlock()
   909  
   910  	return m.createContainerLocked(containerName, watchSource)
   911  }
   912  
   913  func (m *manager) createContainerLocked(containerName string, watchSource watcher.ContainerWatchSource) error {
   914  	namespacedName := namespacedContainerName{
   915  		Name: containerName,
   916  	}
   917  
   918  	// Check that the container didn't already exist.
   919  	if _, ok := m.containers[namespacedName]; ok {
   920  		return nil
   921  	}
   922  
   923  	handler, accept, err := container.NewContainerHandler(containerName, watchSource, m.containerEnvMetadataWhiteList, m.inHostNamespace)
   924  	if err != nil {
   925  		return err
   926  	}
   927  	if !accept {
   928  		// ignoring this container.
   929  		klog.V(4).Infof("ignoring container %q", containerName)
   930  		return nil
   931  	}
   932  	collectorManager, err := collector.NewCollectorManager()
   933  	if err != nil {
   934  		return err
   935  	}
   936  
   937  	logUsage := *logCadvisorUsage && containerName == m.cadvisorContainer
   938  	cont, err := newContainerData(containerName, m.memoryCache, handler, logUsage, collectorManager, m.maxHousekeepingInterval, m.allowDynamicHousekeeping, clock.RealClock{})
   939  	if err != nil {
   940  		return err
   941  	}
   942  
   943  	if m.includedMetrics.Has(container.PerfMetrics) {
   944  		perfCgroupPath, err := handler.GetCgroupPath("perf_event")
   945  		if err != nil {
   946  			klog.Warningf("Error getting perf_event cgroup path: %q", err)
   947  		} else {
   948  			cont.perfCollector, err = m.perfManager.GetCollector(perfCgroupPath)
   949  			if err != nil {
   950  				klog.Errorf("Perf event metrics will not be available for container %q: %v", containerName, err)
   951  			}
   952  		}
   953  	}
   954  
   955  	if m.includedMetrics.Has(container.ResctrlMetrics) {
   956  		cont.resctrlCollector, err = m.resctrlManager.GetCollector(containerName, func() ([]string, error) {
   957  			return cont.getContainerPids(m.inHostNamespace)
   958  		}, len(m.machineInfo.Topology))
   959  		if err != nil {
   960  			klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err)
   961  		}
   962  	}
   963  
   964  	// Add collectors
   965  	labels := handler.GetContainerLabels()
   966  	collectorConfigs := collector.GetCollectorConfigs(labels)
   967  	err = m.registerCollectors(collectorConfigs, cont)
   968  	if err != nil {
   969  		klog.Warningf("Failed to register collectors for %q: %v", containerName, err)
   970  	}
   971  
   972  	// Add the container name and all its aliases. The aliases must be within the namespace of the factory.
   973  	m.containers[namespacedName] = cont
   974  	for _, alias := range cont.info.Aliases {
   975  		m.containers[namespacedContainerName{
   976  			Namespace: cont.info.Namespace,
   977  			Name:      alias,
   978  		}] = cont
   979  	}
   980  
   981  	klog.V(3).Infof("Added container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)
   982  
   983  	contSpec, err := cont.handler.GetSpec()
   984  	if err != nil {
   985  		return err
   986  	}
   987  
   988  	contRef, err := cont.handler.ContainerReference()
   989  	if err != nil {
   990  		return err
   991  	}
   992  
   993  	newEvent := &info.Event{
   994  		ContainerName: contRef.Name,
   995  		Timestamp:     contSpec.CreationTime,
   996  		EventType:     info.EventContainerCreation,
   997  	}
   998  	err = m.eventHandler.AddEvent(newEvent)
   999  	if err != nil {
  1000  		return err
  1001  	}
  1002  	// Start the container's housekeeping.
  1003  	return cont.Start()
  1004  }
  1005  
  1006  func (m *manager) destroyContainer(containerName string) error {
  1007  	m.containersLock.Lock()
  1008  	defer m.containersLock.Unlock()
  1009  
  1010  	return m.destroyContainerLocked(containerName)
  1011  }
  1012  
  1013  func (m *manager) destroyContainerLocked(containerName string) error {
  1014  	namespacedName := namespacedContainerName{
  1015  		Name: containerName,
  1016  	}
  1017  	cont, ok := m.containers[namespacedName]
  1018  	if !ok {
  1019  		// Already destroyed, done.
  1020  		return nil
  1021  	}
  1022  
  1023  	// Tell the container to stop.
  1024  	err := cont.Stop()
  1025  	if err != nil {
  1026  		return err
  1027  	}
  1028  
  1029  	// Remove the container from our records (and all its aliases).
  1030  	delete(m.containers, namespacedName)
  1031  	for _, alias := range cont.info.Aliases {
  1032  		delete(m.containers, namespacedContainerName{
  1033  			Namespace: cont.info.Namespace,
  1034  			Name:      alias,
  1035  		})
  1036  	}
  1037  	klog.V(3).Infof("Destroyed container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)
  1038  
  1039  	contRef, err := cont.handler.ContainerReference()
  1040  	if err != nil {
  1041  		return err
  1042  	}
  1043  
  1044  	newEvent := &info.Event{
  1045  		ContainerName: contRef.Name,
  1046  		Timestamp:     time.Now(),
  1047  		EventType:     info.EventContainerDeletion,
  1048  	}
  1049  	err = m.eventHandler.AddEvent(newEvent)
  1050  	if err != nil {
  1051  		return err
  1052  	}
  1053  	return nil
  1054  }
  1055  
  1056  // Detect all containers that have been added to or removed from the specified container's subtree.
  1057  func (m *manager) getContainersDiff(containerName string) (added []info.ContainerReference, removed []info.ContainerReference, err error) {
  1058  	// Get all subcontainers recursively.
  1059  	m.containersLock.RLock()
  1060  	cont, ok := m.containers[namespacedContainerName{
  1061  		Name: containerName,
  1062  	}]
  1063  	m.containersLock.RUnlock()
  1064  	if !ok {
  1065  		return nil, nil, fmt.Errorf("failed to find container %q while checking for new containers", containerName)
  1066  	}
  1067  	allContainers, err := cont.handler.ListContainers(container.ListRecursive)
  1068  
  1069  	if err != nil {
  1070  		return nil, nil, err
  1071  	}
  1072  	allContainers = append(allContainers, info.ContainerReference{Name: containerName})
  1073  
  1074  	m.containersLock.RLock()
  1075  	defer m.containersLock.RUnlock()
  1076  
  1077  	// Determine which were added and which were removed.
  1078  	allContainersSet := make(map[string]*containerData)
  1079  	for name, d := range m.containers {
  1080  		// Only add the canonical name.
  1081  		if d.info.Name == name.Name {
  1082  			allContainersSet[name.Name] = d
  1083  		}
  1084  	}
  1085  
  1086  	// Added containers
  1087  	for _, c := range allContainers {
  1088  		delete(allContainersSet, c.Name)
  1089  		_, ok := m.containers[namespacedContainerName{
  1090  			Name: c.Name,
  1091  		}]
  1092  		if !ok {
  1093  			added = append(added, c)
  1094  		}
  1095  	}
  1096  
  1097  	// Removed ones are no longer in the container listing.
  1098  	for _, d := range allContainersSet {
  1099  		removed = append(removed, d.info.ContainerReference)
  1100  	}
  1101  
  1102  	return
  1103  }
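// Worked example: if the manager currently tracks {"/", "/a", "/b"} and the
// handler lists {"/", "/a", "/c"}, the loop above deletes "/" and "/a" from
// allContainersSet, reports "/c" as added, and "/b" remains in the set, so it
// is reported as removed.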
  1104  
  1105  // Detect the existing subcontainers and reflect the setup here.
  1106  func (m *manager) detectSubcontainers(containerName string) error {
  1107  	added, removed, err := m.getContainersDiff(containerName)
  1108  	if err != nil {
  1109  		return err
  1110  	}
  1111  
  1112  	// Add the new containers.
  1113  	for _, cont := range added {
  1114  		err = m.createContainer(cont.Name, watcher.Raw)
  1115  		if err != nil {
  1116  			klog.Errorf("Failed to create existing container: %s: %s", cont.Name, err)
  1117  		}
  1118  	}
  1119  
  1120  	// Remove the old containers.
  1121  	for _, cont := range removed {
  1122  		err = m.destroyContainer(cont.Name)
  1123  		if err != nil {
  1124  			klog.Errorf("Failed to destroy existing container: %s: %s", cont.Name, err)
  1125  		}
  1126  	}
  1127  
  1128  	return nil
  1129  }
  1130  
  1131  // Watches for new containers started in the system. Runs forever unless there is a setup error.
  1132  func (m *manager) watchForNewContainers(quit chan error) error {
  1133  	watched := make([]watcher.ContainerWatcher, 0)
  1134  	for _, watcher := range m.containerWatchers {
  1135  		err := watcher.Start(m.eventsChannel)
  1136  		if err != nil {
  1137  			for _, w := range watched {
  1138  				stopErr := w.Stop()
  1139  				if stopErr != nil {
  1140  					klog.Warningf("Failed to stop watcher %v with error: %v", w, stopErr)
  1141  				}
  1142  			}
  1143  			return err
  1144  		}
  1145  		watched = append(watched, watcher)
  1146  	}
  1147  
  1148  	// There is a race between starting the watch and new container creation, so do a detection pass before reading new containers.
  1149  	err := m.detectSubcontainers("/")
  1150  	if err != nil {
  1151  		return err
  1152  	}
  1153  
  1154  	// Listen to events from the container handler.
  1155  	go func() {
  1156  		for {
  1157  			select {
  1158  			case event := <-m.eventsChannel:
  1159  				switch {
  1160  				case event.EventType == watcher.ContainerAdd:
  1161  					switch event.WatchSource {
  1162  					default:
  1163  						err = m.createContainer(event.Name, event.WatchSource)
  1164  					}
  1165  				case event.EventType == watcher.ContainerDelete:
  1166  					err = m.destroyContainer(event.Name)
  1167  				}
  1168  				if err != nil {
  1169  					klog.Warningf("Failed to process watch event %+v: %v", event, err)
  1170  				}
  1171  			case <-quit:
  1172  				var errs partialFailure
  1173  
  1174  				// Stop processing events if asked to quit.
  1175  				for i, watcher := range m.containerWatchers {
  1176  					err := watcher.Stop()
  1177  					if err != nil {
  1178  						errs.append(fmt.Sprintf("watcher %d", i), "Stop", err)
  1179  					}
  1180  				}
  1181  
  1182  				if len(errs) > 0 {
  1183  					quit <- errs
  1184  				} else {
  1185  					quit <- nil
  1186  					klog.Infof("Exiting thread watching subcontainers")
  1187  					return
  1188  				}
  1189  			}
  1190  		}
  1191  	}()
  1192  	return nil
  1193  }
  1194  
  1195  func (m *manager) watchForNewOoms() error {
  1196  	klog.V(2).Infof("Started watching for new ooms in manager")
  1197  	outStream := make(chan *oomparser.OomInstance, 10)
  1198  	oomLog, err := oomparser.New()
  1199  	if err != nil {
  1200  		return err
  1201  	}
  1202  	go oomLog.StreamOoms(outStream)
  1203  
  1204  	go func() {
  1205  		for oomInstance := range outStream {
  1206  			// Surface OOM and OOM kill events.
  1207  			newEvent := &info.Event{
  1208  				ContainerName: oomInstance.ContainerName,
  1209  				Timestamp:     oomInstance.TimeOfDeath,
  1210  				EventType:     info.EventOom,
  1211  			}
  1212  			err := m.eventHandler.AddEvent(newEvent)
  1213  			if err != nil {
  1214  				klog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
  1215  			}
  1216  			klog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)
  1217  
  1218  			newEvent = &info.Event{
  1219  				ContainerName: oomInstance.VictimContainerName,
  1220  				Timestamp:     oomInstance.TimeOfDeath,
  1221  				EventType:     info.EventOomKill,
  1222  				EventData: info.EventData{
  1223  					OomKill: &info.OomKillEventData{
  1224  						Pid:         oomInstance.Pid,
  1225  						ProcessName: oomInstance.ProcessName,
  1226  					},
  1227  				},
  1228  			}
  1229  			err = m.eventHandler.AddEvent(newEvent)
  1230  			if err != nil {
  1231  				klog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
  1232  			}
  1233  
  1234  			// Count OOM events for later collection by Prometheus.
  1235  			request := v2.RequestOptions{
  1236  				IdType: v2.TypeName,
  1237  				Count:  1,
  1238  			}
  1239  			conts, err := m.getRequestedContainers(oomInstance.ContainerName, request)
  1240  			if err != nil {
  1241  				klog.V(2).Infof("failed getting container info for %q: %v", oomInstance.ContainerName, err)
  1242  				continue
  1243  			}
  1244  			if len(conts) != 1 {
  1245  				klog.V(2).Info("Expected the request to match only one container")
  1246  				continue
  1247  			}
  1248  			for _, cont := range conts {
  1249  				atomic.AddUint64(&cont.oomEvents, 1)
  1250  			}
  1251  		}
  1252  	}()
  1253  	return nil
  1254  }
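// Note that each OOM occurrence is surfaced as two events: an EventOom
// attributed to oomInstance.ContainerName (the cgroup whose limit was hit)
// and an EventOomKill attributed to oomInstance.VictimContainerName, carrying
// the killed process's pid and name.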
  1255  
  1256  // Can be called by the API; delivers matching events on the returned channel.
  1257  func (m *manager) WatchForEvents(request *events.Request) (*events.EventChannel, error) {
  1258  	return m.eventHandler.WatchEvents(request)
  1259  }
  1260  
  1261  // Can be called by the API; returns all past events satisfying the request.
  1262  func (m *manager) GetPastEvents(request *events.Request) ([]*info.Event, error) {
  1263  	return m.eventHandler.GetEvents(request)
  1264  }
  1265  
  1266  // Called by the API when a client is no longer listening on the channel.
  1267  func (m *manager) CloseEventChannel(watchID int) {
  1268  	m.eventHandler.StopWatch(watchID)
  1269  }
  1270  
  1271  // Parses the events StoragePolicy from the flags.
  1272  func parseEventsStoragePolicy() events.StoragePolicy {
  1273  	policy := events.DefaultStoragePolicy()
  1274  
  1275  	// Parse max age.
  1276  	parts := strings.Split(*eventStorageAgeLimit, ",")
  1277  	for _, part := range parts {
  1278  		items := strings.Split(part, "=")
  1279  		if len(items) != 2 {
  1280  			klog.Warningf("Unknown event storage policy %q when parsing max age", part)
  1281  			continue
  1282  		}
  1283  		dur, err := time.ParseDuration(items[1])
  1284  		if err != nil {
  1285  			klog.Warningf("Unable to parse event max age duration %q: %v", items[1], err)
  1286  			continue
  1287  		}
  1288  		if items[0] == "default" {
  1289  			policy.DefaultMaxAge = dur
  1290  			continue
  1291  		}
  1292  		policy.PerTypeMaxAge[info.EventType(items[0])] = dur
  1293  	}
  1294  
  1295  	// Parse max number.
  1296  	parts = strings.Split(*eventStorageEventLimit, ",")
  1297  	for _, part := range parts {
  1298  		items := strings.Split(part, "=")
  1299  		if len(items) != 2 {
  1300  			klog.Warningf("Unknown event storage policy %q when parsing max event limit", part)
  1301  			continue
  1302  		}
  1303  		val, err := strconv.Atoi(items[1])
  1304  		if err != nil {
  1305  			klog.Warningf("Unable to parse integer from %q: %v", items[1], err)
  1306  			continue
  1307  		}
  1308  		if items[0] == "default" {
  1309  			policy.DefaultMaxNumEvents = val
  1310  			continue
  1311  		}
  1312  		policy.PerTypeMaxNumEvents[info.EventType(items[0])] = val
  1313  	}
  1314  
  1315  	return policy
  1316  }
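// For example, parsing "-event_storage_age_limit=default=24h,oom=48h" sets
// DefaultMaxAge to 24h and PerTypeMaxAge["oom"] to 48h, while
// "-event_storage_event_limit=default=100000,oom=500" sets
// DefaultMaxNumEvents to 100000 and PerTypeMaxNumEvents["oom"] to 500.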
  1317  
  1318  func (m *manager) DebugInfo() map[string][]string {
  1319  	debugInfo := container.DebugInfo()
  1320  
  1321  	// Get unique containers.
  1322  	var conts map[*containerData]struct{}
  1323  	func() {
  1324  		m.containersLock.RLock()
  1325  		defer m.containersLock.RUnlock()
  1326  
  1327  		conts = make(map[*containerData]struct{}, len(m.containers))
  1328  		for _, c := range m.containers {
  1329  			conts[c] = struct{}{}
  1330  		}
  1331  	}()
  1332  
  1333  	// List containers.
  1334  	lines := make([]string, 0, len(conts))
  1335  	for cont := range conts {
  1336  		lines = append(lines, cont.info.Name)
  1337  		if cont.info.Namespace != "" {
  1338  			lines = append(lines, fmt.Sprintf("\tNamespace: %s", cont.info.Namespace))
  1339  		}
  1340  
  1341  		if len(cont.info.Aliases) != 0 {
  1342  			lines = append(lines, "\tAliases:")
  1343  			for _, alias := range cont.info.Aliases {
  1344  				lines = append(lines, fmt.Sprintf("\t\t%s", alias))
  1345  			}
  1346  		}
  1347  	}
  1348  
  1349  	debugInfo["Managed containers"] = lines
  1350  	return debugInfo
  1351  }
  1352  
  1353  func (m *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error) {
  1354  	mountPoint, err := m.fsInfo.GetMountpointForDevice(deviceName)
  1355  	if err != nil {
  1356  		return v2.FsInfo{}, fmt.Errorf("failed to get mount point for device %q: %v", deviceName, err)
  1357  	}
  1358  	infos, err := m.GetFsInfo("")
  1359  	if err != nil {
  1360  		return v2.FsInfo{}, err
  1361  	}
  1362  	for _, info := range infos {
  1363  		if info.Mountpoint == mountPoint {
  1364  			return info, nil
  1365  		}
  1366  	}
  1367  	return v2.FsInfo{}, fmt.Errorf("cannot find filesystem info for device %q", deviceName)
  1368  }
  1369  
  1370  func (m *manager) containersInfo(containers map[string]*containerData, query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
  1371  	output := make(map[string]info.ContainerInfo, len(containers))
  1372  	for name, cont := range containers {
  1373  		inf, err := m.containerDataToContainerInfo(cont, query)
  1374  		if err != nil {
  1375  			// Ignore the error because of a race condition and return a best-effort result.
  1376  			if err == memory.ErrDataNotFound {
  1377  				klog.V(4).Infof("Error getting data for container %s because of race condition", name)
  1378  				continue
  1379  			}
  1380  			return nil, err
  1381  		}
  1382  		output[name] = *inf
  1383  	}
  1384  	return output, nil
  1385  }
  1386  
  1387  func (m *manager) AllPodmanContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
  1388  	containers := m.getAllNamespacedContainers(PodmanNamespace)
  1389  	return m.containersInfo(containers, query)
  1390  }
  1391  
  1392  func getVersionInfo() (*info.VersionInfo, error) {
  1393  
  1394  	kernelVersion := machine.KernelVersion()
  1395  	osVersion := machine.ContainerOsVersion()
  1396  
  1397  	return &info.VersionInfo{
  1398  		KernelVersion:      kernelVersion,
  1399  		ContainerOsVersion: osVersion,
  1400  		CadvisorVersion:    version.Info["version"],
  1401  		CadvisorRevision:   version.Info["revision"],
  1402  	}, nil
  1403  }
  1404  
  1405  // Helper for accumulating partial failures.
  1406  type partialFailure []string
  1407  
  1408  func (f *partialFailure) append(id, operation string, err error) {
  1409  	*f = append(*f, fmt.Sprintf("[%q: %s: %s]", id, operation, err))
  1410  }
  1411  
  1412  func (f partialFailure) Error() string {
  1413  	return fmt.Sprintf("partial failures: %s", strings.Join(f, ", "))
  1414  }
  1415  
  1416  func (f partialFailure) OrNil() error {
  1417  	if len(f) == 0 {
  1418  		return nil
  1419  	}
  1420  	return f
  1421  }
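// Usage sketch (mirroring GetContainerSpec above): accumulate per-item
// failures while still returning whatever succeeded.
//
//	var errs partialFailure
//	for name, cont := range conts {
//		if _, err := cont.GetInfo(false); err != nil {
//			errs.append(name, "GetInfo", err)
//		}
//	}
//	return specs, errs.OrNil() // nil when nothing failed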