github.com/google/cadvisor@v0.49.1/manager/manager.go

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Manager of cAdvisor-monitored containers.
package manager

import (
	"flag"
	"fmt"
	"net/http"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/cadvisor/cache/memory"
	"github.com/google/cadvisor/collector"
	"github.com/google/cadvisor/container"
	"github.com/google/cadvisor/container/raw"
	"github.com/google/cadvisor/events"
	"github.com/google/cadvisor/fs"
	info "github.com/google/cadvisor/info/v1"
	v2 "github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/machine"
	"github.com/google/cadvisor/nvm"
	"github.com/google/cadvisor/perf"
	"github.com/google/cadvisor/resctrl"
	"github.com/google/cadvisor/stats"
	"github.com/google/cadvisor/utils/oomparser"
	"github.com/google/cadvisor/utils/sysfs"
	"github.com/google/cadvisor/version"
	"github.com/google/cadvisor/watcher"

	"github.com/opencontainers/runc/libcontainer/cgroups"

	"k8s.io/klog/v2"
	"k8s.io/utils/clock"
)

var globalHousekeepingInterval = flag.Duration("global_housekeeping_interval", 1*time.Minute, "Interval between global housekeepings")
var updateMachineInfoInterval = flag.Duration("update_machine_info_interval", 5*time.Minute, "Interval between machine info updates.")
var logCadvisorUsage = flag.Bool("log_cadvisor_usage", false, "Whether to log the usage of the cAdvisor container")
var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h", "Max length of time for which to store events (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is a duration. Default is applied to all non-specified event types")
var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")

// The namespaces under which aliases are unique.
const (
	DockerNamespace = "docker"
	PodmanNamespace = "podman"
)

var HousekeepingConfigFlags = HousekeepingConfig{
	flag.Duration("max_housekeeping_interval", 60*time.Second, "Largest interval to allow between container housekeepings"),
	flag.Bool("allow_dynamic_housekeeping", true, "Whether to allow the housekeeping interval to be dynamic"),
}
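
// Illustrative sketch, not part of the upstream file: embedders that do not
// want the flag-backed defaults in HousekeepingConfigFlags can build their own
// HousekeepingConfig. The 15-second interval here is an arbitrary assumption,
// not an upstream default.
func customHousekeepingConfigSketch() HousekeepingConfig {
	interval := 15 * time.Second
	allowDynamic := true
	return HousekeepingConfig{
		Interval:     &interval,
		AllowDynamic: &allowDynamic,
	}
}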

// The Manager interface defines operations for starting a manager and getting
// container and machine information.
type Manager interface {
	// Start the manager. Calling other manager methods before this returns
	// may produce undefined behavior.
	Start() error

	// Stops the manager.
	Stop() error

	// Get information about a container.
	GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)

	// Get V2 information about a container.
	// Recursive (subcontainer) requests are best-effort, and may return a partial result alongside an
	// error in the partial failure case.
	GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error)

	// Get information about all subcontainers of the specified container (includes self).
	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)

	// Gets all the Docker containers. Return is a map from full container name to ContainerInfo.
	AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error)

	// Gets information about a specific Docker container. The specified name is within the Docker namespace.
	DockerContainer(dockerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error)

	// Gets spec for all containers based on request options.
	GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error)

	// Gets summary stats for all containers based on request options.
	GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error)

	// Get info for all requested containers based on the request options.
	GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error)

	// Returns true if the named container exists.
	Exists(containerName string) bool

	// Get information about the machine.
	GetMachineInfo() (*info.MachineInfo, error)

	// Get version information about different components we depend on.
	GetVersionInfo() (*info.VersionInfo, error)

	// GetFsInfoByFsUUID returns the information of the device having the
	// specified filesystem uuid. If no such device with the UUID exists, this
	// function will return the fs.ErrNoSuchDevice error.
	GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error)

	// Get filesystem information for the filesystem that contains the given directory.
	GetDirFsInfo(dir string) (v2.FsInfo, error)

	// Get filesystem information for a given label.
	// Returns information for all global filesystems if label is empty.
	GetFsInfo(label string) ([]v2.FsInfo, error)

	// Get ps output for a container.
	GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error)

	// Get events that fit the request, streamed through the returned channel.
	WatchForEvents(request *events.Request) (*events.EventChannel, error)

	// Get past events that have been detected and that fit the request.
	GetPastEvents(request *events.Request) ([]*info.Event, error)

	CloseEventChannel(watchID int)

	// Returns debugging information. Map of lines per category.
	DebugInfo() map[string][]string

	AllPodmanContainers(c *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error)

	PodmanContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error)
}
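
// Illustrative sketch, not part of the upstream file: one way a caller might
// consume OOM events through the interface above. It assumes the
// events.NewRequest and EventChannel helpers behave as in cAdvisor v0.49.x.
func watchOomEventsSketch(m Manager) error {
	request := events.NewRequest()
	request.EventType[info.EventOom] = true
	channel, err := m.WatchForEvents(request)
	if err != nil {
		return err
	}
	// Release the watch when the caller stops listening.
	defer m.CloseEventChannel(channel.GetWatchId())
	for event := range channel.GetChannel() {
		klog.Infof("OOM in %q at %v", event.ContainerName, event.Timestamp)
	}
	return nil
}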

// Housekeeping configuration for the manager.
type HousekeepingConfig = struct {
	Interval     *time.Duration
	AllowDynamic *bool
}

// New takes a memory storage and returns a new manager.
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, HousekeepingConfig HousekeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList, containerEnvMetadataWhiteList []string, perfEventsFile string, resctrlInterval time.Duration) (Manager, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("manager requires memory storage")
	}

	// Detect the container we are running on.
	selfContainer := "/"
	var err error
	// Avoid using GetOwnCgroupPath on cgroup v2 as it is not supported by libcontainer.
	if !cgroups.IsCgroup2UnifiedMode() {
		selfContainer, err = cgroups.GetOwnCgroup("cpu")
		if err != nil {
			return nil, err
		}
		klog.V(2).Infof("cAdvisor running in container: %q", selfContainer)
	}

	context := fs.Context{}

	if err := container.InitializeFSContext(&context); err != nil {
		return nil, err
	}

	fsInfo, err := fs.NewFsInfo(context)
	if err != nil {
		return nil, err
	}

	// If cAdvisor was started with the host's rootfs mounted, assume that it is running
	// in its own namespaces.
	inHostNamespace := false
	if _, err := os.Stat("/rootfs/proc"); os.IsNotExist(err) {
		inHostNamespace = true
	}

	// Register for new subcontainers.
	eventsChannel := make(chan watcher.ContainerEvent, 16)

	newManager := &manager{
		containers: make(map[namespacedContainerName]*containerData),
		quitChannels: make([]chan error, 0, 2),
		memoryCache: memoryCache,
		fsInfo: fsInfo,
		sysFs: sysfs,
		cadvisorContainer: selfContainer,
		inHostNamespace: inHostNamespace,
		startupTime: time.Now(),
		maxHousekeepingInterval: *HousekeepingConfig.Interval,
		allowDynamicHousekeeping: *HousekeepingConfig.AllowDynamic,
		includedMetrics: includedMetricsSet,
		containerWatchers: []watcher.ContainerWatcher{},
		eventsChannel: eventsChannel,
		collectorHTTPClient: collectorHTTPClient,
		rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
		containerEnvMetadataWhiteList: containerEnvMetadataWhiteList,
	}

	machineInfo, err := machine.Info(sysfs, fsInfo, inHostNamespace)
	if err != nil {
		return nil, err
	}
	newManager.machineInfo = *machineInfo
	klog.V(1).Infof("Machine: %+v", newManager.machineInfo)

	newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.Topology)
	if err != nil {
		return nil, err
	}

	newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.CPUVendorID, inHostNamespace)
	if err != nil {
		klog.V(4).Infof("Cannot gather resctrl metrics: %v", err)
	}

	versionInfo, err := getVersionInfo()
	if err != nil {
		return nil, err
	}
	klog.V(1).Infof("Version: %+v", *versionInfo)

	newManager.eventHandler = events.NewEventManager(parseEventsStoragePolicy())
	return newManager, nil
}
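
// Illustrative sketch, not part of the upstream file: a minimal way an embedder
// might wire New together with the flag-backed defaults above. The 2-minute
// cache age, 1-minute resctrl interval, and use of memory.New/sysfs.NewRealSysFs
// are assumptions about the surrounding cAdvisor packages, not upstream defaults.
func newDefaultManagerSketch() (Manager, error) {
	memoryCache := memory.New(2*time.Minute, nil)
	m, err := New(
		memoryCache,
		sysfs.NewRealSysFs(),
		HousekeepingConfigFlags,
		container.AllMetrics,
		http.DefaultClient,
		nil,         // no extra raw cgroup path prefixes
		nil,         // no container env metadata whitelist
		"",          // no perf events config file
		time.Minute, // resctrl collection interval
	)
	if err != nil {
		return nil, err
	}
	// Start begins housekeeping for all discovered containers.
	return m, m.Start()
}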

// A namespaced container name.
type namespacedContainerName struct {
	// The namespace of the container. Can be empty for the root namespace.
	Namespace string

	// The name of the container in this namespace.
	Name string
}

type manager struct {
	containers               map[namespacedContainerName]*containerData
	containersLock           sync.RWMutex
	memoryCache              *memory.InMemoryCache
	fsInfo                   fs.FsInfo
	sysFs                    sysfs.SysFs
	machineMu                sync.RWMutex // protects machineInfo
	machineInfo              info.MachineInfo
	quitChannels             []chan error
	cadvisorContainer        string
	inHostNamespace          bool
	eventHandler             events.EventManager
	startupTime              time.Time
	maxHousekeepingInterval  time.Duration
	allowDynamicHousekeeping bool
	includedMetrics          container.MetricSet
	containerWatchers        []watcher.ContainerWatcher
	eventsChannel            chan watcher.ContainerEvent
	collectorHTTPClient      *http.Client
	perfManager              stats.Manager
	resctrlManager           resctrl.Manager
	// List of raw container cgroup path prefix whitelist.
	rawContainerCgroupPathPrefixWhiteList []string
	// List of container env prefix whitelist, the matched container envs would be collected into metrics as extra labels.
	containerEnvMetadataWhiteList []string
}

func (m *manager) PodmanContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
	container, err := m.namespacedContainer(containerName, PodmanNamespace)
	if err != nil {
		return info.ContainerInfo{}, err
	}

	inf, err := m.containerDataToContainerInfo(container, query)
	if err != nil {
		return info.ContainerInfo{}, err
	}
	return *inf, nil
}

// Start the container manager.
func (m *manager) Start() error {
	m.containerWatchers = container.InitializePlugins(m, m.fsInfo, m.includedMetrics)

	err := raw.Register(m, m.fsInfo, m.includedMetrics, m.rawContainerCgroupPathPrefixWhiteList)
	if err != nil {
		klog.Errorf("Registration of the raw container factory failed: %v", err)
	}

	rawWatcher, err := raw.NewRawContainerWatcher(m.includedMetrics)
	if err != nil {
		return err
	}
	m.containerWatchers = append(m.containerWatchers, rawWatcher)

	// Watch for OOMs.
	err = m.watchForNewOoms()
	if err != nil {
		klog.Warningf("Could not configure a source for OOM detection, disabling OOM events: %v", err)
	}

	// If there are no factories, don't start any housekeeping and serve the information we do have.
	if !container.HasFactories() {
		return nil
	}

	// Create root and then recover all containers.
	err = m.createContainer("/", watcher.Raw)
	if err != nil {
		return err
	}
	klog.V(2).Infof("Starting recovery of all containers")
	err = m.detectSubcontainers("/")
	if err != nil {
		return err
	}
	klog.V(2).Infof("Recovery completed")

	// Watch for new containers.
	quitWatcher := make(chan error)
	err = m.watchForNewContainers(quitWatcher)
	if err != nil {
		return err
	}
	m.quitChannels = append(m.quitChannels, quitWatcher)

	// Look for new containers in the main housekeeping thread.
	quitGlobalHousekeeping := make(chan error)
	m.quitChannels = append(m.quitChannels, quitGlobalHousekeeping)
	go m.globalHousekeeping(quitGlobalHousekeeping)

	quitUpdateMachineInfo := make(chan error)
	m.quitChannels = append(m.quitChannels, quitUpdateMachineInfo)
	go m.updateMachineInfo(quitUpdateMachineInfo)

	return nil
}

func (m *manager) Stop() error {
	defer m.destroyCollectors()
	// Stop and wait on all quit channels.
	for i, c := range m.quitChannels {
		// Send the exit signal and wait on the thread to exit (by closing the channel).
		c <- nil
		err := <-c
		if err != nil {
			// Remove the channels that quit successfully.
			m.quitChannels = m.quitChannels[i:]
			return err
		}
	}
	m.quitChannels = make([]chan error, 0, 2)
	nvm.Finalize()
	perf.Finalize()
	return nil
}

func (m *manager) destroyCollectors() {
	for _, container := range m.containers {
		container.perfCollector.Destroy()
		container.resctrlCollector.Destroy()
	}
}

func (m *manager) updateMachineInfo(quit chan error) {
	ticker := time.NewTicker(*updateMachineInfoInterval)
	for {
		select {
		case <-ticker.C:
			info, err := machine.Info(m.sysFs, m.fsInfo, m.inHostNamespace)
			if err != nil {
				klog.Errorf("Could not get machine info: %v", err)
				break
			}
			m.machineMu.Lock()
			m.machineInfo = *info
			m.machineMu.Unlock()
			klog.V(5).Infof("Update machine info: %+v", *info)
		case <-quit:
			ticker.Stop()
			quit <- nil
			return
		}
	}
}

func (m *manager) globalHousekeeping(quit chan error) {
	// Long housekeeping is either 100ms or half of the housekeeping interval.
	longHousekeeping := 100 * time.Millisecond
	if *globalHousekeepingInterval/2 < longHousekeeping {
		longHousekeeping = *globalHousekeepingInterval / 2
	}

	ticker := time.NewTicker(*globalHousekeepingInterval)
	for {
		select {
		case t := <-ticker.C:
			start := time.Now()

			// Check for new containers.
			err := m.detectSubcontainers("/")
			if err != nil {
				klog.Errorf("Failed to detect containers: %s", err)
			}

			// Log if housekeeping took too long.
			duration := time.Since(start)
			if duration >= longHousekeeping {
				klog.V(3).Infof("Global Housekeeping(%d) took %s", t.Unix(), duration)
			}
		case <-quit:
			// Quit if asked to do so.
			quit <- nil
			klog.Infof("Exiting global housekeeping thread")
			return
		}
	}
}

func (m *manager) getContainerData(containerName string) (*containerData, error) {
	var cont *containerData
	var ok bool
	func() {
		m.containersLock.RLock()
		defer m.containersLock.RUnlock()

		// Ensure we have the container.
		cont, ok = m.containers[namespacedContainerName{
			Name: containerName,
		}]
	}()
	if !ok {
		return nil, fmt.Errorf("unknown container %q", containerName)
	}
	return cont, nil
}

func (m *manager) GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error) {
	conts, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	stats := make(map[string]v2.DerivedStats)
	for name, cont := range conts {
		d, err := cont.DerivedStats()
		if err != nil {
			errs.append(name, "DerivedStats", err)
		}
		stats[name] = d
	}
	return stats, errs.OrNil()
}

func (m *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
	conts, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	specs := make(map[string]v2.ContainerSpec)
	for name, cont := range conts {
		cinfo, err := cont.GetInfo(false)
		if err != nil {
			errs.append(name, "GetInfo", err)
		}
		spec := m.getV2Spec(cinfo)
		specs[name] = spec
	}
	return specs, errs.OrNil()
}

// Get V2 container spec from v1 container info.
func (m *manager) getV2Spec(cinfo *containerInfo) v2.ContainerSpec {
	spec := m.getAdjustedSpec(cinfo)
	return v2.ContainerSpecFromV1(&spec, cinfo.Aliases, cinfo.Namespace)
}

func (m *manager) getAdjustedSpec(cinfo *containerInfo) info.ContainerSpec {
	spec := cinfo.Spec

	// Set default value to an actual value
	if spec.HasMemory {
		// A Memory.Limit of 0 means there is no limit.
		if spec.Memory.Limit == 0 {
			m.machineMu.RLock()
			spec.Memory.Limit = uint64(m.machineInfo.MemoryCapacity)
			m.machineMu.RUnlock()
		}
	}
	return spec
}

func (m *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
	cont, err := m.getContainerData(containerName)
	if err != nil {
		return nil, err
	}
	return m.containerDataToContainerInfo(cont, query)
}

func (m *manager) GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
	containers, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}

	var errs partialFailure
	var nilTime time.Time // Ignored.

	infos := make(map[string]v2.ContainerInfo, len(containers))
	for name, container := range containers {
		result := v2.ContainerInfo{}
		cinfo, err := container.GetInfo(false)
		if err != nil {
			errs.append(name, "GetInfo", err)
			infos[name] = result
			continue
		}
		result.Spec = m.getV2Spec(cinfo)

		stats, err := m.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
		if err != nil {
			errs.append(name, "RecentStats", err)
			infos[name] = result
			continue
		}

		result.Stats = v2.ContainerStatsFromV1(containerName, &cinfo.Spec, stats)
		infos[name] = result
	}

	return infos, errs.OrNil()
}
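
// Illustrative sketch, not part of the upstream file: requesting V2 info by
// name with a staleness bound. A non-nil MaxAge makes getRequestedContainers
// run on-demand housekeeping for containers whose stats are older than the
// bound before returning. The 2-second bound is an arbitrary assumption.
func freshContainerInfoSketch(m Manager, name string) (map[string]v2.ContainerInfo, error) {
	maxAge := 2 * time.Second
	return m.GetContainerInfoV2(name, v2.RequestOptions{
		IdType: v2.TypeName,
		Count:  1,
		MaxAge: &maxAge,
	})
}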

func (m *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
	// Get the info from the container.
	cinfo, err := cont.GetInfo(true)
	if err != nil {
		return nil, err
	}

	stats, err := m.memoryCache.RecentStats(cinfo.Name, query.Start, query.End, query.NumStats)
	if err != nil {
		return nil, err
	}

	// Make a copy of the info for the user.
	ret := &info.ContainerInfo{
		ContainerReference: cinfo.ContainerReference,
		Subcontainers:      cinfo.Subcontainers,
		Spec:               m.getAdjustedSpec(cinfo),
		Stats:              stats,
	}
	return ret, nil
}

func (m *manager) getContainer(containerName string) (*containerData, error) {
	m.containersLock.RLock()
	defer m.containersLock.RUnlock()
	cont, ok := m.containers[namespacedContainerName{Name: containerName}]
	if !ok {
		return nil, fmt.Errorf("unknown container %q", containerName)
	}
	return cont, nil
}

func (m *manager) getSubcontainers(containerName string) map[string]*containerData {
	m.containersLock.RLock()
	defer m.containersLock.RUnlock()
	containersMap := make(map[string]*containerData, len(m.containers))

	// Get all the unique subcontainers of the specified container.
	matchedName := path.Join(containerName, "/")
	for i := range m.containers {
		if m.containers[i] == nil {
			continue
		}
		name := m.containers[i].info.Name
		if name == containerName || strings.HasPrefix(name, matchedName) {
			containersMap[m.containers[i].info.Name] = m.containers[i]
		}
	}
	return containersMap
}

func (m *manager) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	containersMap := m.getSubcontainers(containerName)

	containers := make([]*containerData, 0, len(containersMap))
	for _, cont := range containersMap {
		containers = append(containers, cont)
	}
	return m.containerDataSliceToContainerInfoSlice(containers, query)
}

func (m *manager) getAllNamespacedContainers(ns string) map[string]*containerData {
	m.containersLock.RLock()
	defer m.containersLock.RUnlock()
	containers := make(map[string]*containerData, len(m.containers))

	// Get containers in a namespace.
	for name, cont := range m.containers {
		if name.Namespace == ns {
			containers[cont.info.Name] = cont
		}
	}
	return containers
}

func (m *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
	containers := m.getAllNamespacedContainers(DockerNamespace)
	return m.containersInfo(containers, query)
}

func (m *manager) namespacedContainer(containerName string, ns string) (*containerData, error) {
	m.containersLock.RLock()
	defer m.containersLock.RUnlock()

	// Check for the container in the namespace.
	cont, ok := m.containers[namespacedContainerName{
		Namespace: ns,
		Name:      containerName,
	}]

	// Look for the container by short prefix name if no exact match is found.
	if !ok {
		for contName, c := range m.containers {
			if contName.Namespace == ns && strings.HasPrefix(contName.Name, containerName) {
				if cont == nil {
					cont = c
				} else {
					return nil, fmt.Errorf("unable to find container in %q namespace. Container %q is not unique", ns, containerName)
				}
			}
		}

		if cont == nil {
			return nil, fmt.Errorf("unable to find container %q in %q namespace", containerName, ns)
		}
	}

	return cont, nil
}

func (m *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
	container, err := m.namespacedContainer(containerName, DockerNamespace)
	if err != nil {
		return info.ContainerInfo{}, err
	}

	inf, err := m.containerDataToContainerInfo(container, query)
	if err != nil {
		return info.ContainerInfo{}, err
	}
	return *inf, nil
}

func (m *manager) containerDataSliceToContainerInfoSlice(containers []*containerData, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if len(containers) == 0 {
		return nil, fmt.Errorf("no containers found")
	}

	// Get the info for each container.
	output := make([]*info.ContainerInfo, 0, len(containers))
	for i := range containers {
		cinfo, err := m.containerDataToContainerInfo(containers[i], query)
		if err != nil {
			// Skip containers with errors; we try to degrade gracefully.
			klog.V(4).Infof("convert container data to container info failed with error %s", err.Error())
			continue
		}
		output = append(output, cinfo)
	}

	return output, nil
}

func (m *manager) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
	containers, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	var errs partialFailure
	containersMap := make(map[string]*info.ContainerInfo)
	query := info.ContainerInfoRequest{
		NumStats: options.Count,
	}
	for name, data := range containers {
		info, err := m.containerDataToContainerInfo(data, &query)
		if err != nil {
			if err == memory.ErrDataNotFound {
				klog.V(4).Infof("Error getting data for container %s because of race condition", name)
				continue
			}
			errs.append(name, "containerDataToContainerInfo", err)
		}
		containersMap[name] = info
	}
	return containersMap, errs.OrNil()
}

func (m *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
	containersMap := make(map[string]*containerData)
	switch options.IdType {
	case v2.TypeName:
		if !options.Recursive {
			cont, err := m.getContainer(containerName)
			if err != nil {
				return containersMap, err
			}
			containersMap[cont.info.Name] = cont
		} else {
			containersMap = m.getSubcontainers(containerName)
			if len(containersMap) == 0 {
				return containersMap, fmt.Errorf("unknown container: %q", containerName)
			}
		}
	case v2.TypeDocker, v2.TypePodman:
		namespace := map[string]string{
			v2.TypeDocker: DockerNamespace,
			v2.TypePodman: PodmanNamespace,
		}[options.IdType]
		if !options.Recursive {
			containerName = strings.TrimPrefix(containerName, "/")
			cont, err := m.namespacedContainer(containerName, namespace)
			if err != nil {
				return containersMap, err
			}
			containersMap[cont.info.Name] = cont
		} else {
			if containerName != "/" {
				return containersMap, fmt.Errorf("invalid request for %s container %q with subcontainers", options.IdType, containerName)
			}
			containersMap = m.getAllNamespacedContainers(namespace)
		}
	default:
		return containersMap, fmt.Errorf("invalid request type %q", options.IdType)
	}
	if options.MaxAge != nil {
		// update stats for all containers in containersMap
		var waitGroup sync.WaitGroup
		waitGroup.Add(len(containersMap))
		for _, container := range containersMap {
			go func(cont *containerData) {
				cont.OnDemandHousekeeping(*options.MaxAge)
				waitGroup.Done()
			}(container)
		}
		waitGroup.Wait()
	}
	return containersMap, nil
}
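
// Illustrative sketch, not part of the upstream file: how the request types
// above map onto getRequestedContainers — asking for every Docker container,
// each with its two most recent stats samples, assuming a started Manager m.
func allDockerInfoSketch(m Manager) (map[string]*info.ContainerInfo, error) {
	return m.GetRequestedContainersInfo("/", v2.RequestOptions{
		IdType:    v2.TypeDocker,
		Count:     2,
		Recursive: true,
	})
}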

func (m *manager) GetDirFsInfo(dir string) (v2.FsInfo, error) {
	device, err := m.fsInfo.GetDirFsDevice(dir)
	if err != nil {
		return v2.FsInfo{}, fmt.Errorf("failed to get device for dir %q: %v", dir, err)
	}
	return m.getFsInfoByDeviceName(device.Device)
}

func (m *manager) GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error) {
	device, err := m.fsInfo.GetDeviceInfoByFsUUID(uuid)
	if err != nil {
		return v2.FsInfo{}, err
	}
	return m.getFsInfoByDeviceName(device.Device)
}

func (m *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
	var empty time.Time
	// Get latest data from filesystems hanging off root container.
	stats, err := m.memoryCache.RecentStats("/", empty, empty, 1)
	if err != nil {
		return nil, err
	}
	dev := ""
	if len(label) != 0 {
		dev, err = m.fsInfo.GetDeviceForLabel(label)
		if err != nil {
			return nil, err
		}
	}
	fsInfo := []v2.FsInfo{}
	for i := range stats[0].Filesystem {
		fs := stats[0].Filesystem[i]
		if len(label) != 0 && fs.Device != dev {
			continue
		}
		mountpoint, err := m.fsInfo.GetMountpointForDevice(fs.Device)
		if err != nil {
			return nil, err
		}
		labels, err := m.fsInfo.GetLabelsForDevice(fs.Device)
		if err != nil {
			return nil, err
		}

		fi := v2.FsInfo{
			Timestamp:  stats[0].Timestamp,
			Device:     fs.Device,
			Mountpoint: mountpoint,
			Capacity:   fs.Limit,
			Usage:      fs.Usage,
			Available:  fs.Available,
			Labels:     labels,
		}
		if fs.HasInodes {
			fi.Inodes = &fs.Inodes
			fi.InodesFree = &fs.InodesFree
		}
		fsInfo = append(fsInfo, fi)
	}
	return fsInfo, nil
}

func (m *manager) GetMachineInfo() (*info.MachineInfo, error) {
	m.machineMu.RLock()
	defer m.machineMu.RUnlock()
	return m.machineInfo.Clone(), nil
}

func (m *manager) GetVersionInfo() (*info.VersionInfo, error) {
	// TODO: Consider caching this and periodically updating. The VersionInfo may change if
	// the docker daemon is started after the cAdvisor client is created. Caching the value
	// would be helpful so we would be able to return the last known docker version if
	// docker was down at the time of a query.
	return getVersionInfo()
}

func (m *manager) Exists(containerName string) bool {
	m.containersLock.RLock()
	defer m.containersLock.RUnlock()

	namespacedName := namespacedContainerName{
		Name: containerName,
	}

	_, ok := m.containers[namespacedName]
	return ok
}

func (m *manager) GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error) {
	// Override Recursive: only a single container listing is supported.
	options.Recursive = false
	// Override MaxAge: ProcessList does not require updated stats.
	options.MaxAge = nil
	conts, err := m.getRequestedContainers(containerName, options)
	if err != nil {
		return nil, err
	}
	if len(conts) != 1 {
		return nil, fmt.Errorf("Expected the request to match only one container")
	}
	// TODO(rjnagal): handle count? Only if we can do count by type (eg. top 5 cpu users)
	ps := []v2.ProcessInfo{}
	for _, cont := range conts {
		ps, err = cont.GetProcessList(m.cadvisorContainer, m.inHostNamespace)
		if err != nil {
			return nil, err
		}
	}
	return ps, nil
}

func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *containerData) error {
	for k, v := range collectorConfigs {
		configFile, err := cont.ReadFile(v, m.inHostNamespace)
		if err != nil {
			return fmt.Errorf("failed to read config file %q for config %q, container %q: %v", k, v, cont.info.Name, err)
		}
		klog.V(4).Infof("Got config from %q: %q", v, configFile)

		if strings.HasPrefix(k, "prometheus") || strings.HasPrefix(k, "Prometheus") {
			newCollector, err := collector.NewPrometheusCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHTTPClient)
			if err != nil {
				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
			err = cont.collectorManager.RegisterCollector(newCollector)
			if err != nil {
				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
		} else {
			newCollector, err := collector.NewCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHTTPClient)
			if err != nil {
				return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
			err = cont.collectorManager.RegisterCollector(newCollector)
			if err != nil {
				return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
			}
		}
	}
	return nil
}

// Create a container.
func (m *manager) createContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()

	return m.createContainerLocked(containerName, watchSource)
}

func (m *manager) createContainerLocked(containerName string, watchSource watcher.ContainerWatchSource) error {
	namespacedName := namespacedContainerName{
		Name: containerName,
	}

	// Check that the container didn't already exist.
	if _, ok := m.containers[namespacedName]; ok {
		return nil
	}

	handler, accept, err := container.NewContainerHandler(containerName, watchSource, m.containerEnvMetadataWhiteList, m.inHostNamespace)
	if err != nil {
		return err
	}
	if !accept {
		// ignoring this container.
		klog.V(4).Infof("ignoring container %q", containerName)
		return nil
	}
	collectorManager, err := collector.NewCollectorManager()
	if err != nil {
		return err
	}

	logUsage := *logCadvisorUsage && containerName == m.cadvisorContainer
	cont, err := newContainerData(containerName, m.memoryCache, handler, logUsage, collectorManager, m.maxHousekeepingInterval, m.allowDynamicHousekeeping, clock.RealClock{})
	if err != nil {
		return err
	}

	if m.includedMetrics.Has(container.PerfMetrics) {
		perfCgroupPath, err := handler.GetCgroupPath("perf_event")
		if err != nil {
			klog.Warningf("Error getting perf_event cgroup path: %q", err)
		} else {
			cont.perfCollector, err = m.perfManager.GetCollector(perfCgroupPath)
			if err != nil {
				klog.Errorf("Perf event metrics will not be available for container %q: %v", containerName, err)
			}
		}
	}

	if m.includedMetrics.Has(container.ResctrlMetrics) {
		cont.resctrlCollector, err = m.resctrlManager.GetCollector(containerName, func() ([]string, error) {
			return cont.getContainerPids(m.inHostNamespace)
		}, len(m.machineInfo.Topology))
		if err != nil {
			klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err)
		}
	}

	// Add collectors
	labels := handler.GetContainerLabels()
	collectorConfigs := collector.GetCollectorConfigs(labels)
	err = m.registerCollectors(collectorConfigs, cont)
	if err != nil {
		klog.Warningf("Failed to register collectors for %q: %v", containerName, err)
	}

	// Add the container name and all its aliases. The aliases must be within the namespace of the factory.
	m.containers[namespacedName] = cont
	for _, alias := range cont.info.Aliases {
		m.containers[namespacedContainerName{
			Namespace: cont.info.Namespace,
			Name:      alias,
		}] = cont
	}

	klog.V(3).Infof("Added container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)

	contSpec, err := cont.handler.GetSpec()
	if err != nil {
		return err
	}

	contRef, err := cont.handler.ContainerReference()
	if err != nil {
		return err
	}

	newEvent := &info.Event{
		ContainerName: contRef.Name,
		Timestamp:     contSpec.CreationTime,
		EventType:     info.EventContainerCreation,
	}
	err = m.eventHandler.AddEvent(newEvent)
	if err != nil {
		return err
	}
	// Start the container's housekeeping.
	return cont.Start()
}

func (m *manager) destroyContainer(containerName string) error {
	m.containersLock.Lock()
	defer m.containersLock.Unlock()

	return m.destroyContainerLocked(containerName)
}

func (m *manager) destroyContainerLocked(containerName string) error {
	namespacedName := namespacedContainerName{
		Name: containerName,
	}
	cont, ok := m.containers[namespacedName]
	if !ok {
		// Already destroyed, done.
		return nil
	}

	// Tell the container to stop.
	err := cont.Stop()
	if err != nil {
		return err
	}

	// Remove the container from our records (and all its aliases).
	delete(m.containers, namespacedName)
	for _, alias := range cont.info.Aliases {
		delete(m.containers, namespacedContainerName{
			Namespace: cont.info.Namespace,
			Name:      alias,
		})
	}
	klog.V(3).Infof("Destroyed container: %q (aliases: %v, namespace: %q)", containerName, cont.info.Aliases, cont.info.Namespace)

	contRef, err := cont.handler.ContainerReference()
	if err != nil {
		return err
	}

	newEvent := &info.Event{
		ContainerName: contRef.Name,
		Timestamp:     time.Now(),
		EventType:     info.EventContainerDeletion,
	}
	err = m.eventHandler.AddEvent(newEvent)
	if err != nil {
		return err
	}
	return nil
}

// Detect all containers that have been added or deleted from the specified container.
func (m *manager) getContainersDiff(containerName string) (added []info.ContainerReference, removed []info.ContainerReference, err error) {
	// Get all subcontainers recursively.
	m.containersLock.RLock()
	cont, ok := m.containers[namespacedContainerName{
		Name: containerName,
	}]
	m.containersLock.RUnlock()
	if !ok {
		return nil, nil, fmt.Errorf("failed to find container %q while checking for new containers", containerName)
	}
	allContainers, err := cont.handler.ListContainers(container.ListRecursive)
	if err != nil {
		return nil, nil, err
	}
	allContainers = append(allContainers, info.ContainerReference{Name: containerName})

	m.containersLock.RLock()
	defer m.containersLock.RUnlock()

	// Determine which were added and which were removed.
	allContainersSet := make(map[string]*containerData)
	for name, d := range m.containers {
		// Only add the canonical name.
		if d.info.Name == name.Name {
			allContainersSet[name.Name] = d
		}
	}

	// Added containers
	for _, c := range allContainers {
		delete(allContainersSet, c.Name)
		_, ok := m.containers[namespacedContainerName{
			Name: c.Name,
		}]
		if !ok {
			added = append(added, c)
		}
	}

	// Removed ones are no longer in the container listing.
	for _, d := range allContainersSet {
		removed = append(removed, d.info.ContainerReference)
	}

	return
}

// Detect the existing subcontainers and reflect the setup here.
func (m *manager) detectSubcontainers(containerName string) error {
	added, removed, err := m.getContainersDiff(containerName)
	if err != nil {
		return err
	}

	// Add the new containers.
	for _, cont := range added {
		err = m.createContainer(cont.Name, watcher.Raw)
		if err != nil {
			klog.Errorf("Failed to create existing container: %s: %s", cont.Name, err)
		}
	}

	// Remove the old containers.
	for _, cont := range removed {
		err = m.destroyContainer(cont.Name)
		if err != nil {
			klog.Errorf("Failed to destroy existing container: %s: %s", cont.Name, err)
		}
	}

	return nil
}

// Watches for new containers started in the system. Runs forever unless there is a setup error.
func (m *manager) watchForNewContainers(quit chan error) error {
	watched := make([]watcher.ContainerWatcher, 0)
	for _, watcher := range m.containerWatchers {
		err := watcher.Start(m.eventsChannel)
		if err != nil {
			for _, w := range watched {
				stopErr := w.Stop()
				if stopErr != nil {
					klog.Warningf("Failed to stop watcher %v with error: %v", w, stopErr)
				}
			}
			return err
		}
		watched = append(watched, watcher)
	}

	// There is a race between starting the watch and new container creation, so we do a detection before we read new containers.
	err := m.detectSubcontainers("/")
	if err != nil {
		return err
	}

	// Listen to events from the container handler.
	go func() {
		for {
			select {
			case event := <-m.eventsChannel:
				switch {
				case event.EventType == watcher.ContainerAdd:
					switch event.WatchSource {
					default:
						err = m.createContainer(event.Name, event.WatchSource)
					}
				case event.EventType == watcher.ContainerDelete:
					err = m.destroyContainer(event.Name)
				}
				if err != nil {
					klog.Warningf("Failed to process watch event %+v: %v", event, err)
				}
			case <-quit:
				var errs partialFailure

				// Stop processing events if asked to quit.
				for i, watcher := range m.containerWatchers {
					err := watcher.Stop()
					if err != nil {
						errs.append(fmt.Sprintf("watcher %d", i), "Stop", err)
					}
				}

				if len(errs) > 0 {
					quit <- errs
				} else {
					quit <- nil
					klog.Infof("Exiting thread watching subcontainers")
					return
				}
			}
		}
	}()
	return nil
}

func (m *manager) watchForNewOoms() error {
	klog.V(2).Infof("Started watching for new ooms in manager")
	outStream := make(chan *oomparser.OomInstance, 10)
	oomLog, err := oomparser.New()
	if err != nil {
		return err
	}
	go oomLog.StreamOoms(outStream)

	go func() {
		for oomInstance := range outStream {
			// Surface OOM and OOM kill events.
			newEvent := &info.Event{
				ContainerName: oomInstance.ContainerName,
				Timestamp:     oomInstance.TimeOfDeath,
				EventType:     info.EventOom,
			}
			err := m.eventHandler.AddEvent(newEvent)
			if err != nil {
				klog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
			}
			klog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)

			newEvent = &info.Event{
				ContainerName: oomInstance.VictimContainerName,
				Timestamp:     oomInstance.TimeOfDeath,
				EventType:     info.EventOomKill,
				EventData: info.EventData{
					OomKill: &info.OomKillEventData{
						Pid:         oomInstance.Pid,
						ProcessName: oomInstance.ProcessName,
					},
				},
			}
			err = m.eventHandler.AddEvent(newEvent)
			if err != nil {
				klog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
			}

			// Count OOM events for later collection by prometheus
			request := v2.RequestOptions{
				IdType: v2.TypeName,
				Count:  1,
			}
			conts, err := m.getRequestedContainers(oomInstance.ContainerName, request)
			if err != nil {
				klog.V(2).Infof("failed getting container info for %q: %v", oomInstance.ContainerName, err)
				continue
			}
			if len(conts) != 1 {
				klog.V(2).Info("Expected the request to match only one container")
				continue
			}
			for _, cont := range conts {
				atomic.AddUint64(&cont.oomEvents, 1)
			}
		}
	}()
	return nil
}

// Can be called by the API, which will take events returned on the channel.
func (m *manager) WatchForEvents(request *events.Request) (*events.EventChannel, error) {
	return m.eventHandler.WatchEvents(request)
}

// Can be called by the API, which will return all events satisfying the request.
func (m *manager) GetPastEvents(request *events.Request) ([]*info.Event, error) {
	return m.eventHandler.GetEvents(request)
}

// Called by the API when a client is no longer listening to the channel.
func (m *manager) CloseEventChannel(watchID int) {
	m.eventHandler.StopWatch(watchID)
}

// Parses the events StoragePolicy from the flags.
func parseEventsStoragePolicy() events.StoragePolicy {
	policy := events.DefaultStoragePolicy()

	// Parse max age.
	parts := strings.Split(*eventStorageAgeLimit, ",")
	for _, part := range parts {
		items := strings.Split(part, "=")
		if len(items) != 2 {
			klog.Warningf("Unknown event storage policy %q when parsing max age", part)
			continue
		}
		dur, err := time.ParseDuration(items[1])
		if err != nil {
			klog.Warningf("Unable to parse event max age duration %q: %v", items[1], err)
			continue
		}
		if items[0] == "default" {
			policy.DefaultMaxAge = dur
			continue
		}
		policy.PerTypeMaxAge[info.EventType(items[0])] = dur
	}

	// Parse max number.
	parts = strings.Split(*eventStorageEventLimit, ",")
	for _, part := range parts {
		items := strings.Split(part, "=")
		if len(items) != 2 {
			klog.Warningf("Unknown event storage policy %q when parsing max event limit", part)
			continue
		}
		val, err := strconv.Atoi(items[1])
		if err != nil {
			klog.Warningf("Unable to parse integer from %q: %v", items[1], err)
			continue
		}
		if items[0] == "default" {
			policy.DefaultMaxNumEvents = val
			continue
		}
		policy.PerTypeMaxNumEvents[info.EventType(items[0])] = val
	}

	return policy
}

func (m *manager) DebugInfo() map[string][]string {
	debugInfo := container.DebugInfo()

	// Get unique containers.
	var conts map[*containerData]struct{}
	func() {
		m.containersLock.RLock()
		defer m.containersLock.RUnlock()

		conts = make(map[*containerData]struct{}, len(m.containers))
		for _, c := range m.containers {
			conts[c] = struct{}{}
		}
	}()

	// List containers.
	lines := make([]string, 0, len(conts))
	for cont := range conts {
		lines = append(lines, cont.info.Name)
		if cont.info.Namespace != "" {
			lines = append(lines, fmt.Sprintf("\tNamespace: %s", cont.info.Namespace))
		}

		if len(cont.info.Aliases) != 0 {
			lines = append(lines, "\tAliases:")
			for _, alias := range cont.info.Aliases {
				lines = append(lines, fmt.Sprintf("\t\t%s", alias))
			}
		}
	}

	debugInfo["Managed containers"] = lines
	return debugInfo
}

func (m *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error) {
	mountPoint, err := m.fsInfo.GetMountpointForDevice(deviceName)
	if err != nil {
		return v2.FsInfo{}, fmt.Errorf("failed to get mount point for device %q: %v", deviceName, err)
	}
	infos, err := m.GetFsInfo("")
	if err != nil {
		return v2.FsInfo{}, err
	}
	for _, info := range infos {
		if info.Mountpoint == mountPoint {
			return info, nil
		}
	}
	return v2.FsInfo{}, fmt.Errorf("cannot find filesystem info for device %q", deviceName)
}

func (m *manager) containersInfo(containers map[string]*containerData, query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
	output := make(map[string]info.ContainerInfo, len(containers))
	for name, cont := range containers {
		inf, err := m.containerDataToContainerInfo(cont, query)
		if err != nil {
			// Ignore the error caused by the race condition and return a best-effort result.
			if err == memory.ErrDataNotFound {
				klog.V(4).Infof("Error getting data for container %s because of race condition", name)
				continue
			}
			return nil, err
		}
		output[name] = *inf
	}
	return output, nil
}

func (m *manager) AllPodmanContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
	containers := m.getAllNamespacedContainers(PodmanNamespace)
	return m.containersInfo(containers, query)
}

func getVersionInfo() (*info.VersionInfo, error) {
	kernelVersion := machine.KernelVersion()
	osVersion := machine.ContainerOsVersion()

	return &info.VersionInfo{
		KernelVersion:      kernelVersion,
		ContainerOsVersion: osVersion,
		CadvisorVersion:    version.Info["version"],
		CadvisorRevision:   version.Info["revision"],
	}, nil
}

// Helper for accumulating partial failures.
type partialFailure []string

func (f *partialFailure) append(id, operation string, err error) {
	*f = append(*f, fmt.Sprintf("[%q: %s: %s]", id, operation, err))
}

func (f partialFailure) Error() string {
	return fmt.Sprintf("partial failures: %s", strings.Join(f, ", "))
}

func (f partialFailure) OrNil() error {
	if len(f) == 0 {
		return nil
	}
	return f
}
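
// Illustrative sketch, not part of the upstream file: the usage pattern the V2
// accessors above follow with partialFailure — collect per-item errors while
// still returning whatever data was gathered. The work map and its callback
// shape are hypothetical.
func partialFailureUsageSketch(work map[string]func() error) error {
	var errs partialFailure
	for name, fn := range work {
		if err := fn(); err != nil {
			errs.append(name, "run", err)
		}
	}
	// OrNil returns nil when nothing failed, or an error joining all failures.
	return errs.OrNil()
}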