github.com/google/cadvisor@v0.49.1/container/crio/handler.go

// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Handler for CRI-O containers.
package crio

import (
	"fmt"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/opencontainers/runc/libcontainer/cgroups"

	"github.com/google/cadvisor/container"
	"github.com/google/cadvisor/container/common"
	containerlibcontainer "github.com/google/cadvisor/container/libcontainer"
	"github.com/google/cadvisor/fs"
	info "github.com/google/cadvisor/info/v1"
)

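// crioContainerHandler handles a single CRI-O container. It resolves the
// container's cgroup paths, rootfs and log locations via the CRI-O client,
// and delegates cgroup and network stats collection to a libcontainer handler.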
type crioContainerHandler struct {
	client CrioClient
	name   string

	machineInfoFactory info.MachineInfoFactory

	// Absolute path to the cgroup hierarchies of this container.
	// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
	cgroupPaths map[string]string

	// the CRI-O storage driver
	storageDriver    storageDriver
	fsInfo           fs.FsInfo
	rootfsStorageDir string

	// Metadata associated with the container.
	envs   map[string]string
	labels map[string]string

	// TODO
	// crio version handling...

	// Image name used for this container.
	image string

	// The network mode of the container
	// TODO

	// Filesystem handler.
	fsHandler common.FsHandler

	// The IP address of the container
	ipAddress string

	includedMetrics container.MetricSet

	reference info.ContainerReference

	libcontainerHandler *containerlibcontainer.Handler
	cgroupManager       cgroups.Manager
	rootFs              string
	pidKnown            bool
}

var _ container.ContainerHandler = &crioContainerHandler{}

// newCrioContainerHandler returns a new container.ContainerHandler
func newCrioContainerHandler(
	client CrioClient,
	name string,
	machineInfoFactory info.MachineInfoFactory,
	fsInfo fs.FsInfo,
	storageDriver storageDriver,
	storageDir string,
	cgroupSubsystems map[string]string,
	inHostNamespace bool,
	metadataEnvAllowList []string,
	includedMetrics container.MetricSet,
) (container.ContainerHandler, error) {
	// Create the cgroup paths.
	cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems, name)

	// Generate the equivalent cgroup manager for this container.
	cgroupManager, err := containerlibcontainer.NewCgroupManager(name, cgroupPaths)
	if err != nil {
		return nil, err
	}

	rootFs := "/"
	if !inHostNamespace {
		rootFs = "/rootfs"
	}

	id := ContainerNameToCrioId(name)
	pidKnown := true

	cInfo, err := client.ContainerInfo(id)
	if err != nil {
		return nil, err
	}
	if cInfo.Pid == 0 {
		// If the pid is not known yet, network-related stats cannot be retrieved by
		// the libcontainer handler's GetStats(). In this case, the crio handler's
		// GetStats() will reattempt to get the pid and, once it is known, will
		// construct the libcontainer handler. That libcontainer handler is then
		// cached and reused without additional calls to crio.
		pidKnown = false
	}

	// passed to fs handler below ...
	// XXX: this is using the full container log path, as constructed by the CRI
	// /var/log/pods/<pod_uuid>/container_instance.log
	// It's not actually a log dir, as the CRI doesn't have per-container dirs
	// under /var/log/pods/<pod_uuid>/
	// We can't use /var/log/pods/<pod_uuid>/ to count per-container log usage.
	// We use the container log file directly.
	storageLogDir := cInfo.LogPath

	// Determine the rootfs storage dir.
	rootfsStorageDir := cInfo.Root
	// TODO(runcom): CRI-O doesn't strip /merged, but we need to in order to
	// get the device ID from the root; otherwise it errors out because overlay
	// mounts don't have fixed dev ids.
	rootfsStorageDir = strings.TrimSuffix(rootfsStorageDir, "/merged")
	switch storageDriver {
	case overlayStorageDriver, overlay2StorageDriver:
		// The overlay and overlay2 drivers are both the same "overlay2" driver,
		// so treat them the same.
		rootfsStorageDir = filepath.Join(rootfsStorageDir, "diff")
	}

	containerReference := info.ContainerReference{
		Id:        id,
		Name:      name,
		Aliases:   []string{cInfo.Name, id},
		Namespace: CrioNamespace,
	}

	// Find out if we need network metrics reported for this container.
	// Containers that don't have their own network -- this includes
	// containers running in Kubernetes pods that use the network of the
	// infrastructure container -- do not need their stats to be
	// reported. This stops metrics from being reported multiple times for
	// each container in a pod.
	metrics := common.RemoveNetMetrics(includedMetrics, cInfo.Labels["io.kubernetes.container.name"] != "POD")

	libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootFs, cInfo.Pid, metrics)

	// TODO: extract object mother method
	handler := &crioContainerHandler{
		client:              client,
		name:                name,
		machineInfoFactory:  machineInfoFactory,
		cgroupPaths:         cgroupPaths,
		storageDriver:       storageDriver,
		fsInfo:              fsInfo,
		rootfsStorageDir:    rootfsStorageDir,
		envs:                make(map[string]string),
		labels:              cInfo.Labels,
		includedMetrics:     metrics,
		reference:           containerReference,
		libcontainerHandler: libcontainerHandler,
		cgroupManager:       cgroupManager,
		rootFs:              rootFs,
		pidKnown:            pidKnown,
	}

	handler.image = cInfo.Image
	// TODO: we wanted to know the graph driver DeviceId (don't think this is needed now)

	// Ignore the error and default to zero; this happens with sandboxes
	// (kube isn't sending a restart count in the annotations for sandboxes).
	restartCount, _ := strconv.Atoi(cInfo.Annotations["io.kubernetes.container.restartCount"])
	// Only add the restartcount label if it's greater than 0.
	if restartCount > 0 {
		handler.labels["restartcount"] = strconv.Itoa(restartCount)
	}

	handler.ipAddress = cInfo.IP

	// We optionally collect disk usage metrics.
	if includedMetrics.Has(container.DiskUsageMetrics) {
		handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, storageLogDir, fsInfo)
	}
	// TODO: for env vars we wanted to show the ones from container.Config.Env that are in the allowlist.
	//for _, exposedEnv := range metadataEnvAllowList {
	//klog.V(4).Infof("TODO env whitelist: %v", exposedEnv)
	//}

	return handler, nil
}

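// Start begins asynchronous filesystem usage collection, if disk usage
// metrics are enabled for this container.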
func (h *crioContainerHandler) Start() {
	if h.fsHandler != nil {
		h.fsHandler.Start()
	}
}

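// Cleanup stops the filesystem usage collection started by Start.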
func (h *crioContainerHandler) Cleanup() {
	if h.fsHandler != nil {
		h.fsHandler.Stop()
	}
}

func (h *crioContainerHandler) ContainerReference() (info.ContainerReference, error) {
	return h.reference, nil
}

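// GetSpec returns the container spec derived from its cgroups, annotated with
// the labels, environment variables, and image known to this handler.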
func (h *crioContainerHandler) GetSpec() (info.ContainerSpec, error) {
	hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics)
	hasNet := h.includedMetrics.Has(container.NetworkUsageMetrics)
	spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNet, hasFilesystem)

	spec.Labels = h.labels
	spec.Envs = h.envs
	spec.Image = h.image

	return spec, err
}

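// getFsStats assigns device names to the disk I/O stats and, when disk usage
// metrics are enabled and an overlay storage driver is in use, appends a
// filesystem stats entry for the container's writable layer.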
func (h *crioContainerHandler) getFsStats(stats *info.ContainerStats) error {
	mi, err := h.machineInfoFactory.GetMachineInfo()
	if err != nil {
		return err
	}

	if h.includedMetrics.Has(container.DiskIOMetrics) {
		common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
	}

	if !h.includedMetrics.Has(container.DiskUsageMetrics) {
		return nil
	}
	var device string
	switch h.storageDriver {
	case overlay2StorageDriver, overlayStorageDriver:
		deviceInfo, err := h.fsInfo.GetDirFsDevice(h.rootfsStorageDir)
		if err != nil {
			return fmt.Errorf("unable to determine device info for dir: %v: %v", h.rootfsStorageDir, err)
		}
		device = deviceInfo.Device
	default:
		return nil
	}

	var (
		limit  uint64
		fsType string
	)

	// CRI-O does not impose any filesystem limits on containers, so use the capacity as the limit.
	for _, fs := range mi.Filesystems {
		if fs.Device == device {
			limit = fs.Capacity
			fsType = fs.Type
			break
		}
	}

	if fsType == "" {
		return fmt.Errorf("unable to determine fs type for device: %v", device)
	}
	fsStat := info.FsStats{Device: device, Type: fsType, Limit: limit}
	usage := h.fsHandler.Usage()
	fsStat.BaseUsage = usage.BaseUsageBytes
	fsStat.Usage = usage.TotalUsageBytes
	fsStat.Inodes = usage.InodeUsage

	stats.Filesystem = append(stats.Filesystem, fsStat)

	return nil
}

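// getLibcontainerHandler returns the cached libcontainer handler. If the
// container's pid was unknown when this handler was created, it re-queries
// CRI-O and rebuilds the libcontainer handler once a pid becomes available.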
func (h *crioContainerHandler) getLibcontainerHandler() *containerlibcontainer.Handler {
	if h.pidKnown {
		return h.libcontainerHandler
	}

	id := ContainerNameToCrioId(h.name)

	cInfo, err := h.client.ContainerInfo(id)
	if err != nil || cInfo.Pid == 0 {
		return h.libcontainerHandler
	}

	h.pidKnown = true
	h.libcontainerHandler = containerlibcontainer.NewHandler(h.cgroupManager, h.rootFs, cInfo.Pid, h.includedMetrics)

	return h.libcontainerHandler
}

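// GetStats returns cgroup and network stats from the libcontainer handler,
// plus filesystem stats when disk usage metrics are enabled. If network stats
// are expected but missing, the cached pid is marked stale so that it is
// re-resolved on the next call.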
func (h *crioContainerHandler) GetStats() (*info.ContainerStats, error) {
	libcontainerHandler := h.getLibcontainerHandler()
	stats, err := libcontainerHandler.GetStats()
	if err != nil {
		return stats, err
	}

	if h.includedMetrics.Has(container.NetworkUsageMetrics) && len(stats.Network.Interfaces) == 0 {
		// The absence of network information indicates that the pid of the
		// container is no longer valid, and we need to ask crio to
		// provide the pid of another container from that pod.
		h.pidKnown = false
		return stats, nil
	}
	// Get filesystem stats.
	err = h.getFsStats(stats)
	if err != nil {
		return stats, err
	}

	return stats, nil
}

func (h *crioContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
	// No-op for the CRI-O driver.
	return []info.ContainerReference{}, nil
}

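// GetCgroupPath returns the absolute cgroup path for the given resource
// (e.g. "cpu"). On the unified cgroup v2 hierarchy all controllers share a
// single path, which is stored under the empty key.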
func (h *crioContainerHandler) GetCgroupPath(resource string) (string, error) {
	var res string
	if !cgroups.IsCgroup2UnifiedMode() {
		res = resource
	}
	path, ok := h.cgroupPaths[res]
	if !ok {
		return "", fmt.Errorf("could not find path for resource %q for container %q", resource, h.reference.Name)
	}
	return path, nil
}

func (h *crioContainerHandler) GetContainerLabels() map[string]string {
	return h.labels
}

func (h *crioContainerHandler) GetContainerIPAddress() string {
	return h.ipAddress
}

func (h *crioContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
	return h.libcontainerHandler.GetProcesses()
}

func (h *crioContainerHandler) Exists() bool {
	return common.CgroupExists(h.cgroupPaths)
}

func (h *crioContainerHandler) Type() container.ContainerType {
	return container.ContainerTypeCrio
}