github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/cgroup/client.go (about)

     1  package cgroup
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io/fs"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"syscall"
    16  
    17  	"github.com/castai/kvisor/pkg/logging"
    18  	"github.com/castai/kvisor/pkg/metrics"
    19  )
    20  
var (
	// baseCgroupPath is joined in front of every cgroup path parsed by
	// NewFromProcessCgroupFile and is stored as the cgroup root of the result.
	baseCgroupPath = ""

	// ErrContainerIDNotFoundInCgroupPath is returned when no container id can be
	// extracted from a cgroup path.
	ErrContainerIDNotFoundInCgroupPath = errors.New("container id not found in cgroup path")
	// ErrCgroupNotFound is returned when no cgroup directory matches a requested cgroup id.
	ErrCgroupNotFound                  = errors.New("cgroup not found")
)

// ID is the numeric identifier of a cgroup. Its lower 32 bits are compared
// against the inode number of cgroupfs directories when resolving a cgroup path.
type ID = uint64

const (
	// procCgroups is scanned for the hierarchy id of the default controller.
	procCgroups             = "/proc/cgroups"
	// cgroupControllersFile is stat'ed to decide whether cgroup v2 is mounted
	// and used as the default.
	cgroupControllersFile   = "/sys/fs/cgroup/cgroup.controllers"
	// cgroupDefaultController is the controller name looked up in procCgroups.
	cgroupDefaultController = "cpuset"
)
    35  
    36  type Version uint8
    37  
    38  func (v Version) String() string {
    39  	if v == V1 {
    40  		return "V1"
    41  	}
    42  	if v == V2 {
    43  		return "V2"
    44  	}
    45  	return ""
    46  }
    47  
    48  const (
    49  	V1 = iota
    50  	V2
    51  )
    52  
    53  // Represents the internal ID of a container runtime
    54  type ContainerRuntimeID int
    55  
    56  const (
    57  	UnknownRuntime ContainerRuntimeID = iota
    58  	DockerRuntime
    59  	ContainerdRuntime
    60  	CrioRuntime
    61  	PodmanRuntime
    62  	GardenRuntime
    63  )
    64  
    65  var runtimeStringMap = map[ContainerRuntimeID]string{
    66  	UnknownRuntime:    "unknown",
    67  	DockerRuntime:     "docker",
    68  	ContainerdRuntime: "containerd",
    69  	CrioRuntime:       "crio",
    70  	PodmanRuntime:     "podman",
    71  	GardenRuntime:     "garden", // there is no enricher (yet ?) for garden
    72  }
    73  
    74  func (runtime ContainerRuntimeID) String() string {
    75  	return runtimeStringMap[runtime]
    76  }
    77  
    78  func FromString(str string) ContainerRuntimeID {
    79  	switch str {
    80  	case "docker":
    81  		return DockerRuntime
    82  	case "crio":
    83  		return CrioRuntime
    84  	case "cri-o":
    85  		return CrioRuntime
    86  	case "podman":
    87  		return PodmanRuntime
    88  	case "containerd":
    89  		return ContainerdRuntime
    90  	case "garden":
    91  		return GardenRuntime
    92  
    93  	default:
    94  		return UnknownRuntime
    95  	}
    96  }
    97  
// Client resolves cgroup ids to Cgroup descriptions and caches the results.
type Client struct {
	// version is the default cgroup version detected when the client was created.
	version            Version
	// cgRoot is the cgroup root directory passed to NewClient.
	cgRoot             string
	// cgroupCacheByID maps a cgroup id to a function returning its Cgroup.
	// Entries added through LoadCgroup build the Cgroup on first call.
	cgroupCacheByID    map[ID]func() *Cgroup
	// cgroupMu guards cgroupCacheByID.
	cgroupMu           sync.RWMutex
	// defaultHierarchyID is the hierarchy id of the default controller as read
	// from /proc/cgroups; it is 0 when cgroup v2 is the default.
	defaultHierarchyID uint32
}
   105  
   106  func NewClient(log *logging.Logger, root string) (*Client, error) {
   107  	version, defaultHierarchyID, err := getDefaultVersionAndHierarchy(log)
   108  	if err != nil {
   109  		return nil, fmt.Errorf("getting default cgroups version: %w", err)
   110  	}
   111  	log.WithField("component", "cgroup").Infof("cgroups detected version=%s, root=%s", version, root)
   112  	return &Client{
   113  		version:            version,
   114  		cgRoot:             root,
   115  		cgroupCacheByID:    make(map[uint64]func() *Cgroup),
   116  		defaultHierarchyID: defaultHierarchyID,
   117  	}, nil
   118  }
   119  
   120  func (c *Client) lookupCgroupForIDInCache(id ID) (*Cgroup, bool) {
   121  	c.cgroupMu.RLock()
   122  	defer c.cgroupMu.RUnlock()
   123  
   124  	if cgroup, found := c.cgroupCacheByID[id]; found {
   125  		return cgroup(), true
   126  	}
   127  	return nil, false
   128  }
   129  
   130  func (c *Client) GetCgroupForID(cgroupID ID) (*Cgroup, error) {
   131  	if cg, found := c.lookupCgroupForIDInCache(cgroupID); found {
   132  		return cg, nil
   133  	}
   134  
   135  	metrics.AgentFindCgroupFS.Inc()
   136  
   137  	cgroupPath, _ := c.findCgroupPathForID(cgroupID)
   138  
   139  	if cgroupPath == "" {
   140  		return nil, ErrCgroupNotFound
   141  	}
   142  
   143  	cgroup := c.getCgroupForIDAndPath(cgroupID, cgroupPath)
   144  
   145  	c.cacheCgroup(cgroup)
   146  
   147  	return cgroup, nil
   148  }
   149  
   150  func (c *Client) getCgroupForIDAndPath(cgroupID ID, cgroupPath string) *Cgroup {
   151  	containerID, containerRuntime := getContainerIdFromCgroup(cgroupPath)
   152  
   153  	cg := &Cgroup{
   154  		Id:               cgroupID,
   155  		ContainerID:      containerID,
   156  		ContainerRuntime: containerRuntime,
   157  		Path:             cgroupPath,
   158  		cgRoot:           c.cgRoot,
   159  		subsystems:       map[string]string{},
   160  	}
   161  
   162  	switch c.version {
   163  	case V1:
   164  		after, _ := strings.CutPrefix(cgroupPath, cg.cgRoot)
   165  		subpath := strings.SplitN(after, "/", 1)
   166  		if len(subpath) != 2 {
   167  			return cg
   168  		}
   169  		last := subpath[1]
   170  		cg.Version = V1
   171  		cg.subsystems = map[string]string{
   172  			"cpu":     last,
   173  			"cpuacct": last,
   174  			"memory":  last,
   175  			"blkio":   last,
   176  		}
   177  	case V2:
   178  		after, _ := strings.CutPrefix(cgroupPath, cg.cgRoot)
   179  		cg.Version = V2
   180  		cg.subsystems = map[string]string{
   181  			"": after,
   182  		}
   183  	}
   184  	return cg
   185  }
   186  
   187  func (c *Client) getCgroupSearchBasePath() string {
   188  	rootDir := c.cgRoot
   189  
   190  	if c.version == V1 {
   191  		// TODO: we hardcode this for now, but in the future we might want to make this configurable
   192  		// (cpuset might not always be the first cgroup reported by the kernel)
   193  		rootDir = filepath.Join(rootDir, "cpuset")
   194  	}
   195  
   196  	return rootDir
   197  }
   198  
   199  func (c *Client) findCgroupPathForID(cgroupId ID) (string, ID) {
   200  	found := errors.New("found")
   201  	retPath := ""
   202  	var cgroupID ID
   203  
   204  	rootDir := c.getCgroupSearchBasePath()
   205  
   206  	_ = filepath.Walk(rootDir, func(path string, info fs.FileInfo, err error) error {
   207  		// nolint:nilerr
   208  		if err != nil || !info.IsDir() {
   209  			return nil
   210  		}
   211  
   212  		stat, ok := info.Sys().(*syscall.Stat_t)
   213  
   214  		if !ok {
   215  			return errors.New("unexpected stat")
   216  		}
   217  
   218  		if (stat.Ino & 0xFFFFFFFF) == (cgroupId & 0xFFFFFFFF) {
   219  			retPath = path
   220  			cgroupID = stat.Ino
   221  			return found
   222  		}
   223  
   224  		return nil
   225  	})
   226  
   227  	if retPath == rootDir {
   228  		return "", 0
   229  	}
   230  
   231  	return retPath, cgroupID
   232  }
   233  
// DefaultCgroupVersion returns the cgroup version stored in the client.
func (c *Client) DefaultCgroupVersion() Version {
	return c.version
}
   237  
   238  func (c *Client) IsDefaultHierarchy(hierarchyID uint32) bool {
   239  	// There is no such thing as a default hierarchy in cgroup v2, as this only applies to cgroup v1,
   240  	// where we need to ensure to always use the same type of cgroup for handling events.
   241  	if c.DefaultCgroupVersion() == V2 {
   242  		return true
   243  	}
   244  
   245  	return c.defaultHierarchyID == hierarchyID
   246  }
   247  
   248  func getDefaultVersionAndHierarchy(log *logging.Logger) (Version, uint32, error) {
   249  	// 1st Method: already mounted cgroupv1 filesystem
   250  
   251  	if ok, _ := isCgroupV2MountedAndDefault(); ok {
   252  		return V2, 0, nil
   253  	}
   254  
   255  	//
   256  	// 2nd Method: From cgroup man page:
   257  	// ...
   258  	// 2. The unique ID of the cgroup hierarchy on which this
   259  	//    controller is mounted. If multiple cgroups v1
   260  	//    controllers are bound to the same hierarchy, then each
   261  	//    will show the same hierarchy ID in this field.  The
   262  	//    value in this field will be 0 if:
   263  	//
   264  	//    a) the controller is not mounted on a cgroups v1
   265  	//       hierarchy;
   266  	//    b) the controller is bound to the cgroups v2 single
   267  	//       unified hierarchy; or
   268  	//    c) the controller is disabled (see below).
   269  	// ...
   270  
   271  	var value uint64
   272  	file, err := os.Open(procCgroups)
   273  	if err != nil {
   274  		return 0, 0, fmt.Errorf("opening %s: %w", procCgroups, err)
   275  	}
   276  	defer func() {
   277  		if err := file.Close(); err != nil {
   278  			log.Warnf("closing %s: %v", procCgroups, err)
   279  		}
   280  	}()
   281  
   282  	scanner := bufio.NewScanner(file)
   283  	for scanner.Scan() {
   284  		line := strings.Fields(scanner.Text())
   285  		if line[0] != cgroupDefaultController {
   286  			continue
   287  		}
   288  		value, err = strconv.ParseUint(line[1], 10, 32)
   289  		if err != nil {
   290  			return 0, 0, fmt.Errorf("parsing %s: %w", procCgroups, err)
   291  		}
   292  	}
   293  
   294  	if value == 0 { // == (a), (b) or (c)
   295  		return V2, 0, nil
   296  	}
   297  
   298  	return V1, uint32(value), nil
   299  }
   300  
   301  func isCgroupV2MountedAndDefault() (bool, error) {
   302  	_, err := os.Stat(cgroupControllersFile)
   303  	if os.IsNotExist(err) {
   304  		return false, nil
   305  	}
   306  	if err != nil {
   307  		return false, fmt.Errorf("opening %s: %w", cgroupControllersFile, err)
   308  	}
   309  
   310  	return true, nil
   311  }
   312  
   313  func NewFromProcessCgroupFile(filePath string) (*Cgroup, error) {
   314  	data, err := os.ReadFile(filePath)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  	cg := &Cgroup{
   319  		subsystems: map[string]string{},
   320  		cgRoot:     baseCgroupPath,
   321  	}
   322  	for _, line := range strings.Split(string(data), "\n") {
   323  		parts := strings.SplitN(line, ":", 3)
   324  		if len(parts) < 3 {
   325  			continue
   326  		}
   327  		for _, cgType := range strings.Split(parts[1], ",") {
   328  			cg.subsystems[cgType] = path.Join(baseCgroupPath, parts[2])
   329  		}
   330  	}
   331  
   332  	if p := cg.subsystems["cpu"]; p != "" {
   333  		cg.Path = p
   334  		cg.Version = V1
   335  	} else {
   336  		cg.Path = cg.subsystems[""]
   337  		cg.Version = V2
   338  	}
   339  
   340  	if containerID, runtimeType := getContainerIdFromCgroup(cg.Path); containerID == "" {
   341  		return nil, ErrContainerIDNotFoundInCgroupPath
   342  	} else {
   343  		cg.ContainerID = containerID
   344  		cg.ContainerRuntime = runtimeType
   345  	}
   346  
   347  	if cg.Id, err = getCgroupIDForPath(cg.Path); err != nil {
   348  		return nil, err
   349  	}
   350  
   351  	return cg, nil
   352  }
   353  
var (
	// containerIdFromCgroupRegex matches a container id made of exactly 64 hex characters.
	containerIdFromCgroupRegex       = regexp.MustCompile(`^[A-Fa-f0-9]{64}$`)
	// gardenContainerIdFromCgroupRegex matches a garden container id: 8 hex
	// characters followed by four "-xxxx" groups of 4 hex characters each.
	gardenContainerIdFromCgroupRegex = regexp.MustCompile(`^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){4}$`)
)
   358  
   359  // getContainerIdFromCgroup extracts container id and its runtime from path. It returns
   360  // the container id and the used runtime.
   361  func getContainerIdFromCgroup(cgroupPath string) (string, ContainerRuntimeID) {
   362  	cgroupParts := strings.Split(cgroupPath, "/")
   363  
   364  	// search from the end to get the most inner container id
   365  	for i := len(cgroupParts) - 1; i >= 0; i = i - 1 {
   366  		pc := cgroupParts[i]
   367  		if len(pc) < 28 {
   368  			continue // container id is at least 28 characters long
   369  		}
   370  
   371  		runtime := UnknownRuntime
   372  		id := strings.TrimSuffix(pc, ".scope")
   373  
   374  		switch {
   375  		case strings.HasPrefix(id, "docker-"):
   376  			runtime = DockerRuntime
   377  			id = strings.TrimPrefix(id, "docker-")
   378  		case strings.HasPrefix(id, "crio-"):
   379  			runtime = CrioRuntime
   380  			id = strings.TrimPrefix(id, "crio-")
   381  		case strings.HasPrefix(id, "cri-containerd-"):
   382  			runtime = ContainerdRuntime
   383  			id = strings.TrimPrefix(id, "cri-containerd-")
   384  		case strings.Contains(pc, ":cri-containerd:"):
   385  			runtime = ContainerdRuntime
   386  			id = pc[strings.LastIndex(pc, ":cri-containerd:")+len(":cri-containerd:"):]
   387  		case strings.HasPrefix(id, "libpod-"):
   388  			runtime = PodmanRuntime
   389  			id = strings.TrimPrefix(id, "libpod-")
   390  		}
   391  
   392  		if matched := containerIdFromCgroupRegex.MatchString(id); matched {
   393  			if runtime == UnknownRuntime && i > 0 && cgroupParts[i-1] == "docker" {
   394  				// non-systemd docker with format: .../docker/01adbf...f26db7f/
   395  				runtime = DockerRuntime
   396  			}
   397  			if runtime == UnknownRuntime && i > 0 && cgroupParts[i-1] == "actions_job" {
   398  				// non-systemd docker with format in GitHub Actions: .../actions_job/01adbf...f26db7f/
   399  				runtime = DockerRuntime
   400  			}
   401  			if runtime == UnknownRuntime && i > 0 {
   402  				for l := i; l > 0; l-- {
   403  					if cgroupParts[l] == "kubepods" {
   404  						runtime = DockerRuntime
   405  						break
   406  					}
   407  				}
   408  			}
   409  
   410  			// Return the first match, closest to the root dir path component, so that the
   411  			// container id of the outer container is returned. The container root is
   412  			// determined by being matched on the last path part.
   413  			return id, runtime
   414  		}
   415  
   416  		if matched := gardenContainerIdFromCgroupRegex.MatchString(id); matched {
   417  			runtime = GardenRuntime
   418  			return id, runtime
   419  		}
   420  	}
   421  
   422  	// cgroup dirs unrelated to containers provides empty (containerId, runtime)
   423  	return "", UnknownRuntime
   424  }
   425  
   426  func getCgroupIDForPath(path string) (ID, error) {
   427  	// Lower 32 bits of the cgroup id == inode number of matching cgroupfs entry
   428  	var stat syscall.Stat_t
   429  	if err := syscall.Stat(path, &stat); err != nil {
   430  		return 0, err
   431  	}
   432  	return stat.Ino, nil
   433  }
   434  
   435  func (c *Client) LoadCgroup(id ID, path string) {
   436  	c.cgroupMu.Lock()
   437  	defer c.cgroupMu.Unlock()
   438  
   439  	if _, found := c.cgroupCacheByID[id]; found {
   440  		return
   441  	}
   442  
   443  	c.cgroupCacheByID[id] = sync.OnceValue(func() *Cgroup {
   444  		cgroup := c.getCgroupForIDAndPath(id, path)
   445  		return cgroup
   446  	})
   447  }
   448  
   449  func (c *Client) cacheCgroup(cgroup *Cgroup) {
   450  	c.cgroupMu.Lock()
   451  	c.cgroupCacheByID[cgroup.Id] = func() *Cgroup { return cgroup }
   452  	c.cgroupMu.Unlock()
   453  }
   454  
   455  func (c *Client) CleanupCgroup(id ID) {
   456  	c.cgroupMu.Lock()
   457  	defer c.cgroupMu.Unlock()
   458  
   459  	_, found := c.cgroupCacheByID[id]
   460  	if !found {
   461  		return
   462  	}
   463  
   464  	delete(c.cgroupCacheByID, id)
   465  }