github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/cgroup/client.go (about) 1 package cgroup 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io/fs" 8 "os" 9 "path" 10 "path/filepath" 11 "regexp" 12 "strconv" 13 "strings" 14 "sync" 15 "syscall" 16 17 "github.com/castai/kvisor/pkg/logging" 18 "github.com/castai/kvisor/pkg/metrics" 19 ) 20 21 var ( 22 baseCgroupPath = "" 23 24 ErrContainerIDNotFoundInCgroupPath = errors.New("container id not found in cgroup path") 25 ErrCgroupNotFound = errors.New("cgroup not found") 26 ) 27 28 type ID = uint64 29 30 const ( 31 procCgroups = "/proc/cgroups" 32 cgroupControllersFile = "/sys/fs/cgroup/cgroup.controllers" 33 cgroupDefaultController = "cpuset" 34 ) 35 36 type Version uint8 37 38 func (v Version) String() string { 39 if v == V1 { 40 return "V1" 41 } 42 if v == V2 { 43 return "V2" 44 } 45 return "" 46 } 47 48 const ( 49 V1 = iota 50 V2 51 ) 52 53 // Represents the internal ID of a container runtime 54 type ContainerRuntimeID int 55 56 const ( 57 UnknownRuntime ContainerRuntimeID = iota 58 DockerRuntime 59 ContainerdRuntime 60 CrioRuntime 61 PodmanRuntime 62 GardenRuntime 63 ) 64 65 var runtimeStringMap = map[ContainerRuntimeID]string{ 66 UnknownRuntime: "unknown", 67 DockerRuntime: "docker", 68 ContainerdRuntime: "containerd", 69 CrioRuntime: "crio", 70 PodmanRuntime: "podman", 71 GardenRuntime: "garden", // there is no enricher (yet ?) for garden 72 } 73 74 func (runtime ContainerRuntimeID) String() string { 75 return runtimeStringMap[runtime] 76 } 77 78 func FromString(str string) ContainerRuntimeID { 79 switch str { 80 case "docker": 81 return DockerRuntime 82 case "crio": 83 return CrioRuntime 84 case "cri-o": 85 return CrioRuntime 86 case "podman": 87 return PodmanRuntime 88 case "containerd": 89 return ContainerdRuntime 90 case "garden": 91 return GardenRuntime 92 93 default: 94 return UnknownRuntime 95 } 96 } 97 98 type Client struct { 99 version Version 100 cgRoot string 101 cgroupCacheByID map[ID]func() *Cgroup 102 cgroupMu sync.RWMutex 103 defaultHierarchyID uint32 104 } 105 106 func NewClient(log *logging.Logger, root string) (*Client, error) { 107 version, defaultHierarchyID, err := getDefaultVersionAndHierarchy(log) 108 if err != nil { 109 return nil, fmt.Errorf("getting default cgroups version: %w", err) 110 } 111 log.WithField("component", "cgroup").Infof("cgroups detected version=%s, root=%s", version, root) 112 return &Client{ 113 version: version, 114 cgRoot: root, 115 cgroupCacheByID: make(map[uint64]func() *Cgroup), 116 defaultHierarchyID: defaultHierarchyID, 117 }, nil 118 } 119 120 func (c *Client) lookupCgroupForIDInCache(id ID) (*Cgroup, bool) { 121 c.cgroupMu.RLock() 122 defer c.cgroupMu.RUnlock() 123 124 if cgroup, found := c.cgroupCacheByID[id]; found { 125 return cgroup(), true 126 } 127 return nil, false 128 } 129 130 func (c *Client) GetCgroupForID(cgroupID ID) (*Cgroup, error) { 131 if cg, found := c.lookupCgroupForIDInCache(cgroupID); found { 132 return cg, nil 133 } 134 135 metrics.AgentFindCgroupFS.Inc() 136 137 cgroupPath, _ := c.findCgroupPathForID(cgroupID) 138 139 if cgroupPath == "" { 140 return nil, ErrCgroupNotFound 141 } 142 143 cgroup := c.getCgroupForIDAndPath(cgroupID, cgroupPath) 144 145 c.cacheCgroup(cgroup) 146 147 return cgroup, nil 148 } 149 150 func (c *Client) getCgroupForIDAndPath(cgroupID ID, cgroupPath string) *Cgroup { 151 containerID, containerRuntime := getContainerIdFromCgroup(cgroupPath) 152 153 cg := &Cgroup{ 154 Id: cgroupID, 155 ContainerID: containerID, 156 ContainerRuntime: containerRuntime, 157 Path: cgroupPath, 158 cgRoot: c.cgRoot, 159 subsystems: map[string]string{}, 160 } 161 162 switch c.version { 163 case V1: 164 after, _ := strings.CutPrefix(cgroupPath, cg.cgRoot) 165 subpath := strings.SplitN(after, "/", 1) 166 if len(subpath) != 2 { 167 return cg 168 } 169 last := subpath[1] 170 cg.Version = V1 171 cg.subsystems = map[string]string{ 172 "cpu": last, 173 "cpuacct": last, 174 "memory": last, 175 "blkio": last, 176 } 177 case V2: 178 after, _ := strings.CutPrefix(cgroupPath, cg.cgRoot) 179 cg.Version = V2 180 cg.subsystems = map[string]string{ 181 "": after, 182 } 183 } 184 return cg 185 } 186 187 func (c *Client) getCgroupSearchBasePath() string { 188 rootDir := c.cgRoot 189 190 if c.version == V1 { 191 // TODO: we hardcode this for now, but in the future we might want to make this configurable 192 // (cpuset might not always be the first cgroup reported by the kernel) 193 rootDir = filepath.Join(rootDir, "cpuset") 194 } 195 196 return rootDir 197 } 198 199 func (c *Client) findCgroupPathForID(cgroupId ID) (string, ID) { 200 found := errors.New("found") 201 retPath := "" 202 var cgroupID ID 203 204 rootDir := c.getCgroupSearchBasePath() 205 206 _ = filepath.Walk(rootDir, func(path string, info fs.FileInfo, err error) error { 207 // nolint:nilerr 208 if err != nil || !info.IsDir() { 209 return nil 210 } 211 212 stat, ok := info.Sys().(*syscall.Stat_t) 213 214 if !ok { 215 return errors.New("unexpected stat") 216 } 217 218 if (stat.Ino & 0xFFFFFFFF) == (cgroupId & 0xFFFFFFFF) { 219 retPath = path 220 cgroupID = stat.Ino 221 return found 222 } 223 224 return nil 225 }) 226 227 if retPath == rootDir { 228 return "", 0 229 } 230 231 return retPath, cgroupID 232 } 233 234 func (c *Client) DefaultCgroupVersion() Version { 235 return c.version 236 } 237 238 func (c *Client) IsDefaultHierarchy(hierarchyID uint32) bool { 239 // There is no such thing as a default hierarchy in cgroup v2, as this only applies to cgroup v1, 240 // where we need to ensure to always use the same type of cgroup for handling events. 241 if c.DefaultCgroupVersion() == V2 { 242 return true 243 } 244 245 return c.defaultHierarchyID == hierarchyID 246 } 247 248 func getDefaultVersionAndHierarchy(log *logging.Logger) (Version, uint32, error) { 249 // 1st Method: already mounted cgroupv1 filesystem 250 251 if ok, _ := isCgroupV2MountedAndDefault(); ok { 252 return V2, 0, nil 253 } 254 255 // 256 // 2nd Method: From cgroup man page: 257 // ... 258 // 2. The unique ID of the cgroup hierarchy on which this 259 // controller is mounted. If multiple cgroups v1 260 // controllers are bound to the same hierarchy, then each 261 // will show the same hierarchy ID in this field. The 262 // value in this field will be 0 if: 263 // 264 // a) the controller is not mounted on a cgroups v1 265 // hierarchy; 266 // b) the controller is bound to the cgroups v2 single 267 // unified hierarchy; or 268 // c) the controller is disabled (see below). 269 // ... 270 271 var value uint64 272 file, err := os.Open(procCgroups) 273 if err != nil { 274 return 0, 0, fmt.Errorf("opening %s: %w", procCgroups, err) 275 } 276 defer func() { 277 if err := file.Close(); err != nil { 278 log.Warnf("closing %s: %v", procCgroups, err) 279 } 280 }() 281 282 scanner := bufio.NewScanner(file) 283 for scanner.Scan() { 284 line := strings.Fields(scanner.Text()) 285 if line[0] != cgroupDefaultController { 286 continue 287 } 288 value, err = strconv.ParseUint(line[1], 10, 32) 289 if err != nil { 290 return 0, 0, fmt.Errorf("parsing %s: %w", procCgroups, err) 291 } 292 } 293 294 if value == 0 { // == (a), (b) or (c) 295 return V2, 0, nil 296 } 297 298 return V1, uint32(value), nil 299 } 300 301 func isCgroupV2MountedAndDefault() (bool, error) { 302 _, err := os.Stat(cgroupControllersFile) 303 if os.IsNotExist(err) { 304 return false, nil 305 } 306 if err != nil { 307 return false, fmt.Errorf("opening %s: %w", cgroupControllersFile, err) 308 } 309 310 return true, nil 311 } 312 313 func NewFromProcessCgroupFile(filePath string) (*Cgroup, error) { 314 data, err := os.ReadFile(filePath) 315 if err != nil { 316 return nil, err 317 } 318 cg := &Cgroup{ 319 subsystems: map[string]string{}, 320 cgRoot: baseCgroupPath, 321 } 322 for _, line := range strings.Split(string(data), "\n") { 323 parts := strings.SplitN(line, ":", 3) 324 if len(parts) < 3 { 325 continue 326 } 327 for _, cgType := range strings.Split(parts[1], ",") { 328 cg.subsystems[cgType] = path.Join(baseCgroupPath, parts[2]) 329 } 330 } 331 332 if p := cg.subsystems["cpu"]; p != "" { 333 cg.Path = p 334 cg.Version = V1 335 } else { 336 cg.Path = cg.subsystems[""] 337 cg.Version = V2 338 } 339 340 if containerID, runtimeType := getContainerIdFromCgroup(cg.Path); containerID == "" { 341 return nil, ErrContainerIDNotFoundInCgroupPath 342 } else { 343 cg.ContainerID = containerID 344 cg.ContainerRuntime = runtimeType 345 } 346 347 if cg.Id, err = getCgroupIDForPath(cg.Path); err != nil { 348 return nil, err 349 } 350 351 return cg, nil 352 } 353 354 var ( 355 containerIdFromCgroupRegex = regexp.MustCompile(`^[A-Fa-f0-9]{64}$`) 356 gardenContainerIdFromCgroupRegex = regexp.MustCompile(`^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){4}$`) 357 ) 358 359 // getContainerIdFromCgroup extracts container id and its runtime from path. It returns 360 // the container id and the used runtime. 361 func getContainerIdFromCgroup(cgroupPath string) (string, ContainerRuntimeID) { 362 cgroupParts := strings.Split(cgroupPath, "/") 363 364 // search from the end to get the most inner container id 365 for i := len(cgroupParts) - 1; i >= 0; i = i - 1 { 366 pc := cgroupParts[i] 367 if len(pc) < 28 { 368 continue // container id is at least 28 characters long 369 } 370 371 runtime := UnknownRuntime 372 id := strings.TrimSuffix(pc, ".scope") 373 374 switch { 375 case strings.HasPrefix(id, "docker-"): 376 runtime = DockerRuntime 377 id = strings.TrimPrefix(id, "docker-") 378 case strings.HasPrefix(id, "crio-"): 379 runtime = CrioRuntime 380 id = strings.TrimPrefix(id, "crio-") 381 case strings.HasPrefix(id, "cri-containerd-"): 382 runtime = ContainerdRuntime 383 id = strings.TrimPrefix(id, "cri-containerd-") 384 case strings.Contains(pc, ":cri-containerd:"): 385 runtime = ContainerdRuntime 386 id = pc[strings.LastIndex(pc, ":cri-containerd:")+len(":cri-containerd:"):] 387 case strings.HasPrefix(id, "libpod-"): 388 runtime = PodmanRuntime 389 id = strings.TrimPrefix(id, "libpod-") 390 } 391 392 if matched := containerIdFromCgroupRegex.MatchString(id); matched { 393 if runtime == UnknownRuntime && i > 0 && cgroupParts[i-1] == "docker" { 394 // non-systemd docker with format: .../docker/01adbf...f26db7f/ 395 runtime = DockerRuntime 396 } 397 if runtime == UnknownRuntime && i > 0 && cgroupParts[i-1] == "actions_job" { 398 // non-systemd docker with format in GitHub Actions: .../actions_job/01adbf...f26db7f/ 399 runtime = DockerRuntime 400 } 401 if runtime == UnknownRuntime && i > 0 { 402 for l := i; l > 0; l-- { 403 if cgroupParts[l] == "kubepods" { 404 runtime = DockerRuntime 405 break 406 } 407 } 408 } 409 410 // Return the first match, closest to the root dir path component, so that the 411 // container id of the outer container is returned. The container root is 412 // determined by being matched on the last path part. 413 return id, runtime 414 } 415 416 if matched := gardenContainerIdFromCgroupRegex.MatchString(id); matched { 417 runtime = GardenRuntime 418 return id, runtime 419 } 420 } 421 422 // cgroup dirs unrelated to containers provides empty (containerId, runtime) 423 return "", UnknownRuntime 424 } 425 426 func getCgroupIDForPath(path string) (ID, error) { 427 // Lower 32 bits of the cgroup id == inode number of matching cgroupfs entry 428 var stat syscall.Stat_t 429 if err := syscall.Stat(path, &stat); err != nil { 430 return 0, err 431 } 432 return stat.Ino, nil 433 } 434 435 func (c *Client) LoadCgroup(id ID, path string) { 436 c.cgroupMu.Lock() 437 defer c.cgroupMu.Unlock() 438 439 if _, found := c.cgroupCacheByID[id]; found { 440 return 441 } 442 443 c.cgroupCacheByID[id] = sync.OnceValue(func() *Cgroup { 444 cgroup := c.getCgroupForIDAndPath(id, path) 445 return cgroup 446 }) 447 } 448 449 func (c *Client) cacheCgroup(cgroup *Cgroup) { 450 c.cgroupMu.Lock() 451 c.cgroupCacheByID[cgroup.Id] = func() *Cgroup { return cgroup } 452 c.cgroupMu.Unlock() 453 } 454 455 func (c *Client) CleanupCgroup(id ID) { 456 c.cgroupMu.Lock() 457 defer c.cgroupMu.Unlock() 458 459 _, found := c.cgroupCacheByID[id] 460 if !found { 461 return 462 } 463 464 delete(c.cgroupCacheByID, id) 465 }