// Source: github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/container-hook/tracer.go

// Copyright 2023 The Inspektor Gadget authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package containerhook detects when a container is created or terminated.
//
// It uses two mechanisms to detect new containers:
//  1. fanotify with FAN_OPEN_EXEC_PERM.
//  2. ebpf on the sys_enter_execve tracepoint to get the execve arguments.
//
// Using fanotify with FAN_OPEN_EXEC_PERM allows to call a callback function
// while the container is being created. The container is paused until the
// callback function returns.
//
// Using ebpf on the sys_enter_execve tracepoint allows to get the execve
// arguments without the need to read /proc/$pid/cmdline or /proc/$pid/comm.
// Reading /proc/$pid/cmdline is not possible using only fanotify when the
// tracer is not in the same pidns as the process being traced. This is the
// case when Inspektor Gadget is started with hostPID=false.
//
// https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/devel/fanotify-ebpf.png
package containerhook

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
	securejoin "github.com/cyphar/filepath-securejoin"
	ocispec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/s3rj1k/go-fanotify/fanotify"
	log "github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"

	"github.com/inspektor-gadget/inspektor-gadget/pkg/btfgen"
	"github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets"
	"github.com/inspektor-gadget/inspektor-gadget/pkg/kfilefields"
	"github.com/inspektor-gadget/inspektor-gadget/pkg/utils/host"
)

//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target $TARGET -cc clang -cflags ${CFLAGS} -no-global-types -type record execruntime ./bpf/execruntime.bpf.c -- -I./bpf/

// EventType tells whether a ContainerEvent reports a container being added
// or removed.
type EventType int

const (
	// EventTypeAddContainer is reported when a new container is detected.
	EventTypeAddContainer EventType = iota
	// EventTypeRemoveContainer is reported when a watched container has
	// terminated.
	EventTypeRemoveContainer
)

// ContainerEvent is the notification for container creation or termination
type ContainerEvent struct {
	// Type is whether the container was added or removed
	Type EventType

	// ContainerID is the container id, typically a 64 hexadecimal string
	ContainerID string

	// ContainerName is the container name, typically two words with an underscore
	ContainerName string

	// ContainerPID is the process id of the container
	ContainerPID uint32

	// Container's configuration is the config.json from the OCI runtime
	// spec
	ContainerConfig *ocispec.Spec

	// Bundle is the directory containing the config.json from the OCI
	// runtime spec
	// See https://github.com/opencontainers/runtime-spec/blob/main/bundle.md
	Bundle string
}

// ContainerNotifyFunc is the signature of the callback that receives every
// ContainerEvent produced by a ContainerNotifier.
type ContainerNotifyFunc func(notif ContainerEvent)

// watchedContainer is a container that is being watched for termination,
// identified by its container id and the pid of its process.
type watchedContainer struct {
	id  string
	pid int
}

// futureContainer holds what is known about a container that was detected
// before the OCI runtime created it (see ContainerNotifier.futureContainers).
type futureContainer struct {
	id        string
	name      string
	bundleDir string
	pidFile   string
}

// ContainerNotifier detects container creation and termination and reports
// them through its callback.
type ContainerNotifier struct {
	// runtimeBinaryNotify is the fanotify handle whose marks are placed on
	// the container runtime binaries.
	runtimeBinaryNotify *fanotify.NotifyFD
	// callback receives the container events.
	callback ContainerNotifyFunc

	// containers is the set of containers that are being watched for
	// termination. This prevents duplicate calls to
	// AddWatchContainerTermination.
	//
	// Keys: Container ID
	containers   map[string]*watchedContainer
	containersMu sync.Mutex

	// futureContainers is the set of containers that are detected before
	// oci-runtime (runc/crun) creates the container e.g. detected via conmon
	//
	// Keys: Container ID
	futureContainers map[string]*futureContainer
	futureMu         sync.Mutex

	// objs holds the loaded ebpf maps and programs; links holds their
	// attachments.
	objs  execruntimeObjects
	links []link.Link

	// set to true when the notifier is closed
	closed atomic.Bool
	// this channel is used in watchContainersTermination() to avoid having to wait for the
	// ticker to trigger before returning
	done chan bool

	// wg tracks the background goroutines started by the notifier.
	wg sync.WaitGroup
}

// runtimePaths is the list of paths where the container runtime runc or crun
// could be installed. Depending on the Linux distribution, it could be in
// different locations.
//
// When this package is executed in a container, it prepends the
// HOST_ROOT env variable to the path.
146 var runtimePaths = []string{ 147 "/bin/runc", 148 "/usr/bin/runc", 149 "/usr/sbin/runc", 150 "/usr/local/bin/runc", 151 "/usr/local/sbin/runc", 152 "/usr/lib/cri-o-runc/sbin/runc", 153 "/run/torcx/unpack/docker/bin/runc", 154 "/usr/bin/crun", 155 "/usr/bin/conmon", 156 "/var/lib/rancher/k3s/data/current/bin/runc", 157 } 158 159 // initFanotify initializes the fanotify API with the flags we need 160 func initFanotify() (*fanotify.NotifyFD, error) { 161 fanotifyFlags := uint(unix.FAN_CLOEXEC | unix.FAN_CLASS_CONTENT | unix.FAN_UNLIMITED_QUEUE | unix.FAN_UNLIMITED_MARKS | unix.FAN_NONBLOCK) 162 openFlags := os.O_RDONLY | unix.O_LARGEFILE | unix.O_CLOEXEC 163 return fanotify.Initialize(fanotifyFlags, openFlags) 164 } 165 166 // Supported detects if RuncNotifier is supported in the current environment 167 func Supported() bool { 168 notifier, err := NewContainerNotifier(func(notif ContainerEvent) {}) 169 if notifier != nil { 170 notifier.Close() 171 } 172 if err != nil { 173 log.Warnf("ContainerNotifier: not supported: %s", err) 174 } 175 return err == nil 176 } 177 178 // NewContainerNotifier uses fanotify and ebpf to detect when a container is 179 // created or terminated, and call the callback on such event. 
180 // 181 // Limitations: 182 // - the container runtime must be installed in one of the paths listed by runtimePaths 183 func NewContainerNotifier(callback ContainerNotifyFunc) (*ContainerNotifier, error) { 184 n := &ContainerNotifier{ 185 callback: callback, 186 containers: make(map[string]*watchedContainer), 187 futureContainers: make(map[string]*futureContainer), 188 done: make(chan bool), 189 } 190 191 if err := n.install(); err != nil { 192 n.Close() 193 return nil, err 194 } 195 196 return n, nil 197 } 198 199 func (n *ContainerNotifier) installEbpf(fanotifyFd int) error { 200 spec, err := loadExecruntime() 201 if err != nil { 202 return fmt.Errorf("load ebpf program for container-hook: %w", err) 203 } 204 205 fanotifyPrivateData, err := kfilefields.ReadPrivateDataFromFd(fanotifyFd) 206 if err != nil { 207 return fmt.Errorf("reading private data from fanotify fd: %w", err) 208 } 209 210 consts := map[string]interface{}{ 211 "tracer_group": fanotifyPrivateData, 212 } 213 if err := spec.RewriteConstants(consts); err != nil { 214 return fmt.Errorf("RewriteConstants: %w", err) 215 } 216 217 opts := ebpf.CollectionOptions{ 218 Programs: ebpf.ProgramOptions{ 219 KernelTypes: btfgen.GetBTFSpec(), 220 }, 221 } 222 223 if err := spec.LoadAndAssign(&n.objs, &opts); err != nil { 224 return fmt.Errorf("loading maps and programs: %w", err) 225 } 226 227 // Attach ebpf programs 228 l, err := link.Kprobe("fsnotify_remove_first_event", n.objs.IgFaPickE, nil) 229 if err != nil { 230 return fmt.Errorf("attaching kprobe fsnotify_remove_first_event: %w", err) 231 } 232 n.links = append(n.links, l) 233 234 l, err = link.Kretprobe("fsnotify_remove_first_event", n.objs.IgFaPickX, nil) 235 if err != nil { 236 return fmt.Errorf("attaching kretprobe fsnotify_remove_first_event: %w", err) 237 } 238 n.links = append(n.links, l) 239 240 l, err = link.Tracepoint("syscalls", "sys_enter_execve", n.objs.IgExecveE, nil) 241 if err != nil { 242 return fmt.Errorf("attaching tracepoint: %w", 
err) 243 } 244 n.links = append(n.links, l) 245 246 l, err = link.Tracepoint("syscalls", "sys_exit_execve", n.objs.IgExecveX, nil) 247 if err != nil { 248 return fmt.Errorf("attaching tracepoint: %w", err) 249 } 250 n.links = append(n.links, l) 251 252 return nil 253 } 254 255 func (n *ContainerNotifier) install() error { 256 // Start fanotify 257 runtimeBinaryNotify, err := initFanotify() 258 if err != nil { 259 return err 260 } 261 n.runtimeBinaryNotify = runtimeBinaryNotify 262 263 // Load, initialize and attach ebpf program 264 err = n.installEbpf(runtimeBinaryNotify.Fd) 265 if err != nil { 266 return err 267 } 268 269 // Attach fanotify to various runtime binaries 270 runtimeFound := false 271 272 runtimePath := os.Getenv("RUNTIME_PATH") 273 if runtimePath != "" { 274 log.Debugf("container-hook: trying runtime from RUNTIME_PATH env variable at %s", runtimePath) 275 276 // Check if we have to prepend the host root to the runtime path 277 if !strings.HasPrefix(runtimePath, host.HostRoot) { 278 // SecureJoin will resolve symlinks according to the host root 279 runtimePath, err = securejoin.SecureJoin(host.HostRoot, runtimePath) 280 if err != nil { 281 return fmt.Errorf("container-hook: securejoin failed: %w", err) 282 } 283 } 284 285 if _, err := os.Stat(runtimePath); errors.Is(err, os.ErrNotExist) { 286 return err 287 } 288 289 if err := runtimeBinaryNotify.Mark(unix.FAN_MARK_ADD, unix.FAN_OPEN_EXEC_PERM, unix.AT_FDCWD, runtimePath); err != nil { 290 return fmt.Errorf("fanotify marking of %s: %w", runtimePath, err) 291 } 292 log.Debugf("container-hook: monitoring runtime at %s", runtimePath) 293 runtimeFound = true 294 } else { 295 for _, r := range runtimePaths { 296 // SecureJoin will resolve symlinks according to the host root 297 runtimePath, err := securejoin.SecureJoin(host.HostRoot, r) 298 if err != nil { 299 log.Debugf("container-hook: securejoin failed: %s", err) 300 continue 301 } 302 303 log.Debugf("container-hook: trying runtime at %s", runtimePath) 
304 305 if _, err := os.Stat(runtimePath); errors.Is(err, os.ErrNotExist) { 306 log.Debugf("container-hook: runc at %s not found", runtimePath) 307 continue 308 } 309 310 if err := runtimeBinaryNotify.Mark(unix.FAN_MARK_ADD, unix.FAN_OPEN_EXEC_PERM, unix.AT_FDCWD, runtimePath); err != nil { 311 log.Warnf("container-hook: failed to fanotify mark: %s", err) 312 continue 313 } 314 log.Debugf("container-hook: monitoring runtime at %s", runtimePath) 315 runtimeFound = true 316 } 317 } 318 319 if !runtimeFound { 320 runtimeBinaryNotify.File.Close() 321 return fmt.Errorf("no container runtime can be monitored with fanotify. The following paths were tested: %s. You can use the RUNTIME_PATH env variable to specify a custom path. If you are successful doing so, please open a PR to add your custom path to runtimePaths", strings.Join(runtimePaths, ",")) 322 } 323 324 n.wg.Add(2) 325 go n.watchContainersTermination() 326 go n.watchRuntimeBinary() 327 328 return nil 329 } 330 331 // AddWatchContainerTermination watches a container for termination and 332 // generates an event on the notifier. This is automatically called for new 333 // containers detected by ContainerNotifier, but it can also be called for 334 // containers detected externally such as initial containers. 
335 func (n *ContainerNotifier) AddWatchContainerTermination(containerID string, containerPID int) error { 336 n.containersMu.Lock() 337 defer n.containersMu.Unlock() 338 339 if _, ok := n.containers[containerID]; ok { 340 // This container is already being watched for termination 341 return nil 342 } 343 344 n.containers[containerID] = &watchedContainer{ 345 id: containerID, 346 pid: containerPID, 347 } 348 349 return nil 350 } 351 352 // watchContainerTermination waits until the container terminates 353 func (n *ContainerNotifier) watchContainersTermination() { 354 defer n.wg.Done() 355 356 ticker := time.NewTicker(time.Second) 357 defer ticker.Stop() 358 359 for { 360 select { 361 case <-n.done: 362 return 363 case <-ticker.C: 364 if n.closed.Load() { 365 return 366 } 367 368 dirEntries, err := os.ReadDir(host.HostProcFs) 369 if err != nil { 370 log.Errorf("reading /proc: %s", err) 371 return 372 } 373 pids := make(map[int]bool) 374 for _, entry := range dirEntries { 375 pid, err := strconv.Atoi(entry.Name()) 376 if err != nil { 377 // entry is not a process directory. Ignore. 378 continue 379 } 380 pids[pid] = true 381 } 382 383 n.containersMu.Lock() 384 for _, c := range n.containers { 385 if pids[c.pid] { 386 // container still running 387 continue 388 } 389 390 go n.callback(ContainerEvent{ 391 Type: EventTypeRemoveContainer, 392 ContainerID: c.id, 393 ContainerPID: uint32(c.pid), 394 }) 395 396 delete(n.containers, c.id) 397 } 398 n.containersMu.Unlock() 399 } 400 } 401 } 402 403 func (n *ContainerNotifier) watchPidFileIterate( 404 pidFileDirNotify *fanotify.NotifyFD, 405 bundleDir string, 406 configJSONPath string, 407 pidFile string, 408 pidFileDir string, 409 ) (bool, error) { 410 // Get the next event from fanotify. 411 // Even though the API allows to pass skipPIDs, we cannot use 412 // it here because ResponseAllow would not be called. 
413 data, err := pidFileDirNotify.GetEvent() 414 if err != nil { 415 return false, fmt.Errorf("%w", err) 416 } 417 418 // data can be nil if the event received is from a process in skipPIDs. 419 // In that case, skip and get the next event. 420 if data == nil { 421 return false, nil 422 } 423 424 // Don't leak the fd received by GetEvent 425 defer data.Close() 426 dataFile := data.File() 427 defer dataFile.Close() 428 429 if !data.MatchMask(unix.FAN_ACCESS_PERM) { 430 // This should not happen: FAN_ACCESS_PERM is the only mask Marked 431 return false, fmt.Errorf("fanotify: unknown event on runc: mask=%d pid=%d", data.Mask, data.Pid) 432 } 433 434 // This unblocks whoever is accessing the pidfile 435 defer pidFileDirNotify.ResponseAllow(data) 436 437 path, err := data.GetPath() 438 if err != nil { 439 return false, err 440 } 441 path = filepath.Join(host.HostRoot, path) 442 443 // Consider files identical if they have the same device/inode, 444 // even if the paths differ due to symlinks (for example, 445 // the event's path is /run/... but the runc --pid-file argument 446 // uses /var/run/..., where /var/run is a symlink to /run). 447 filesAreIdentical, err := checkFilesAreIdentical(path, pidFile) 448 if err != nil { 449 return false, err 450 } else if !filesAreIdentical { 451 return false, nil 452 } 453 454 pidFileContent, err := io.ReadAll(dataFile) 455 if err != nil { 456 return false, err 457 } 458 if len(pidFileContent) == 0 { 459 return false, fmt.Errorf("empty pid file") 460 } 461 containerPID, err := strconv.Atoi(string(pidFileContent)) 462 if err != nil { 463 return false, err 464 } 465 466 // Unfortunately, Linux 5.4 doesn't respect ignore masks 467 // See fix in Linux 5.9: 468 // https://github.com/torvalds/linux/commit/497b0c5a7c0688c1b100a9c2e267337f677c198e 469 // Workaround: remove parent mask. 
We don't need it anymore :) 470 err = pidFileDirNotify.Mark(unix.FAN_MARK_REMOVE, unix.FAN_ACCESS_PERM|unix.FAN_EVENT_ON_CHILD, unix.AT_FDCWD, pidFileDir) 471 if err != nil { 472 return false, nil 473 } 474 475 bundleConfigJSON, err := os.ReadFile(configJSONPath) 476 if err != nil { 477 return false, err 478 } 479 containerConfig := &ocispec.Spec{} 480 err = json.Unmarshal(bundleConfigJSON, containerConfig) 481 if err != nil { 482 return false, err 483 } 484 485 // cri-o appends userdata to bundleDir, 486 // so we trim it here to get the correct containerID 487 containerID := filepath.Base(filepath.Clean(strings.TrimSuffix(bundleDir, "userdata"))) 488 489 err = n.AddWatchContainerTermination(containerID, containerPID) 490 if err != nil { 491 log.Errorf("container %s with pid %d terminated before we could watch it: %s", containerID, containerPID, err) 492 return true, nil 493 } 494 495 if containerPID > math.MaxUint32 { 496 log.Errorf("Container PID (%d) exceeds math.MaxUint32 (%d)", containerPID, math.MaxUint32) 497 return true, nil 498 } 499 500 var containerName string 501 n.futureMu.Lock() 502 fc, ok := n.futureContainers[containerID] 503 if ok { 504 containerName = fc.name 505 } 506 delete(n.futureContainers, containerID) 507 n.futureMu.Unlock() 508 509 n.callback(ContainerEvent{ 510 Type: EventTypeAddContainer, 511 ContainerID: containerID, 512 ContainerPID: uint32(containerPID), 513 ContainerConfig: containerConfig, 514 Bundle: bundleDir, 515 ContainerName: containerName, 516 }) 517 518 return true, nil 519 } 520 521 func checkFilesAreIdentical(path1, path2 string) (bool, error) { 522 // Since fanotify masks don't work on Linux 5.4, we could get a 523 // notification for an unrelated file before the pid file is created 524 // See fix in Linux 5.9: 525 // https://github.com/torvalds/linux/commit/497b0c5a7c0688c1b100a9c2e267337f677c198e 526 // In this case we should not return an error. 
527 if filepath.Base(path1) != filepath.Base(path2) { 528 return false, nil 529 } 530 531 f1, err := os.Stat(path1) 532 if err != nil { 533 return false, err 534 } 535 536 f2, err := os.Stat(path2) 537 if err != nil { 538 return false, err 539 } 540 541 return os.SameFile(f1, f2), nil 542 } 543 544 func (n *ContainerNotifier) monitorRuntimeInstance(bundleDir string, pidFile string) error { 545 fanotifyFlags := uint(unix.FAN_CLOEXEC | unix.FAN_CLASS_CONTENT | unix.FAN_UNLIMITED_QUEUE | unix.FAN_UNLIMITED_MARKS) 546 openFlags := os.O_RDONLY | unix.O_LARGEFILE | unix.O_CLOEXEC 547 548 pidFileDirNotify, err := fanotify.Initialize(fanotifyFlags, openFlags) 549 if err != nil { 550 return err 551 } 552 553 // The pidfile does not exist yet, so we cannot monitor it directly. 554 // Instead we monitor its parent directory with FAN_EVENT_ON_CHILD to 555 // get events on the directory's children. 556 pidFileDir := filepath.Dir(pidFile) 557 err = pidFileDirNotify.Mark(unix.FAN_MARK_ADD, unix.FAN_ACCESS_PERM|unix.FAN_EVENT_ON_CHILD, unix.AT_FDCWD, pidFileDir) 558 if err != nil { 559 pidFileDirNotify.File.Close() 560 return fmt.Errorf("marking %s: %w", pidFileDir, err) 561 } 562 563 // watchPidFileIterate() will read config.json and it might be in the 564 // same directory as the pid file. To avoid getting events unrelated to 565 // the pidfile, add an ignore mask. 
566 // 567 // This is best effort because the ignore mask is unfortunately not 568 // respected until a fix in Linux 5.9: 569 // https://github.com/torvalds/linux/commit/497b0c5a7c0688c1b100a9c2e267337f677c198e 570 configJSONPath := filepath.Join(bundleDir, "config.json") 571 if _, err := os.Stat(configJSONPath); errors.Is(err, os.ErrNotExist) { 572 // podman might install config.json in the userdata directory 573 configJSONPath = filepath.Join(bundleDir, "userdata", "config.json") 574 if _, err := os.Stat(configJSONPath); errors.Is(err, os.ErrNotExist) { 575 pidFileDirNotify.File.Close() 576 return fmt.Errorf("config not found at %s", configJSONPath) 577 } 578 } 579 err = pidFileDirNotify.Mark(unix.FAN_MARK_ADD|unix.FAN_MARK_IGNORED_MASK, unix.FAN_ACCESS_PERM, unix.AT_FDCWD, configJSONPath) 580 if err != nil { 581 pidFileDirNotify.File.Close() 582 return fmt.Errorf("marking %s: %w", configJSONPath, err) 583 } 584 585 // similar to config.json, we ignore passwd file if it exists 586 passwdPath := filepath.Join(bundleDir, "passwd") 587 if _, err := os.Stat(passwdPath); !errors.Is(err, os.ErrNotExist) { 588 err = pidFileDirNotify.Mark(unix.FAN_MARK_ADD|unix.FAN_MARK_IGNORED_MASK, unix.FAN_ACCESS_PERM, unix.AT_FDCWD, passwdPath) 589 if err != nil { 590 pidFileDirNotify.File.Close() 591 return fmt.Errorf("marking passwd path: %w", err) 592 } 593 } 594 595 n.wg.Add(1) 596 go func() { 597 defer n.wg.Done() 598 defer pidFileDirNotify.File.Close() 599 for { 600 stop, err := n.watchPidFileIterate(pidFileDirNotify, bundleDir, configJSONPath, pidFile, pidFileDir) 601 if n.closed.Load() { 602 return 603 } 604 if err != nil { 605 log.Warnf("error watching pid: %v\n", err) 606 return 607 } 608 if stop { 609 return 610 } 611 } 612 }() 613 614 return nil 615 } 616 617 func (n *ContainerNotifier) watchRuntimeBinary() { 618 defer n.wg.Done() 619 620 for { 621 stop, err := n.watchRuntimeIterate() 622 if n.closed.Load() { 623 n.runtimeBinaryNotify.File.Close() 624 return 625 } 626 if 
err != nil { 627 log.Errorf("error watching runtime binary: %v\n", err) 628 } 629 if stop { 630 n.runtimeBinaryNotify.File.Close() 631 return 632 } 633 } 634 } 635 636 func (n *ContainerNotifier) parseConmonCmdline(cmdlineArr []string) { 637 containerName := "" 638 containerID := "" 639 bundleDir := "" 640 pidFile := "" 641 642 for i := 0; i < len(cmdlineArr); i++ { 643 verb := cmdlineArr[i] 644 arg := "" 645 if i+1 < len(cmdlineArr) { 646 arg = cmdlineArr[i+1] 647 } 648 switch verb { 649 case "-n", "--name": 650 containerName = arg 651 i++ 652 case "-c", "--cid": 653 containerID = arg 654 i++ 655 case "-b", "--bundle": 656 bundleDir = arg 657 i++ 658 case "-p", "--container-pidfile": 659 pidFile = arg 660 i++ 661 } 662 } 663 664 if containerName == "" || containerID == "" || bundleDir == "" || pidFile == "" { 665 return 666 } 667 668 n.futureMu.Lock() 669 n.futureContainers[containerID] = &futureContainer{ 670 id: containerID, 671 pidFile: pidFile, 672 bundleDir: bundleDir, 673 name: containerName, 674 } 675 n.futureMu.Unlock() 676 } 677 678 func (n *ContainerNotifier) parseOCIRuntime(comm string, cmdlineArr []string) { 679 // Parse oci-runtime (runc/crun) command line 680 createFound := false 681 bundleDir := "" 682 pidFile := "" 683 684 for i := 0; i < len(cmdlineArr); i++ { 685 if cmdlineArr[i] == "create" { 686 createFound = true 687 continue 688 } 689 if cmdlineArr[i] == "--bundle" && i+1 < len(cmdlineArr) { 690 i++ 691 bundleDir = filepath.Join(host.HostRoot, cmdlineArr[i]) 692 continue 693 } 694 if cmdlineArr[i] == "--pid-file" && i+1 < len(cmdlineArr) { 695 i++ 696 pidFile = filepath.Join(host.HostRoot, cmdlineArr[i]) 697 continue 698 } 699 } 700 701 if createFound && bundleDir != "" && pidFile != "" { 702 err := n.monitorRuntimeInstance(bundleDir, pidFile) 703 if err != nil { 704 log.Errorf("error monitoring runtime instance: %v\n", err) 705 } 706 } 707 } 708 709 func (n *ContainerNotifier) watchRuntimeIterate() (bool, error) { 710 // Get the next event 
from fanotify. 711 // Even though the API allows to pass skipPIDs, we cannot use it here 712 // because ResponseAllow would not be called. 713 data, err := n.runtimeBinaryNotify.GetEvent() 714 if err != nil { 715 return true, err 716 } 717 718 // data can be nil if the event received is from a process in skipPIDs. 719 // In that case, skip and get the next event. 720 if data == nil { 721 return false, nil 722 } 723 724 // Don't leak the fd received by GetEvent 725 defer data.Close() 726 727 if !data.MatchMask(unix.FAN_OPEN_EXEC_PERM) { 728 // This should not happen: FAN_OPEN_EXEC_PERM is the only mask Marked 729 return false, fmt.Errorf("fanotify: unknown event on runc: mask=%d pid=%d", data.Mask, data.Pid) 730 } 731 732 // This unblocks the execution 733 defer n.runtimeBinaryNotify.ResponseAllow(data) 734 735 // Lookup entry in ebpf map ig_fa_records 736 var record execruntimeRecord 737 err = n.objs.IgFaRecords.LookupAndDelete(nil, &record) 738 if err != nil { 739 return false, fmt.Errorf("lookup record: %w", err) 740 } 741 742 // Skip empty record 743 // This can happen when the ebpf code didn't find the exec args 744 if record.Pid == 0 { 745 log.Debugf("skip event with pid=0") 746 return false, nil 747 } 748 if record.ArgsSize == 0 { 749 log.Debugf("skip event without args") 750 return false, nil 751 } 752 753 callerComm := strings.TrimRight(string(record.CallerComm[:]), "\x00") 754 755 cmdlineArr := []string{} 756 calleeComm := "" 757 for _, arg := range strings.Split(string(record.Args[0:record.ArgsSize]), "\x00") { 758 if arg != "" { 759 cmdlineArr = append(cmdlineArr, arg) 760 } 761 } 762 if len(cmdlineArr) == 0 { 763 log.Debugf("cannot get cmdline for pid %d", record.Pid) 764 return false, nil 765 } 766 if len(cmdlineArr) > 0 { 767 calleeComm = filepath.Base(cmdlineArr[0]) 768 } 769 770 log.Debugf("got event with pid=%d caller=%q callee=%q args=%v", 771 record.Pid, 772 callerComm, calleeComm, 773 cmdlineArr) 774 775 // runc is executing itself with 
unix.Exec(), so fanotify receives two 776 // FAN_OPEN_EXEC_PERM events: 777 // 1. from containerd-shim (or similar) 778 // 2. from runc, by this re-execution. 779 // This filter takes the first one. 780 781 switch calleeComm { 782 case "conmon": 783 // Calling sequence: crio/podman -> conmon -> runc/crun 784 n.parseConmonCmdline(cmdlineArr) 785 case "runc", "crun": 786 n.parseOCIRuntime(calleeComm, cmdlineArr) 787 default: 788 return false, nil 789 } 790 791 return false, nil 792 } 793 794 func (n *ContainerNotifier) Close() { 795 n.closed.Store(true) 796 close(n.done) 797 if n.runtimeBinaryNotify != nil { 798 n.runtimeBinaryNotify.File.Close() 799 } 800 n.wg.Wait() 801 802 for _, l := range n.links { 803 gadgets.CloseLink(l) 804 } 805 n.links = nil 806 n.objs.Close() 807 }