github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/utils/host/workarounds.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  // Copyright 2023 The Inspektor Gadget authors
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Package host provides ways to access the host filesystem.
    19  //
    20  // Inspektor Gadget can run either in the host or in a container. When running
    21  // in a container, the host filesystem must be available in a specific
    22  // directory.
    23  package host
    24  
    25  import (
    26  	"context"
    27  	"errors"
    28  	"fmt"
    29  	"os"
    30  	"os/signal"
    31  	"path/filepath"
    32  	"strconv"
    33  	"strings"
    34  	"syscall"
    35  
    36  	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
    37  	"github.com/godbus/dbus/v5"
    38  	"github.com/google/uuid"
    39  	log "github.com/sirupsen/logrus"
    40  	"github.com/syndtr/gocapability/capability"
    41  	"golang.org/x/sys/unix"
    42  	"golang.org/x/term"
    43  )
    44  
    45  func hasCapSysAdmin() (bool, error) {
    46  	c, err := capability.NewPid2(0)
    47  	if err != nil {
    48  		return false, err
    49  	}
    50  	err = c.Load()
    51  	if err != nil {
    52  		return false, err
    53  	}
    54  	return c.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN), nil
    55  }
    56  
    57  func suggestSdUnitRestart() error {
    58  	_, err := os.Stat("/var/run/secrets/kubernetes.io/serviceaccount/token")
    59  	if errors.Is(err, os.ErrNotExist) {
    60  		// Not running in a pod. Not suggesting --auto-sd-unit-restart
    61  		return nil
    62  	}
    63  
    64  	hasCap, err := hasCapSysAdmin()
    65  	if err != nil {
    66  		return err
    67  	}
    68  	if !hasCap {
    69  		return errors.New("need CAP_SYS_ADMIN (did you try --auto-sd-unit-restart?)")
    70  	}
    71  	return nil
    72  }
    73  
    74  // autoSdUnitRestart will automatically restart the process in a privileged
    75  // systemd unit if the current process does not have enough capabilities.
    76  func autoSdUnitRestart() (exit bool, err error) {
    77  	const IgInSystemdUnitEnv = "IG_IN_SYSTEMD_UNIT"
    78  
    79  	// No recursive restarts
    80  	if os.Getenv(IgInSystemdUnitEnv) == "1" {
    81  		return false, nil
    82  	}
    83  
    84  	// If we already have CAP_SYS_ADMIN, we don't need a workaround
    85  	hasCap, err := hasCapSysAdmin()
    86  	if err != nil {
    87  		return false, err
    88  	}
    89  	if hasCap {
    90  		return false, nil
    91  	}
    92  
    93  	// From here, we decided to use the workaround. This function will return
    94  	// exit=true.
    95  
    96  	if HostRoot == "/" {
    97  		return true, errors.New("host rootfs not found")
    98  	}
    99  
   100  	// if the host does not use systemd, we cannot use this workaround
   101  	_, err = os.Stat(filepath.Join(HostRoot, "/run/systemd/private"))
   102  	if err != nil {
   103  		return true, errors.New("systemd private socket not found")
   104  	}
   105  
   106  	// Only root can talk to the systemd socket
   107  	if os.Geteuid() != 0 {
   108  		return true, errors.New("need root user")
   109  	}
   110  
   111  	runID := uuid.New().String()[:8]
   112  	unitName := fmt.Sprintf("kubectl-debug-ig-%s.service", runID)
   113  	log.Debugf("Missing capability. Starting systemd unit %q", unitName)
   114  
   115  	// systemdDbus.NewSystemdConnectionContext() hard codes the path to the
   116  	// systemd socket to /run/systemd/private. We need to make sure that this
   117  	// path exists (if the /run:/run mount was set up correctly). If it doesn't
   118  	// exist, we create the symlink to /host/run/systemd/private.
   119  	_, err = os.Stat("/run/systemd/private")
   120  	if errors.Is(err, os.ErrNotExist) {
   121  		err := os.MkdirAll("/run/systemd", 0o755)
   122  		if err != nil {
   123  			return true, err
   124  		}
   125  
   126  		err = os.Symlink("/host/run/systemd/private", "/run/systemd/private")
   127  		if err != nil {
   128  			return true, fmt.Errorf("linking /run/systemd/private: %w", err)
   129  		}
   130  	} else if err != nil {
   131  		return true, fmt.Errorf("statting /run/systemd/private: %w", err)
   132  	}
   133  
   134  	conn, err := systemdDbus.NewSystemdConnectionContext(context.TODO())
   135  	if err != nil {
   136  		return true, fmt.Errorf("connecting to systemd: %w", err)
   137  	}
   138  	defer conn.Close()
   139  
   140  	signalChan := make(chan os.Signal, 1)
   141  	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
   142  
   143  	statusChan := make(chan string, 1)
   144  	cmd := []string{
   145  		fmt.Sprintf("/proc/%d/root/usr/bin/ig", os.Getpid()),
   146  	}
   147  	cmd = append(cmd, os.Args[1:]...)
   148  	envs := []string{IgInSystemdUnitEnv + "=1"}
   149  	isTerminal := term.IsTerminal(int(os.Stdin.Fd())) || term.IsTerminal(int(os.Stdout.Fd())) || term.IsTerminal(int(os.Stderr.Fd()))
   150  	if isTerminal && os.Getenv("TERM") != "" {
   151  		envs = append(envs, "TERM="+os.Getenv("TERM"))
   152  	}
   153  
   154  	properties := []systemdDbus.Property{
   155  		systemdDbus.PropDescription("Inspektor Gadget via kubectl debug"),
   156  		// Type=oneshot ensures that StartTransientUnitContext will only return "done" when the job is done
   157  		systemdDbus.PropType("oneshot"),
   158  		// Pass stdio to the systemd unit
   159  		{
   160  			Name:  "StandardInputFileDescriptor",
   161  			Value: dbus.MakeVariant(dbus.UnixFD(unix.Stdin)),
   162  		},
   163  		{
   164  			Name:  "StandardOutputFileDescriptor",
   165  			Value: dbus.MakeVariant(dbus.UnixFD(unix.Stdout)),
   166  		},
   167  		{
   168  			Name:  "StandardErrorFileDescriptor",
   169  			Value: dbus.MakeVariant(dbus.UnixFD(unix.Stderr)),
   170  		},
   171  		{
   172  			Name:  "Environment",
   173  			Value: dbus.MakeVariant(envs),
   174  		},
   175  		systemdDbus.PropExecStart(cmd, true),
   176  	}
   177  
   178  	_, err = conn.StartTransientUnitContext(context.TODO(),
   179  		unitName, "fail", properties, statusChan)
   180  	if err != nil {
   181  		return true, fmt.Errorf("starting transient unit %q: %w", unitName, err)
   182  	}
   183  
   184  	select {
   185  	case s := <-statusChan:
   186  		log.Debugf("systemd unit %q returned %q", unitName, s)
   187  		// "done" indicates successful execution of a job
   188  		// See https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
   189  		if s != "done" {
   190  			conn.ResetFailedUnitContext(context.TODO(), unitName)
   191  
   192  			return true, fmt.Errorf("creating systemd unit `%s`: got `%s`", unitName, s)
   193  		}
   194  	case sig := <-signalChan:
   195  		log.Debugf("%s: interrupt systemd unit %q", sig, unitName)
   196  		statusStopChan := make(chan string, 1)
   197  		_, err := conn.StopUnitContext(context.TODO(), unitName, "replace", statusStopChan)
   198  		if err != nil {
   199  			return true, fmt.Errorf("stopping transient unit %q: %w", unitName, err)
   200  		}
   201  		s := <-statusChan
   202  		if s != "done" && s != "canceled" {
   203  			return true, fmt.Errorf("stopping transient unit %q: got `%s`", unitName, s)
   204  		}
   205  	}
   206  
   207  	return true, nil
   208  }
   209  
   210  // autoMount ensures that filesystems are mounted correctly.
   211  // Some environments (e.g. minikube) runs with a read-only /sys without bpf
   212  // https://github.com/kubernetes/minikube/blob/99a0c91459f17ad8c83c80fc37a9ded41e34370c/deploy/kicbase/entrypoint#L76-L81
   213  // Docker Desktop with WSL2 also has filesystems unmounted.
   214  //
   215  // If dryRun is true, autoMount will only check if the filesystems need to be
   216  // mounted.
   217  // Returns the list of filesystems that need to be mounted.
   218  func autoMountFilesystems(dryRun bool) ([]string, error) {
   219  	var mountsSuggested []string
   220  
   221  	fs := []struct {
   222  		name    string
   223  		paths   []string
   224  		magic   int64
   225  		suggest bool // suggest mounting this filesystem
   226  	}{
   227  		{
   228  			"bpf",
   229  			[]string{"/sys/fs/bpf"},
   230  			unix.BPF_FS_MAGIC,
   231  			false, // do not make 'ig --auto-mount-filesystems=false' fail if bpffs is not mounted
   232  		},
   233  		{
   234  			"debugfs",
   235  			[]string{"/sys/kernel/debug"},
   236  			unix.DEBUGFS_MAGIC,
   237  			true,
   238  		},
   239  		{
   240  			"tracefs",
   241  			[]string{"/sys/kernel/tracing", "/sys/kernel/debug/tracing"},
   242  			unix.TRACEFS_MAGIC,
   243  			true,
   244  		},
   245  	}
   246  
   247  filesystemLoop:
   248  	for _, f := range fs {
   249  		var statfs unix.Statfs_t
   250  		for _, path := range f.paths {
   251  			if err := unix.Statfs(path, &statfs); err != nil {
   252  				log.Debugf("statfs returned error on %s: %s", path, err)
   253  				continue
   254  			}
   255  			if statfs.Type == f.magic {
   256  				log.Debugf("%s already mounted", f.name)
   257  				continue filesystemLoop
   258  			}
   259  		}
   260  
   261  		if f.suggest {
   262  			mountsSuggested = append(mountsSuggested, f.name)
   263  		}
   264  		if dryRun {
   265  			continue
   266  		}
   267  
   268  		if err := unix.Mount("none", f.paths[0], f.name, 0, ""); err != nil {
   269  			return mountsSuggested, fmt.Errorf("mounting %s: %w", f.paths[0], err)
   270  		}
   271  		log.Debugf("%s mounted (%s)", f.name, f.paths[0])
   272  	}
   273  	return mountsSuggested, nil
   274  }
   275  
   276  func suggestWSLWorkaround() error {
   277  	var utsname unix.Utsname
   278  	err := unix.Uname(&utsname)
   279  	if err != nil {
   280  		return err
   281  	}
   282  	release := unix.ByteSliceToString(utsname.Release[:])
   283  	if !strings.HasSuffix(release, "-WSL2") {
   284  		return nil
   285  	}
   286  
   287  	// If /host/proc is correctly set up, we don't need this workaround
   288  	target, err := os.Readlink(HostProcFs + "/self")
   289  	if target != "" && err == nil {
   290  		return nil
   291  	}
   292  
   293  	return fmt.Errorf("%s/self not found on WSL2 (did you try --auto-wsl-workaround?)", HostProcFs)
   294  }
   295  
   296  // autoWSLWorkaround overrides HostRoot and HostProcFs if necessary.
   297  // Docker Desktop with WSL2 sets up host volumes with weird pidns.
   298  func autoWSLWorkaround() error {
   299  	// If we're not in a container, we can't use this workaround
   300  	if HostRoot == "/" {
   301  		return nil
   302  	}
   303  
   304  	// If /host/proc is correctly set up, we don't need this workaround
   305  	target, err := os.Readlink(HostProcFs + "/self")
   306  	if target != "" && err == nil {
   307  		return nil
   308  	}
   309  
   310  	log.Warnf("%s's pidns is neither the current pidns or a parent of the current pidns. Remounting.", HostProcFs)
   311  	err = unix.Mount("/proc", HostProcFs, "", unix.MS_BIND, "")
   312  	if err != nil {
   313  		return fmt.Errorf("remounting %s: %w", HostProcFs, err)
   314  	}
   315  	// Find lifecycle-server process and set HOST_PID to its root
   316  	processes, err := os.ReadDir(HostProcFs)
   317  	if err != nil {
   318  		return fmt.Errorf("reading %s: %w", HostProcFs, err)
   319  	}
   320  	for _, p := range processes {
   321  		if !p.IsDir() {
   322  			continue
   323  		}
   324  
   325  		pid, err := strconv.Atoi(p.Name())
   326  		if err != nil {
   327  			continue
   328  		}
   329  
   330  		cmdLine := GetProcCmdline(pid)
   331  		if cmdLine[0] != "/usr/bin/lifecycle-server" {
   332  			continue
   333  		}
   334  		log.Debugf("Found lifecycle-server process %s", p.Name())
   335  
   336  		buf, err := os.ReadFile(fmt.Sprintf("/proc/%s/cgroup", p.Name()))
   337  		if err != nil {
   338  			continue
   339  		}
   340  		if !strings.Contains(string(buf), "/podruntime/docker") {
   341  			continue
   342  		}
   343  		log.Debugf("Found lifecycle-server process %s in cgroup /podruntime/docker", p.Name())
   344  
   345  		HostRoot = fmt.Sprintf("/proc/%s/root/", p.Name())
   346  		HostProcFs = filepath.Join(HostRoot, "/proc")
   347  		log.Warnf("Overriding HostRoot=%s HostProcFs=%s (lifecycle-server)", HostRoot, HostProcFs)
   348  
   349  		return nil
   350  	}
   351  
   352  	return errors.New("lifecycle-server process not found")
   353  }