github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/utils/host/workarounds.go (about) 1 //go:build linux 2 // +build linux 3 4 // Copyright 2023 The Inspektor Gadget authors 5 // 6 // Licensed under the Apache License, Version 2.0 (the "License"); 7 // you may not use this file except in compliance with the License. 8 // You may obtain a copy of the License at 9 // 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // 12 // Unless required by applicable law or agreed to in writing, software 13 // distributed under the License is distributed on an "AS IS" BASIS, 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 // See the License for the specific language governing permissions and 16 // limitations under the License. 17 18 // Package host provides ways to access the host filesystem. 19 // 20 // Inspektor Gadget can run either in the host or in a container. When running 21 // in a container, the host filesystem must be available in a specific 22 // directory. 23 package host 24 25 import ( 26 "context" 27 "errors" 28 "fmt" 29 "os" 30 "os/signal" 31 "path/filepath" 32 "strconv" 33 "strings" 34 "syscall" 35 36 systemdDbus "github.com/coreos/go-systemd/v22/dbus" 37 "github.com/godbus/dbus/v5" 38 "github.com/google/uuid" 39 log "github.com/sirupsen/logrus" 40 "github.com/syndtr/gocapability/capability" 41 "golang.org/x/sys/unix" 42 "golang.org/x/term" 43 ) 44 45 func hasCapSysAdmin() (bool, error) { 46 c, err := capability.NewPid2(0) 47 if err != nil { 48 return false, err 49 } 50 err = c.Load() 51 if err != nil { 52 return false, err 53 } 54 return c.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN), nil 55 } 56 57 func suggestSdUnitRestart() error { 58 _, err := os.Stat("/var/run/secrets/kubernetes.io/serviceaccount/token") 59 if errors.Is(err, os.ErrNotExist) { 60 // Not running in a pod. Not suggesting --auto-sd-unit-restart 61 return nil 62 } 63 64 hasCap, err := hasCapSysAdmin() 65 if err != nil { 66 return err 67 } 68 if !hasCap { 69 return errors.New("need CAP_SYS_ADMIN (did you try --auto-sd-unit-restart?)") 70 } 71 return nil 72 } 73 74 // autoSdUnitRestart will automatically restart the process in a privileged 75 // systemd unit if the current process does not have enough capabilities. 76 func autoSdUnitRestart() (exit bool, err error) { 77 const IgInSystemdUnitEnv = "IG_IN_SYSTEMD_UNIT" 78 79 // No recursive restarts 80 if os.Getenv(IgInSystemdUnitEnv) == "1" { 81 return false, nil 82 } 83 84 // If we already have CAP_SYS_ADMIN, we don't need a workaround 85 hasCap, err := hasCapSysAdmin() 86 if err != nil { 87 return false, err 88 } 89 if hasCap { 90 return false, nil 91 } 92 93 // From here, we decided to use the workaround. This function will return 94 // exit=true. 95 96 if HostRoot == "/" { 97 return true, errors.New("host rootfs not found") 98 } 99 100 // if the host does not use systemd, we cannot use this workaround 101 _, err = os.Stat(filepath.Join(HostRoot, "/run/systemd/private")) 102 if err != nil { 103 return true, errors.New("systemd private socket not found") 104 } 105 106 // Only root can talk to the systemd socket 107 if os.Geteuid() != 0 { 108 return true, errors.New("need root user") 109 } 110 111 runID := uuid.New().String()[:8] 112 unitName := fmt.Sprintf("kubectl-debug-ig-%s.service", runID) 113 log.Debugf("Missing capability. Starting systemd unit %q", unitName) 114 115 // systemdDbus.NewSystemdConnectionContext() hard codes the path to the 116 // systemd socket to /run/systemd/private. We need to make sure that this 117 // path exists (if the /run:/run mount was set up correctly). If it doesn't 118 // exist, we create the symlink to /host/run/systemd/private. 119 _, err = os.Stat("/run/systemd/private") 120 if errors.Is(err, os.ErrNotExist) { 121 err := os.MkdirAll("/run/systemd", 0o755) 122 if err != nil { 123 return true, err 124 } 125 126 err = os.Symlink("/host/run/systemd/private", "/run/systemd/private") 127 if err != nil { 128 return true, fmt.Errorf("linking /run/systemd/private: %w", err) 129 } 130 } else if err != nil { 131 return true, fmt.Errorf("statting /run/systemd/private: %w", err) 132 } 133 134 conn, err := systemdDbus.NewSystemdConnectionContext(context.TODO()) 135 if err != nil { 136 return true, fmt.Errorf("connecting to systemd: %w", err) 137 } 138 defer conn.Close() 139 140 signalChan := make(chan os.Signal, 1) 141 signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) 142 143 statusChan := make(chan string, 1) 144 cmd := []string{ 145 fmt.Sprintf("/proc/%d/root/usr/bin/ig", os.Getpid()), 146 } 147 cmd = append(cmd, os.Args[1:]...) 148 envs := []string{IgInSystemdUnitEnv + "=1"} 149 isTerminal := term.IsTerminal(int(os.Stdin.Fd())) || term.IsTerminal(int(os.Stdout.Fd())) || term.IsTerminal(int(os.Stderr.Fd())) 150 if isTerminal && os.Getenv("TERM") != "" { 151 envs = append(envs, "TERM="+os.Getenv("TERM")) 152 } 153 154 properties := []systemdDbus.Property{ 155 systemdDbus.PropDescription("Inspektor Gadget via kubectl debug"), 156 // Type=oneshot ensures that StartTransientUnitContext will only return "done" when the job is done 157 systemdDbus.PropType("oneshot"), 158 // Pass stdio to the systemd unit 159 { 160 Name: "StandardInputFileDescriptor", 161 Value: dbus.MakeVariant(dbus.UnixFD(unix.Stdin)), 162 }, 163 { 164 Name: "StandardOutputFileDescriptor", 165 Value: dbus.MakeVariant(dbus.UnixFD(unix.Stdout)), 166 }, 167 { 168 Name: "StandardErrorFileDescriptor", 169 Value: dbus.MakeVariant(dbus.UnixFD(unix.Stderr)), 170 }, 171 { 172 Name: "Environment", 173 Value: dbus.MakeVariant(envs), 174 }, 175 systemdDbus.PropExecStart(cmd, true), 176 } 177 178 _, err = conn.StartTransientUnitContext(context.TODO(), 179 unitName, "fail", properties, statusChan) 180 if err != nil { 181 return true, fmt.Errorf("starting transient unit %q: %w", unitName, err) 182 } 183 184 select { 185 case s := <-statusChan: 186 log.Debugf("systemd unit %q returned %q", unitName, s) 187 // "done" indicates successful execution of a job 188 // See https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit 189 if s != "done" { 190 conn.ResetFailedUnitContext(context.TODO(), unitName) 191 192 return true, fmt.Errorf("creating systemd unit `%s`: got `%s`", unitName, s) 193 } 194 case sig := <-signalChan: 195 log.Debugf("%s: interrupt systemd unit %q", sig, unitName) 196 statusStopChan := make(chan string, 1) 197 _, err := conn.StopUnitContext(context.TODO(), unitName, "replace", statusStopChan) 198 if err != nil { 199 return true, fmt.Errorf("stopping transient unit %q: %w", unitName, err) 200 } 201 s := <-statusChan 202 if s != "done" && s != "canceled" { 203 return true, fmt.Errorf("stopping transient unit %q: got `%s`", unitName, s) 204 } 205 } 206 207 return true, nil 208 } 209 210 // autoMount ensures that filesystems are mounted correctly. 211 // Some environments (e.g. minikube) runs with a read-only /sys without bpf 212 // https://github.com/kubernetes/minikube/blob/99a0c91459f17ad8c83c80fc37a9ded41e34370c/deploy/kicbase/entrypoint#L76-L81 213 // Docker Desktop with WSL2 also has filesystems unmounted. 214 // 215 // If dryRun is true, autoMount will only check if the filesystems need to be 216 // mounted. 217 // Returns the list of filesystems that need to be mounted. 218 func autoMountFilesystems(dryRun bool) ([]string, error) { 219 var mountsSuggested []string 220 221 fs := []struct { 222 name string 223 paths []string 224 magic int64 225 suggest bool // suggest mounting this filesystem 226 }{ 227 { 228 "bpf", 229 []string{"/sys/fs/bpf"}, 230 unix.BPF_FS_MAGIC, 231 false, // do not make 'ig --auto-mount-filesystems=false' fail if bpffs is not mounted 232 }, 233 { 234 "debugfs", 235 []string{"/sys/kernel/debug"}, 236 unix.DEBUGFS_MAGIC, 237 true, 238 }, 239 { 240 "tracefs", 241 []string{"/sys/kernel/tracing", "/sys/kernel/debug/tracing"}, 242 unix.TRACEFS_MAGIC, 243 true, 244 }, 245 } 246 247 filesystemLoop: 248 for _, f := range fs { 249 var statfs unix.Statfs_t 250 for _, path := range f.paths { 251 if err := unix.Statfs(path, &statfs); err != nil { 252 log.Debugf("statfs returned error on %s: %s", path, err) 253 continue 254 } 255 if statfs.Type == f.magic { 256 log.Debugf("%s already mounted", f.name) 257 continue filesystemLoop 258 } 259 } 260 261 if f.suggest { 262 mountsSuggested = append(mountsSuggested, f.name) 263 } 264 if dryRun { 265 continue 266 } 267 268 if err := unix.Mount("none", f.paths[0], f.name, 0, ""); err != nil { 269 return mountsSuggested, fmt.Errorf("mounting %s: %w", f.paths[0], err) 270 } 271 log.Debugf("%s mounted (%s)", f.name, f.paths[0]) 272 } 273 return mountsSuggested, nil 274 } 275 276 func suggestWSLWorkaround() error { 277 var utsname unix.Utsname 278 err := unix.Uname(&utsname) 279 if err != nil { 280 return err 281 } 282 release := unix.ByteSliceToString(utsname.Release[:]) 283 if !strings.HasSuffix(release, "-WSL2") { 284 return nil 285 } 286 287 // If /host/proc is correctly set up, we don't need this workaround 288 target, err := os.Readlink(HostProcFs + "/self") 289 if target != "" && err == nil { 290 return nil 291 } 292 293 return fmt.Errorf("%s/self not found on WSL2 (did you try --auto-wsl-workaround?)", HostProcFs) 294 } 295 296 // autoWSLWorkaround overrides HostRoot and HostProcFs if necessary. 297 // Docker Desktop with WSL2 sets up host volumes with weird pidns. 298 func autoWSLWorkaround() error { 299 // If we're not in a container, we can't use this workaround 300 if HostRoot == "/" { 301 return nil 302 } 303 304 // If /host/proc is correctly set up, we don't need this workaround 305 target, err := os.Readlink(HostProcFs + "/self") 306 if target != "" && err == nil { 307 return nil 308 } 309 310 log.Warnf("%s's pidns is neither the current pidns or a parent of the current pidns. Remounting.", HostProcFs) 311 err = unix.Mount("/proc", HostProcFs, "", unix.MS_BIND, "") 312 if err != nil { 313 return fmt.Errorf("remounting %s: %w", HostProcFs, err) 314 } 315 // Find lifecycle-server process and set HOST_PID to its root 316 processes, err := os.ReadDir(HostProcFs) 317 if err != nil { 318 return fmt.Errorf("reading %s: %w", HostProcFs, err) 319 } 320 for _, p := range processes { 321 if !p.IsDir() { 322 continue 323 } 324 325 pid, err := strconv.Atoi(p.Name()) 326 if err != nil { 327 continue 328 } 329 330 cmdLine := GetProcCmdline(pid) 331 if cmdLine[0] != "/usr/bin/lifecycle-server" { 332 continue 333 } 334 log.Debugf("Found lifecycle-server process %s", p.Name()) 335 336 buf, err := os.ReadFile(fmt.Sprintf("/proc/%s/cgroup", p.Name())) 337 if err != nil { 338 continue 339 } 340 if !strings.Contains(string(buf), "/podruntime/docker") { 341 continue 342 } 343 log.Debugf("Found lifecycle-server process %s in cgroup /podruntime/docker", p.Name()) 344 345 HostRoot = fmt.Sprintf("/proc/%s/root/", p.Name()) 346 HostProcFs = filepath.Join(HostRoot, "/proc") 347 log.Warnf("Overriding HostRoot=%s HostProcFs=%s (lifecycle-server)", HostRoot, HostProcFs) 348 349 return nil 350 } 351 352 return errors.New("lifecycle-server process not found") 353 }