github.com/mvdan/u-root-coreutils@v0.0.0-20230122170626-c2eef2898555/cmds/exp/pflask/pflask.go (about) 1 // Copyright 2015-2017 the u-root Authors. All rights reserved 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "flag" 9 "fmt" 10 "io" 11 "log" 12 "os" 13 "os/exec" 14 "path/filepath" 15 "strconv" 16 "strings" 17 "syscall" 18 "time" 19 "unsafe" 20 21 // "github.com/mvdan/u-root-coreutils/pkg/termios" 22 "golang.org/x/sys/unix" 23 ) 24 25 // pty support. We used to import github.com/kr/pty but what we need is not that complex. 26 // Thanks to keith rarick for these functions. 27 28 func ptsopen() (controlPTY, processTTY *os.File, ttyname string, err error) { 29 p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0) 30 if err != nil { 31 return 32 } 33 34 ttyname, err = ptsname(p) 35 if err != nil { 36 return 37 } 38 39 err = ptsunlock(p) 40 if err != nil { 41 return 42 } 43 44 v("OpenFile %v %x\n", ttyname, os.O_RDWR|syscall.O_NOCTTY) 45 t, err := os.OpenFile(ttyname, os.O_RDWR|syscall.O_NOCTTY, 0) 46 if err != nil { 47 return 48 } 49 return p, t, ttyname, nil 50 } 51 52 func ptsname(f *os.File) (string, error) { 53 n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) 54 if err != nil { 55 return "", err 56 } 57 return "/dev/pts/" + strconv.Itoa(n), nil 58 } 59 60 func ptsunlock(f *os.File) error { 61 var u uintptr 62 // use TIOCSPTLCK with a zero valued arg to clear the pty lock 63 _, _, err := syscall.Syscall(syscall.SYS_IOCTL, f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&u))) 64 if err != 0 { 65 return err 66 } 67 return nil 68 } 69 70 type cgroupname string 71 72 func (c cgroupname) apply(s string, f func(s string)) { 73 // range of strings.Split("",",") is 1. 74 // not exactly what we might expect. 75 if s == "" { 76 return 77 } 78 for _, g := range strings.Split(s, ",") { 79 p := filepath.Join(g) 80 f(p) 81 } 82 } 83 84 func (c cgroupname) Validate(s string) { 85 c.apply(s, func(s string) { 86 if st, err := os.Stat(filepath.Join(string(c), s)); err != nil { 87 log.Fatalf("%v", err) 88 } else if !st.IsDir() { 89 log.Fatalf("%s: not a directory", s) 90 } 91 }) 92 } 93 94 func (c cgroupname) Create(s, name string) { 95 if err := os.MkdirAll(filepath.Join(string(c), s, name), 0o755); err != nil { 96 log.Fatal(err) 97 } 98 } 99 100 func (c cgroupname) Attach(s, name string, pid int) { 101 t := filepath.Join(string(c), s, name, "tasks") 102 b := []byte(fmt.Sprintf("%v", pid)) 103 if err := os.WriteFile(t, b, 0o600); err != nil { 104 log.Fatal(err) 105 } 106 } 107 108 func (c cgroupname) Destroy(s, n string) { 109 if err := os.RemoveAll(filepath.Join(string(c), s, n)); err != nil { 110 log.Fatal(err) 111 } 112 } 113 114 func (c cgroupname) Do(groups string, pid int) { 115 cgn := fmt.Sprintf("pflask.%d", pid) 116 c.apply(groups, func(s string) { 117 c.Create(s, cgn) 118 c.Attach(s, cgn, pid) 119 }) 120 } 121 122 type mount struct { 123 src, dst, mtype, opts string 124 flags uintptr 125 dir bool 126 needPrivilege bool 127 } 128 129 // Add adds a mount to the global mountlist. Don't know if we need it, but we might for additional volumes? 130 func Add(src, dst, mtype, opts string, flags uintptr, dir bool) { 131 mounts = append(mounts, mount{src: src, dst: dst, mtype: mtype, flags: flags, opts: opts, dir: dir}) 132 } 133 134 // One mounts one mountpoint, using base as a prefix for the destination. 135 // If anything goes wrong, we just bail out; we've privatized the namespace 136 // so there is no cleanup we need to do. 137 func (m *mount) One(base string) { 138 dst := filepath.Join(base, m.dst) 139 if m.dir { 140 if err := os.MkdirAll(dst, 0o755); err != nil { 141 log.Fatalf("One: mkdirall %v: %v", m.dst, err) 142 } 143 } 144 if err := syscall.Mount(m.src, dst, m.mtype, m.flags, m.opts); err != nil { 145 log.Fatalf("Mount :%s: on :%s: type :%s: flags %x: opts :%v: %v\n", 146 m.src, m.dst, m.mtype, m.flags, m.opts, err) 147 } 148 } 149 150 // MountAll mounts all the mount points. root is a bit special in that it just sets 151 // needed flags for non-shared mounts. 152 func MountAll(base string, unprivileged bool) { 153 root.One("") 154 for _, m := range mounts { 155 if m.needPrivilege && unprivileged { 156 continue 157 } 158 m.One(base) 159 } 160 } 161 162 // modedev returns a mode and dev suitable for use in mknod. 163 // It's very odd, but the Dev either needs to be byteswapped 164 // or comes back byteswapped. I just love it that the world 165 // has fixed on a 45-year-old ABI (stat in this case) 166 // that was abandoned by its designers 30 years ago. 167 // Oh well. 168 func modedev(st os.FileInfo) (uint32, int) { 169 // Weird. The Dev is byte-swapped for some reason. 170 dev := int(st.Sys().(*syscall.Stat_t).Dev) 171 devlo := dev & 0xff 172 dev >>= 8 173 dev |= (devlo << 8) 174 return uint32(st.Sys().(*syscall.Stat_t).Mode), dev 175 } 176 177 // makeConsole sets the right modes for the real console, then creates 178 // a /dev/console in the chroot. 179 func makeConsole(base, console string, unprivileged bool) { 180 if err := os.Chmod(console, 0o600); err != nil { 181 log.Printf("%v", err) 182 } 183 if err := os.Chown(console, 0, 0); err != nil { 184 log.Printf("%v", err) 185 } 186 187 st, err := os.Stat(console) 188 if err != nil { 189 log.Printf("%v", err) 190 } 191 192 nn := filepath.Join(base, "/dev/console") 193 mode, dev := modedev(st) 194 if unprivileged { 195 // In unprivileged uses, we can't mknod /dev/console, however, 196 // we can just create a file /dev/console and use bind mount on file. 197 if _, err := os.Stat(nn); err != nil { 198 os.WriteFile(nn, []byte{}, 0o600) // best effort, ignore error 199 } 200 } else { 201 if err := syscall.Mknod(nn, mode, dev); err != nil { 202 log.Printf("%v", err) 203 } 204 } 205 206 // if any previous steps failed, this one will too, so we can bail here. 207 if err := syscall.Mount(console, nn, "", syscall.MS_BIND, ""); err != nil { 208 log.Fatalf("Mount :%s: on :%s: flags %v: %v", 209 console, nn, syscall.MS_BIND, err) 210 } 211 } 212 213 // copyNodes makes copies of needed nodes in the chroot. 214 func copyNodes(base string) { 215 nodes := []string{ 216 "/dev/tty", 217 "/dev/full", 218 "/dev/null", 219 "/dev/zero", 220 "/dev/random", 221 "/dev/urandom", 222 } 223 224 for _, n := range nodes { 225 st, err := os.Stat(n) 226 if err != nil { 227 log.Printf("%v", err) 228 } 229 nn := filepath.Join(base, n) 230 mode, dev := modedev(st) 231 if err := syscall.Mknod(nn, mode, dev); err != nil { 232 log.Printf("%v", err) 233 } 234 } 235 } 236 237 // makePtmx creates /dev/ptmx in the root. Because of order of operations 238 // it has to happen at a different time than copyNodes. 239 func makePtmx(base string) { 240 dst := filepath.Join(base, "/dev/ptmx") 241 242 if _, err := os.Stat(dst); err == nil { 243 return 244 } 245 246 if err := os.Symlink("/dev/pts/ptmx", dst); err != nil { 247 log.Printf("%v", err) 248 } 249 } 250 251 // makeSymlinks sets up standard symlinks as found in /dev. 252 func makeSymlinks(base string) { 253 linkit := []struct { 254 src, dst string 255 }{ 256 {"/dev/pts/ptmx", "/dev/ptmx"}, 257 {"/proc/kcore", "/dev/core"}, 258 {"/proc/self/fd", "/dev/fd"}, 259 {"/proc/self/fd/0", "/dev/stdin"}, 260 {"/proc/self/fd/1", "/dev/stdout"}, 261 {"/proc/self/fd/2", "/dev/stderr"}, 262 } 263 264 for i := range linkit { 265 dst := filepath.Join(base, linkit[i].dst) 266 267 if _, err := os.Stat(dst); err == nil { 268 continue 269 } 270 271 if err := os.Symlink(linkit[i].src, dst); err != nil { 272 log.Printf("%v", err) 273 } 274 } 275 } 276 277 var ( 278 cgpath = flag.String("cgpath", "/sys/fs/cgroup", "set the cgroups") 279 cgroup = flag.String("cgroup", "", "set the cgroups") 280 mnt = flag.String("mount", "", "define mounts") 281 chroot = flag.String("chroot", "", "where to chroot to") 282 chdir = flag.String("chdir", "/", "where to chrdir to in the chroot") 283 console = flag.String("console", "/dev/console", "where the console is") 284 keepenv = flag.Bool("keepenv", false, "Keep the environment") 285 debug = flag.Bool("d", false, "Enable debug logs") 286 env = flag.String("env", "", "other environment variables") 287 user = flag.String("user", "root" /*user.User.Username*/, "User name") 288 root = &mount{"", "/", "", "", syscall.MS_SLAVE | syscall.MS_REC, false, false} 289 mounts = []mount{ 290 {"proc", "/proc", "proc", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV, true, false}, 291 {"/proc/sys", "/proc/sys", "", "", syscall.MS_BIND, true, true}, 292 {"", "/proc/sys", "", "", syscall.MS_BIND | syscall.MS_RDONLY | syscall.MS_REMOUNT, true, true}, 293 {"sysfs", "/sys", "sysfs", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV | syscall.MS_RDONLY, true, true}, 294 {"tmpfs", "/dev", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_STRICTATIME, true, true}, // unprivileged system needs a pre-populated /dev 295 {"devpts", "/dev/pts", "devpts", "newinstance,ptmxmode=0660,mode=0620", syscall.MS_NOSUID | syscall.MS_NOEXEC, true, false}, 296 {"tmpfs", "/dev/shm", "tmpfs", "mode=1777", syscall.MS_NOSUID | syscall.MS_STRICTATIME | syscall.MS_NODEV, true, false}, 297 {"tmpfs", "/run", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, true, false}, 298 } 299 v = func(string, ...interface{}) {} 300 ) 301 302 func main() { 303 flag.Parse() 304 if *debug { 305 v = log.Printf 306 } 307 v("pflask: Let's go!") 308 309 if len(flag.Args()) < 1 { 310 v("pflask: no args given") 311 os.Exit(1) 312 } 313 314 // note the unshare system call worketh not for Go. 315 // So do it ourselves. We have to start ourselves up again, 316 // after having spawned ourselves with lots of clone 317 // flags sets. To know that we spawned ourselves we add '#' 318 // as the last arg. # was chosen because shells normally filter 319 // it out, so its presence as our last arg is highly indicative 320 // that we really spawned us. Also, for testing, you can always 321 // pass it by hand to see what the namespace looks like. 322 a := os.Args 323 if a[len(a)-1][0] != '#' { 324 a = append(a, "#") 325 euid := syscall.Geteuid() 326 v("Running as user %v\n", euid) 327 if euid != 0 { 328 a[len(a)-1] = "#u" 329 } 330 if *debug { 331 testc := exec.Command("/bbin/echo", " ===== cmd test") 332 testc.Stdout = os.Stdout 333 testc.Run() 334 testc = exec.Command("/bbin/ls", a[0]) 335 testc.Stdout = os.Stdout 336 testc.SysProcAttr = &syscall.SysProcAttr{Cloneflags: 0} 337 testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNS 338 testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUTS 339 testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWIPC 340 testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWPID 341 if err := testc.Run(); err != nil { 342 log.Printf("Could not run:\n %v\n %v\n", testc, err.Error()) 343 } 344 } 345 // spawn ourselves with the right unsharing settings. 346 c := exec.Command(a[0], a[1:]...) 347 c.SysProcAttr = &syscall.SysProcAttr{Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID} 348 c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNET 349 350 if euid != 0 { 351 c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUSER 352 c.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getuid(), Size: 1}} 353 c.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getgid(), Size: 1}} 354 } 355 c.Stdin = os.Stdin 356 c.Stdout = os.Stdout 357 c.Stderr = os.Stderr 358 //t, err := termios.GetTermios(1) 359 //if err != nil { 360 // log.Fatalf("Can't get termios on fd 1: %v", err) 361 //} 362 v("pflask: respawning...") 363 if err := c.Run(); err != nil { 364 log.Printf("Could not run:\n %v\n %v\n", c, err.Error()) 365 if strings.Contains(err.Error(), "invalid argument") { 366 log.Println("Ensure that your kernel is configured for CGROUPs and NS.") 367 log.Println("The following are needed: IPC, PID, USER, UTS") 368 } 369 if strings.Contains(err.Error(), "device or resource busy") { 370 log.Println("No clue...") 371 } 372 } 373 //if err := termios.SetTermios(1, t); err != nil { 374 // log.Printf("Can't reset termios on fd1: %v", err) 375 //} 376 os.Exit(1) 377 } 378 379 unprivileged := a[len(a)-1] == "#u" 380 381 // unlike the original pflask, we require that you set a chroot. 382 // If you make it /, strange things are bound to happen. 383 // if that is too limiting we'll have to change this. 384 if *chroot == "" { 385 log.Fatalf("you are required to set the chroot via -chroot") 386 } 387 if *chroot == "/" { 388 log.Println("[WARN]: chroot set to /: strange things are bound to happen") 389 } 390 391 a = flag.Args() 392 v("greetings %v\n", a) 393 a = a[:len(a)-1] 394 395 v("pflask: ptsopen") 396 controlPTY, processTTY, sname, err := ptsopen() 397 if err != nil { 398 log.Fatalf(err.Error()) 399 } 400 401 // child code. Not really. What really happens here is we set 402 // ourselves into the container, and spawn the child. It's a bit odd 403 // but we're the parent, but we'll run in the container? I don't know 404 // how else to do it. This may require we set some things up first, 405 // esp. the network. But, it's all fun and games until someone loses 406 // an eye. 407 v("MountAll") 408 MountAll(*chroot, unprivileged) 409 410 if !unprivileged { 411 v("copyNodes") 412 copyNodes(*chroot) 413 } 414 415 v("makePtmx") 416 makePtmx(*chroot) 417 418 v("makeSymlinks") 419 makeSymlinks(*chroot) 420 421 v("makeConsole") 422 makeConsole(*chroot, sname, unprivileged) 423 424 // umask(0022); 425 426 /* TODO: drop capabilities */ 427 428 // do_user(user); 429 430 e := make(map[string]string) 431 if *keepenv { 432 for _, v := range os.Environ() { 433 k := strings.SplitN(v, "=", 2) 434 e[k[0]] = k[1] 435 } 436 } 437 438 term := os.Getenv("TERM") 439 e["TERM"] = term 440 e["PATH"] = "/usr/sbin:/usr/bin:/sbin:/bin" 441 e["USER"] = *user 442 e["LOGNAME"] = *user 443 e["HOME"] = "/root" 444 445 if *env != "" { 446 for _, c := range strings.Split(*env, ",") { 447 k := strings.SplitN(c, "=", 2) 448 if len(k) != 2 { 449 log.Printf("Bogus environment string %v", c) 450 continue 451 } 452 e[k[0]] = k[1] 453 } 454 } 455 e["container"] = "pflask" 456 457 if *cgroup == "" { 458 var envs []string 459 for k, v := range e { 460 envs = append(envs, k+"="+v) 461 } 462 v("envs\n %v\n", e) 463 v("-- chroot --") 464 if err := syscall.Chroot(*chroot); err != nil { 465 log.Fatal(err) 466 } 467 v("--- chdir --") 468 if err := syscall.Chdir(*chdir); err != nil { 469 log.Fatal(err) 470 } 471 v("---- exec --") 472 log.Fatal(syscall.Exec(a[0], a[1:], envs)) 473 } 474 475 v("exec.Command") 476 c := exec.Command(a[0], a[1:]...) 477 c.Env = nil 478 for k, v := range e { 479 c.Env = append(c.Env, k+"="+v) 480 } 481 482 c.SysProcAttr = &syscall.SysProcAttr{ 483 Chroot: *chroot, 484 Setctty: true, 485 Setsid: true, 486 } 487 c.Stdout = processTTY 488 c.Stdin = processTTY 489 c.Stderr = c.Stdout 490 c.SysProcAttr.Setctty = true 491 c.SysProcAttr.Setsid = true 492 c.SysProcAttr.Ptrace = true 493 c.Dir = *chdir 494 err = c.Start() 495 if err != nil { 496 panic(err) 497 } 498 kid := c.Process.Pid 499 log.Printf("Started %d\n", kid) 500 501 // set up the containers, then resume the process. 502 // Its children will get the containers as it clones. 503 504 cg := cgroupname(*cgpath) 505 cg.Do(*cgroup, kid) 506 507 // sometimes the detach fails. Looks like a race condition: we're 508 // sending the detach before the child has hit the TRACE_ME point. 509 // Experimentally, when it fails, even one seconds it too short to 510 // sleep. Sleep for 5 seconds. 511 // Oh well it's not that. It's that there is some one of these 512 // processes not in the PID namespace of the child? Who knows, sigh. 513 // This is an aspect of the Go runtime that is seriously broken. 514 515 for i := 0; ; i++ { 516 if err = syscall.PtraceDetach(kid); err != nil { 517 log.Printf("Could not detach %v, sleeping 250 milliseconds", kid) 518 time.Sleep(250 * time.Millisecond) 519 continue 520 } 521 if i > 100 { 522 log.Fatalf("Tried for 10 seconds to get a DETACH. Let's fix the go runtime someday") 523 } 524 break 525 } 526 527 raw() 528 529 go func() { 530 io.Copy(os.Stdout, controlPTY) 531 os.Exit(1) 532 }() 533 io.Copy(controlPTY, os.Stdin) 534 }