github.com/mvdan/u-root-coreutils@v0.0.0-20230122170626-c2eef2898555/cmds/exp/pflask/pflask.go (about)

     1  // Copyright 2015-2017 the u-root Authors. All rights reserved
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"os"
    13  	"os/exec"
    14  	"path/filepath"
    15  	"strconv"
    16  	"strings"
    17  	"syscall"
    18  	"time"
    19  	"unsafe"
    20  
    21  	// "github.com/mvdan/u-root-coreutils/pkg/termios"
    22  	"golang.org/x/sys/unix"
    23  )
    24  
    25  // pty support. We used to import github.com/kr/pty but what we need is not that complex.
    26  // Thanks to keith rarick for these functions.
    27  
    28  func ptsopen() (controlPTY, processTTY *os.File, ttyname string, err error) {
    29  	p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
    30  	if err != nil {
    31  		return
    32  	}
    33  
    34  	ttyname, err = ptsname(p)
    35  	if err != nil {
    36  		return
    37  	}
    38  
    39  	err = ptsunlock(p)
    40  	if err != nil {
    41  		return
    42  	}
    43  
    44  	v("OpenFile %v %x\n", ttyname, os.O_RDWR|syscall.O_NOCTTY)
    45  	t, err := os.OpenFile(ttyname, os.O_RDWR|syscall.O_NOCTTY, 0)
    46  	if err != nil {
    47  		return
    48  	}
    49  	return p, t, ttyname, nil
    50  }
    51  
    52  func ptsname(f *os.File) (string, error) {
    53  	n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
    54  	if err != nil {
    55  		return "", err
    56  	}
    57  	return "/dev/pts/" + strconv.Itoa(n), nil
    58  }
    59  
    60  func ptsunlock(f *os.File) error {
    61  	var u uintptr
    62  	// use TIOCSPTLCK with a zero valued arg to clear the pty lock
    63  	_, _, err := syscall.Syscall(syscall.SYS_IOCTL, f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&u)))
    64  	if err != 0 {
    65  		return err
    66  	}
    67  	return nil
    68  }
    69  
    70  type cgroupname string
    71  
    72  func (c cgroupname) apply(s string, f func(s string)) {
    73  	// range of strings.Split("",",") is 1.
    74  	// not exactly what we might expect.
    75  	if s == "" {
    76  		return
    77  	}
    78  	for _, g := range strings.Split(s, ",") {
    79  		p := filepath.Join(g)
    80  		f(p)
    81  	}
    82  }
    83  
    84  func (c cgroupname) Validate(s string) {
    85  	c.apply(s, func(s string) {
    86  		if st, err := os.Stat(filepath.Join(string(c), s)); err != nil {
    87  			log.Fatalf("%v", err)
    88  		} else if !st.IsDir() {
    89  			log.Fatalf("%s: not a directory", s)
    90  		}
    91  	})
    92  }
    93  
    94  func (c cgroupname) Create(s, name string) {
    95  	if err := os.MkdirAll(filepath.Join(string(c), s, name), 0o755); err != nil {
    96  		log.Fatal(err)
    97  	}
    98  }
    99  
   100  func (c cgroupname) Attach(s, name string, pid int) {
   101  	t := filepath.Join(string(c), s, name, "tasks")
   102  	b := []byte(fmt.Sprintf("%v", pid))
   103  	if err := os.WriteFile(t, b, 0o600); err != nil {
   104  		log.Fatal(err)
   105  	}
   106  }
   107  
   108  func (c cgroupname) Destroy(s, n string) {
   109  	if err := os.RemoveAll(filepath.Join(string(c), s, n)); err != nil {
   110  		log.Fatal(err)
   111  	}
   112  }
   113  
   114  func (c cgroupname) Do(groups string, pid int) {
   115  	cgn := fmt.Sprintf("pflask.%d", pid)
   116  	c.apply(groups, func(s string) {
   117  		c.Create(s, cgn)
   118  		c.Attach(s, cgn, pid)
   119  	})
   120  }
   121  
// mount describes one filesystem to mount while building the container.
// The field order matters: the package-level mount table is written with
// positional composite literals.
type mount struct {
	// src, mtype and opts are handed to syscall.Mount unchanged as source,
	// filesystem type and data; dst is the mount point, which One joins
	// onto its base argument.
	src, dst, mtype, opts string
	// flags is the mount flag word handed to syscall.Mount.
	flags                 uintptr
	// dir makes One create the mount point with MkdirAll before mounting.
	dir                   bool
	// needPrivilege makes MountAll skip this entry when running unprivileged.
	needPrivilege         bool
}
   128  
   129  // Add adds a mount to the global mountlist. Don't know if we need it, but we might for additional volumes?
   130  func Add(src, dst, mtype, opts string, flags uintptr, dir bool) {
   131  	mounts = append(mounts, mount{src: src, dst: dst, mtype: mtype, flags: flags, opts: opts, dir: dir})
   132  }
   133  
   134  // One mounts one mountpoint, using base as a prefix for the destination.
   135  // If anything goes wrong, we just bail out; we've privatized the namespace
   136  // so there is no cleanup we need to do.
   137  func (m *mount) One(base string) {
   138  	dst := filepath.Join(base, m.dst)
   139  	if m.dir {
   140  		if err := os.MkdirAll(dst, 0o755); err != nil {
   141  			log.Fatalf("One: mkdirall %v: %v", m.dst, err)
   142  		}
   143  	}
   144  	if err := syscall.Mount(m.src, dst, m.mtype, m.flags, m.opts); err != nil {
   145  		log.Fatalf("Mount :%s: on :%s: type :%s: flags %x: opts :%v: %v\n",
   146  			m.src, m.dst, m.mtype, m.flags, m.opts, err)
   147  	}
   148  }
   149  
   150  // MountAll mounts all the mount points. root is a bit special in that it just sets
   151  // needed flags for non-shared mounts.
   152  func MountAll(base string, unprivileged bool) {
   153  	root.One("")
   154  	for _, m := range mounts {
   155  		if m.needPrivilege && unprivileged {
   156  			continue
   157  		}
   158  		m.One(base)
   159  	}
   160  }
   161  
   162  // modedev returns a mode and dev suitable for use in mknod.
   163  // It's very odd, but the Dev either needs to be byteswapped
   164  // or comes back byteswapped. I just love it that the world
   165  // has fixed on a 45-year-old ABI (stat in this case)
   166  // that was abandoned by its designers 30 years ago.
   167  // Oh well.
   168  func modedev(st os.FileInfo) (uint32, int) {
   169  	// Weird. The Dev is byte-swapped for some reason.
   170  	dev := int(st.Sys().(*syscall.Stat_t).Dev)
   171  	devlo := dev & 0xff
   172  	dev >>= 8
   173  	dev |= (devlo << 8)
   174  	return uint32(st.Sys().(*syscall.Stat_t).Mode), dev
   175  }
   176  
   177  // makeConsole sets the right modes for the real console, then creates
   178  // a /dev/console in the chroot.
   179  func makeConsole(base, console string, unprivileged bool) {
   180  	if err := os.Chmod(console, 0o600); err != nil {
   181  		log.Printf("%v", err)
   182  	}
   183  	if err := os.Chown(console, 0, 0); err != nil {
   184  		log.Printf("%v", err)
   185  	}
   186  
   187  	st, err := os.Stat(console)
   188  	if err != nil {
   189  		log.Printf("%v", err)
   190  	}
   191  
   192  	nn := filepath.Join(base, "/dev/console")
   193  	mode, dev := modedev(st)
   194  	if unprivileged {
   195  		// In unprivileged uses, we can't mknod /dev/console, however,
   196  		// we can just create a file /dev/console and use bind mount on file.
   197  		if _, err := os.Stat(nn); err != nil {
   198  			os.WriteFile(nn, []byte{}, 0o600) // best effort, ignore error
   199  		}
   200  	} else {
   201  		if err := syscall.Mknod(nn, mode, dev); err != nil {
   202  			log.Printf("%v", err)
   203  		}
   204  	}
   205  
   206  	// if any previous steps failed, this one will too, so we can bail here.
   207  	if err := syscall.Mount(console, nn, "", syscall.MS_BIND, ""); err != nil {
   208  		log.Fatalf("Mount :%s: on :%s: flags %v: %v",
   209  			console, nn, syscall.MS_BIND, err)
   210  	}
   211  }
   212  
   213  // copyNodes makes copies of needed nodes in the chroot.
   214  func copyNodes(base string) {
   215  	nodes := []string{
   216  		"/dev/tty",
   217  		"/dev/full",
   218  		"/dev/null",
   219  		"/dev/zero",
   220  		"/dev/random",
   221  		"/dev/urandom",
   222  	}
   223  
   224  	for _, n := range nodes {
   225  		st, err := os.Stat(n)
   226  		if err != nil {
   227  			log.Printf("%v", err)
   228  		}
   229  		nn := filepath.Join(base, n)
   230  		mode, dev := modedev(st)
   231  		if err := syscall.Mknod(nn, mode, dev); err != nil {
   232  			log.Printf("%v", err)
   233  		}
   234  	}
   235  }
   236  
   237  // makePtmx creates /dev/ptmx in the root. Because of order of operations
   238  // it has to happen at a different time than copyNodes.
   239  func makePtmx(base string) {
   240  	dst := filepath.Join(base, "/dev/ptmx")
   241  
   242  	if _, err := os.Stat(dst); err == nil {
   243  		return
   244  	}
   245  
   246  	if err := os.Symlink("/dev/pts/ptmx", dst); err != nil {
   247  		log.Printf("%v", err)
   248  	}
   249  }
   250  
   251  // makeSymlinks sets up standard symlinks as found in /dev.
   252  func makeSymlinks(base string) {
   253  	linkit := []struct {
   254  		src, dst string
   255  	}{
   256  		{"/dev/pts/ptmx", "/dev/ptmx"},
   257  		{"/proc/kcore", "/dev/core"},
   258  		{"/proc/self/fd", "/dev/fd"},
   259  		{"/proc/self/fd/0", "/dev/stdin"},
   260  		{"/proc/self/fd/1", "/dev/stdout"},
   261  		{"/proc/self/fd/2", "/dev/stderr"},
   262  	}
   263  
   264  	for i := range linkit {
   265  		dst := filepath.Join(base, linkit[i].dst)
   266  
   267  		if _, err := os.Stat(dst); err == nil {
   268  			continue
   269  		}
   270  
   271  		if err := os.Symlink(linkit[i].src, dst); err != nil {
   272  			log.Printf("%v", err)
   273  		}
   274  	}
   275  }
   276  
   277  var (
   278  	cgpath  = flag.String("cgpath", "/sys/fs/cgroup", "set the cgroups")
   279  	cgroup  = flag.String("cgroup", "", "set the cgroups")
   280  	mnt     = flag.String("mount", "", "define mounts")
   281  	chroot  = flag.String("chroot", "", "where to chroot to")
   282  	chdir   = flag.String("chdir", "/", "where to chrdir to in the chroot")
   283  	console = flag.String("console", "/dev/console", "where the console is")
   284  	keepenv = flag.Bool("keepenv", false, "Keep the environment")
   285  	debug   = flag.Bool("d", false, "Enable debug logs")
   286  	env     = flag.String("env", "", "other environment variables")
   287  	user    = flag.String("user", "root" /*user.User.Username*/, "User name")
   288  	root    = &mount{"", "/", "", "", syscall.MS_SLAVE | syscall.MS_REC, false, false}
   289  	mounts  = []mount{
   290  		{"proc", "/proc", "proc", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV, true, false},
   291  		{"/proc/sys", "/proc/sys", "", "", syscall.MS_BIND, true, true},
   292  		{"", "/proc/sys", "", "", syscall.MS_BIND | syscall.MS_RDONLY | syscall.MS_REMOUNT, true, true},
   293  		{"sysfs", "/sys", "sysfs", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV | syscall.MS_RDONLY, true, true},
   294  		{"tmpfs", "/dev", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_STRICTATIME, true, true}, // unprivileged system needs a pre-populated /dev
   295  		{"devpts", "/dev/pts", "devpts", "newinstance,ptmxmode=0660,mode=0620", syscall.MS_NOSUID | syscall.MS_NOEXEC, true, false},
   296  		{"tmpfs", "/dev/shm", "tmpfs", "mode=1777", syscall.MS_NOSUID | syscall.MS_STRICTATIME | syscall.MS_NODEV, true, false},
   297  		{"tmpfs", "/run", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, true, false},
   298  	}
   299  	v = func(string, ...interface{}) {}
   300  )
   301  
   302  func main() {
   303  	flag.Parse()
   304  	if *debug {
   305  		v = log.Printf
   306  	}
   307  	v("pflask: Let's go!")
   308  
   309  	if len(flag.Args()) < 1 {
   310  		v("pflask: no args given")
   311  		os.Exit(1)
   312  	}
   313  
   314  	// note the unshare system call worketh not for Go.
   315  	// So do it ourselves. We have to start ourselves up again,
   316  	// after having spawned ourselves with lots of clone
   317  	// flags sets. To know that we spawned ourselves we add '#'
   318  	// as the last arg. # was chosen because shells normally filter
   319  	// it out, so its presence as our last arg is highly indicative
   320  	// that we really spawned us. Also, for testing, you can always
   321  	// pass it by hand to see what the namespace looks like.
   322  	a := os.Args
   323  	if a[len(a)-1][0] != '#' {
   324  		a = append(a, "#")
   325  		euid := syscall.Geteuid()
   326  		v("Running as user %v\n", euid)
   327  		if euid != 0 {
   328  			a[len(a)-1] = "#u"
   329  		}
   330  		if *debug {
   331  			testc := exec.Command("/bbin/echo", "    ===== cmd test")
   332  			testc.Stdout = os.Stdout
   333  			testc.Run()
   334  			testc = exec.Command("/bbin/ls", a[0])
   335  			testc.Stdout = os.Stdout
   336  			testc.SysProcAttr = &syscall.SysProcAttr{Cloneflags: 0}
   337  			testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNS
   338  			testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUTS
   339  			testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWIPC
   340  			testc.SysProcAttr.Cloneflags |= syscall.CLONE_NEWPID
   341  			if err := testc.Run(); err != nil {
   342  				log.Printf("Could not run:\n   %v\n    %v\n", testc, err.Error())
   343  			}
   344  		}
   345  		// spawn ourselves with the right unsharing settings.
   346  		c := exec.Command(a[0], a[1:]...)
   347  		c.SysProcAttr = &syscall.SysProcAttr{Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID}
   348  		c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNET
   349  
   350  		if euid != 0 {
   351  			c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUSER
   352  			c.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getuid(), Size: 1}}
   353  			c.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getgid(), Size: 1}}
   354  		}
   355  		c.Stdin = os.Stdin
   356  		c.Stdout = os.Stdout
   357  		c.Stderr = os.Stderr
   358  		//t, err := termios.GetTermios(1)
   359  		//if err != nil {
   360  		//	log.Fatalf("Can't get termios on fd 1: %v", err)
   361  		//}
   362  		v("pflask: respawning...")
   363  		if err := c.Run(); err != nil {
   364  			log.Printf("Could not run:\n   %v\n    %v\n", c, err.Error())
   365  			if strings.Contains(err.Error(), "invalid argument") {
   366  				log.Println("Ensure that your kernel is configured for CGROUPs and NS.")
   367  				log.Println("The following are needed: IPC, PID, USER, UTS")
   368  			}
   369  			if strings.Contains(err.Error(), "device or resource busy") {
   370  				log.Println("No clue...")
   371  			}
   372  		}
   373  		//if err := termios.SetTermios(1, t); err != nil {
   374  		//	log.Printf("Can't reset termios on fd1: %v", err)
   375  		//}
   376  		os.Exit(1)
   377  	}
   378  
   379  	unprivileged := a[len(a)-1] == "#u"
   380  
   381  	// unlike the original pflask, we require that you set a chroot.
   382  	// If you make it /, strange things are bound to happen.
   383  	// if that is too limiting we'll have to change this.
   384  	if *chroot == "" {
   385  		log.Fatalf("you are required to set the chroot via -chroot")
   386  	}
   387  	if *chroot == "/" {
   388  		log.Println("[WARN]: chroot set to /: strange things are bound to happen")
   389  	}
   390  
   391  	a = flag.Args()
   392  	v("greetings %v\n", a)
   393  	a = a[:len(a)-1]
   394  
   395  	v("pflask: ptsopen")
   396  	controlPTY, processTTY, sname, err := ptsopen()
   397  	if err != nil {
   398  		log.Fatalf(err.Error())
   399  	}
   400  
   401  	// child code. Not really. What really happens here is we set
   402  	// ourselves into the container, and spawn the child. It's a bit odd
   403  	// but we're the parent, but we'll run in the container? I don't know
   404  	// how else to do it. This may require we set some things up first,
   405  	// esp. the network. But, it's all fun and games until someone loses
   406  	// an eye.
   407  	v("MountAll")
   408  	MountAll(*chroot, unprivileged)
   409  
   410  	if !unprivileged {
   411  		v("copyNodes")
   412  		copyNodes(*chroot)
   413  	}
   414  
   415  	v("makePtmx")
   416  	makePtmx(*chroot)
   417  
   418  	v("makeSymlinks")
   419  	makeSymlinks(*chroot)
   420  
   421  	v("makeConsole")
   422  	makeConsole(*chroot, sname, unprivileged)
   423  
   424  	// umask(0022);
   425  
   426  	/* TODO: drop capabilities */
   427  
   428  	// do_user(user);
   429  
   430  	e := make(map[string]string)
   431  	if *keepenv {
   432  		for _, v := range os.Environ() {
   433  			k := strings.SplitN(v, "=", 2)
   434  			e[k[0]] = k[1]
   435  		}
   436  	}
   437  
   438  	term := os.Getenv("TERM")
   439  	e["TERM"] = term
   440  	e["PATH"] = "/usr/sbin:/usr/bin:/sbin:/bin"
   441  	e["USER"] = *user
   442  	e["LOGNAME"] = *user
   443  	e["HOME"] = "/root"
   444  
   445  	if *env != "" {
   446  		for _, c := range strings.Split(*env, ",") {
   447  			k := strings.SplitN(c, "=", 2)
   448  			if len(k) != 2 {
   449  				log.Printf("Bogus environment string %v", c)
   450  				continue
   451  			}
   452  			e[k[0]] = k[1]
   453  		}
   454  	}
   455  	e["container"] = "pflask"
   456  
   457  	if *cgroup == "" {
   458  		var envs []string
   459  		for k, v := range e {
   460  			envs = append(envs, k+"="+v)
   461  		}
   462  		v("envs\n  %v\n", e)
   463  		v("-- chroot --")
   464  		if err := syscall.Chroot(*chroot); err != nil {
   465  			log.Fatal(err)
   466  		}
   467  		v("--- chdir --")
   468  		if err := syscall.Chdir(*chdir); err != nil {
   469  			log.Fatal(err)
   470  		}
   471  		v("---- exec --")
   472  		log.Fatal(syscall.Exec(a[0], a[1:], envs))
   473  	}
   474  
   475  	v("exec.Command")
   476  	c := exec.Command(a[0], a[1:]...)
   477  	c.Env = nil
   478  	for k, v := range e {
   479  		c.Env = append(c.Env, k+"="+v)
   480  	}
   481  
   482  	c.SysProcAttr = &syscall.SysProcAttr{
   483  		Chroot:  *chroot,
   484  		Setctty: true,
   485  		Setsid:  true,
   486  	}
   487  	c.Stdout = processTTY
   488  	c.Stdin = processTTY
   489  	c.Stderr = c.Stdout
   490  	c.SysProcAttr.Setctty = true
   491  	c.SysProcAttr.Setsid = true
   492  	c.SysProcAttr.Ptrace = true
   493  	c.Dir = *chdir
   494  	err = c.Start()
   495  	if err != nil {
   496  		panic(err)
   497  	}
   498  	kid := c.Process.Pid
   499  	log.Printf("Started %d\n", kid)
   500  
   501  	// set up the containers, then resume the process.
   502  	// Its children will get the containers as it clones.
   503  
   504  	cg := cgroupname(*cgpath)
   505  	cg.Do(*cgroup, kid)
   506  
   507  	// sometimes the detach fails. Looks like a race condition: we're
   508  	// sending the detach before the child has hit the TRACE_ME point.
   509  	// Experimentally, when it fails, even one seconds it too short to
   510  	// sleep. Sleep for 5 seconds.
   511  	// Oh well it's not that. It's that there is some one of these
   512  	// processes not in the PID namespace of the child? Who knows, sigh.
   513  	// This is an aspect of the Go runtime that is seriously broken.
   514  
   515  	for i := 0; ; i++ {
   516  		if err = syscall.PtraceDetach(kid); err != nil {
   517  			log.Printf("Could not detach %v, sleeping 250 milliseconds", kid)
   518  			time.Sleep(250 * time.Millisecond)
   519  			continue
   520  		}
   521  		if i > 100 {
   522  			log.Fatalf("Tried for 10 seconds to get a DETACH. Let's fix the go runtime someday")
   523  		}
   524  		break
   525  	}
   526  
   527  	raw()
   528  
   529  	go func() {
   530  		io.Copy(os.Stdout, controlPTY)
   531  		os.Exit(1)
   532  	}()
   533  	io.Copy(controlPTY, os.Stdin)
   534  }