github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/cmds/exp/pflask/pflask.go (about)

     1  // Copyright 2015-2017 the u-root Authors. All rights reserved
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"log"
    13  	"os"
    14  	"os/exec"
    15  	"path/filepath"
    16  	"strconv"
    17  	"strings"
    18  	"syscall"
    19  	"time"
    20  
    21  	"unsafe"
    22  
    23  	// "github.com/u-root/u-root/pkg/termios"
    24  	"golang.org/x/sys/unix"
    25  )
    26  
    27  // pty support. We used to import github.com/kr/pty but what we need is not that complex.
    28  // Thanks to keith rarick for these functions.
    29  
    30  func ptsopen() (controlPTY, processTTY *os.File, ttyname string, err error) {
    31  	p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0)
    32  	if err != nil {
    33  		return
    34  	}
    35  
    36  	ttyname, err = ptsname(p)
    37  	if err != nil {
    38  		return
    39  	}
    40  
    41  	err = ptsunlock(p)
    42  	if err != nil {
    43  		return
    44  	}
    45  
    46  	t, err := os.OpenFile(ttyname, os.O_RDWR|syscall.O_NOCTTY, 0)
    47  	if err != nil {
    48  		return
    49  	}
    50  	return p, t, ttyname, nil
    51  }
    52  
    53  func ptsname(f *os.File) (string, error) {
    54  	n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
    55  	if err != nil {
    56  		return "", err
    57  	}
    58  	return "/dev/pts/" + strconv.Itoa(n), nil
    59  }
    60  
    61  func ptsunlock(f *os.File) error {
    62  	var u uintptr
    63  	// use TIOCSPTLCK with a zero valued arg to clear the pty lock
    64  	_, _, err := syscall.Syscall(syscall.SYS_IOCTL, f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&u)))
    65  	if err != 0 {
    66  		return err
    67  	}
    68  	return nil
    69  }
    70  
    71  type cgroupname string
    72  
    73  func (c cgroupname) apply(s string, f func(s string)) {
    74  	// range of strings.Split("",",") is 1.
    75  	// not exactly what we might expect.
    76  	if s == "" {
    77  		return
    78  	}
    79  	for _, g := range strings.Split(s, ",") {
    80  		p := filepath.Join(g)
    81  		f(p)
    82  	}
    83  }
    84  
    85  func (c cgroupname) Validate(s string) {
    86  	c.apply(s, func(s string) {
    87  		if st, err := os.Stat(filepath.Join(string(c), s)); err != nil {
    88  			log.Fatalf("%v", err)
    89  		} else if !st.IsDir() {
    90  			log.Fatalf("%s: not a directory", s)
    91  		}
    92  	})
    93  }
    94  
    95  func (c cgroupname) Create(s, name string) {
    96  	if err := os.MkdirAll(filepath.Join(string(c), s, name), 0755); err != nil {
    97  		log.Fatal(err)
    98  	}
    99  }
   100  
   101  func (c cgroupname) Attach(s, name string, pid int) {
   102  	t := filepath.Join(string(c), s, name, "tasks")
   103  	b := []byte(fmt.Sprintf("%v", pid))
   104  	if err := ioutil.WriteFile(t, b, 0600); err != nil {
   105  		log.Fatal(err)
   106  	}
   107  }
   108  
   109  func (c cgroupname) Destroy(s, n string) {
   110  	if err := os.RemoveAll(filepath.Join(string(c), s, n)); err != nil {
   111  		log.Fatal(err)
   112  	}
   113  }
   114  
   115  func (c cgroupname) Do(groups string, pid int) {
   116  	cgn := fmt.Sprintf("pflask.%d", pid)
   117  	c.apply(groups, func(s string) {
   118  		c.Create(s, cgn)
   119  		c.Attach(s, cgn, pid)
   120  	})
   121  }
   122  
   123  type mount struct {
   124  	src, dst, mtype, opts string
   125  	flags                 uintptr
   126  	dir                   bool
   127  	needPrivilege         bool
   128  }
   129  
   130  // Add adds a mount to the global mountlist. Don't know if we need it, but we might for additional volumes?
   131  func Add(src, dst, mtype, opts string, flags uintptr, dir bool) {
   132  	mounts = append(mounts, mount{src: src, dst: dst, mtype: mtype, flags: flags, opts: opts, dir: dir})
   133  
   134  }
   135  
   136  // One mounts one mountpoint, using base as a prefix for the destination.
   137  // If anything goes wrong, we just bail out; we've privatized the namespace
   138  // so there is no cleanup we need to do.
   139  func (m *mount) One(base string) {
   140  	dst := filepath.Join(base, m.dst)
   141  	if m.dir {
   142  		if err := os.MkdirAll(dst, 0755); err != nil {
   143  			log.Fatalf("One: mkdirall %v: %v", m.dst, err)
   144  		}
   145  	}
   146  	if err := syscall.Mount(m.src, dst, m.mtype, m.flags, m.opts); err != nil {
   147  		log.Fatalf("Mount :%s: on :%s: type :%s: flags %x: opts :%v: %v\n",
   148  			m.src, m.dst, m.mtype, m.flags, m.opts, err)
   149  	}
   150  }
   151  
   152  // MountAll mounts all the mount points. root is a bit special in that it just sets
   153  // needed flags for non-shared mounts.
   154  func MountAll(base string, unprivileged bool) {
   155  	root.One("")
   156  	for _, m := range mounts {
   157  		if m.needPrivilege && unprivileged {
   158  			continue
   159  		}
   160  		m.One(base)
   161  	}
   162  }
   163  
   164  // modedev returns a mode and dev suitable for use in mknod.
   165  // It's very odd, but the Dev either needs to be byteswapped
   166  // or comes back byteswapped. I just love it that the world
   167  // has fixed on a 45-year-old ABI (stat in this case)
   168  // that was abandoned by its designers 30 years ago.
   169  // Oh well.
   170  func modedev(st os.FileInfo) (uint32, int) {
   171  	// Weird. The Dev is byte-swapped for some reason.
   172  	dev := int(st.Sys().(*syscall.Stat_t).Dev)
   173  	devlo := dev & 0xff
   174  	dev >>= 8
   175  	dev |= (devlo << 8)
   176  	return uint32(st.Sys().(*syscall.Stat_t).Mode), dev
   177  }
   178  
   179  // makeConsole sets the right modes for the real console, then creates
   180  // a /dev/console in the chroot.
   181  func makeConsole(base, console string, unprivileged bool) {
   182  	if err := os.Chmod(console, 0600); err != nil {
   183  		log.Printf("%v", err)
   184  	}
   185  	if err := os.Chown(console, 0, 0); err != nil {
   186  		log.Printf("%v", err)
   187  	}
   188  
   189  	st, err := os.Stat(console)
   190  	if err != nil {
   191  		log.Printf("%v", err)
   192  	}
   193  
   194  	nn := filepath.Join(base, "/dev/console")
   195  	mode, dev := modedev(st)
   196  	if unprivileged {
   197  		// In unprivileged uses, we can't mknod /dev/console, however,
   198  		// we can just create a file /dev/console and use bind mount on file.
   199  		if _, err := os.Stat(nn); err != nil {
   200  			ioutil.WriteFile(nn, []byte{}, 0600) // best effort, ignore error
   201  		}
   202  	} else {
   203  		if err := syscall.Mknod(nn, mode, dev); err != nil {
   204  			log.Printf("%v", err)
   205  		}
   206  	}
   207  
   208  	// if any previous steps failed, this one will too, so we can bail here.
   209  	if err := syscall.Mount(console, nn, "", syscall.MS_BIND, ""); err != nil {
   210  		log.Fatalf("Mount :%s: on :%s: flags %v: %v",
   211  			console, nn, syscall.MS_BIND, err)
   212  	}
   213  
   214  }
   215  
   216  // copyNodes makes copies of needed nodes in the chroot.
   217  func copyNodes(base string) {
   218  	nodes := []string{
   219  		"/dev/tty",
   220  		"/dev/full",
   221  		"/dev/null",
   222  		"/dev/zero",
   223  		"/dev/random",
   224  		"/dev/urandom"}
   225  
   226  	for _, n := range nodes {
   227  		st, err := os.Stat(n)
   228  		if err != nil {
   229  			log.Printf("%v", err)
   230  		}
   231  		nn := filepath.Join(base, n)
   232  		mode, dev := modedev(st)
   233  		if err := syscall.Mknod(nn, mode, dev); err != nil {
   234  			log.Printf("%v", err)
   235  		}
   236  	}
   237  }
   238  
   239  // makePtmx creates /dev/ptmx in the root. Because of order of operations
   240  // it has to happen at a different time than copyNodes.
   241  func makePtmx(base string) {
   242  	dst := filepath.Join(base, "/dev/ptmx")
   243  
   244  	if _, err := os.Stat(dst); err == nil {
   245  		return
   246  	}
   247  
   248  	if err := os.Symlink("/dev/pts/ptmx", dst); err != nil {
   249  		log.Printf("%v", err)
   250  	}
   251  }
   252  
   253  // makeSymlinks sets up standard symlinks as found in /dev.
   254  func makeSymlinks(base string) {
   255  	linkit := []struct {
   256  		src, dst string
   257  	}{
   258  		{"/dev/pts/ptmx", "/dev/ptmx"},
   259  		{"/proc/kcore", "/dev/core"},
   260  		{"/proc/self/fd", "/dev/fd"},
   261  		{"/proc/self/fd/0", "/dev/stdin"},
   262  		{"/proc/self/fd/1", "/dev/stdout"},
   263  		{"/proc/self/fd/2", "/dev/stderr"},
   264  	}
   265  
   266  	for i := range linkit {
   267  		dst := filepath.Join(base, linkit[i].dst)
   268  
   269  		if _, err := os.Stat(dst); err == nil {
   270  			continue
   271  		}
   272  
   273  		if err := os.Symlink(linkit[i].src, dst); err != nil {
   274  			log.Printf("%v", err)
   275  		}
   276  	}
   277  }
   278  
   279  var (
   280  	cgpath  = flag.String("cgpath", "/sys/fs/cgroup", "set the cgroups")
   281  	cgroup  = flag.String("cgroup", "", "set the cgroups")
   282  	mnt     = flag.String("mount", "", "define mounts")
   283  	chroot  = flag.String("chroot", "", "where to chroot to")
   284  	chdir   = flag.String("chdir", "/", "where to chrdir to in the chroot")
   285  	console = flag.String("console", "/dev/console", "where the console is")
   286  	keepenv = flag.Bool("keepenv", false, "Keep the environment")
   287  	env     = flag.String("env", "", "other environment variables")
   288  	user    = flag.String("user", "root" /*user.User.Username*/, "User name")
   289  	root    = &mount{"", "/", "", "", syscall.MS_SLAVE | syscall.MS_REC, false, false}
   290  	mounts  = []mount{
   291  		{"proc", "/proc", "proc", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV, true, false},
   292  		{"/proc/sys", "/proc/sys", "", "", syscall.MS_BIND, true, true},
   293  		{"", "/proc/sys", "", "", syscall.MS_BIND | syscall.MS_RDONLY | syscall.MS_REMOUNT, true, true},
   294  		{"sysfs", "/sys", "sysfs", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV | syscall.MS_RDONLY, true, true},
   295  		{"tmpfs", "/dev", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_STRICTATIME, true, true}, // unprivileged system needs a pre-populated /dev
   296  		{"devpts", "/dev/pts", "devpts", "newinstance,ptmxmode=0660,mode=0620", syscall.MS_NOSUID | syscall.MS_NOEXEC, true, false},
   297  		{"tmpfs", "/dev/shm", "tmpfs", "mode=1777", syscall.MS_NOSUID | syscall.MS_STRICTATIME | syscall.MS_NODEV, true, false},
   298  		{"tmpfs", "/run", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, true, false},
   299  	}
   300  )
   301  
   302  func main() {
   303  	flag.Parse()
   304  
   305  	if len(flag.Args()) < 1 {
   306  		os.Exit(1)
   307  	}
   308  
   309  	// note the unshare system call worketh not for Go.
   310  	// So do it ourselves. We have to start ourselves up again,
   311  	// after having spawned ourselves with lots of clone
   312  	// flags sets. To know that we spawned ourselves we add '#'
   313  	// as the last arg. # was chosen because shells normally filter
   314  	// it out, so its presence as our last arg is highly indicative
   315  	// that we really spawned us. Also, for testing, you can always
   316  	// pass it by hand to see what the namespace looks like.
   317  	a := os.Args
   318  	if a[len(a)-1][0] != '#' {
   319  		a = append(a, "#")
   320  		if syscall.Geteuid() != 0 {
   321  			a[len(a)-1] = "#u"
   322  		}
   323  		// spawn ourselves with the right unsharing settings.
   324  		c := exec.Command(a[0], a[1:]...)
   325  		c.SysProcAttr = &syscall.SysProcAttr{Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID}
   326  		//		c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNET
   327  
   328  		if syscall.Geteuid() != 0 {
   329  			c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUSER
   330  			// Interesting. Won't build statically?
   331  			//c.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getuid(), Size: 1}}
   332  			//c.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getgid(), Size: 1}}
   333  		}
   334  
   335  		c.Stdin = os.Stdin
   336  		c.Stdout = os.Stdout
   337  		c.Stderr = os.Stderr
   338  		//t, err := termios.GetTermios(1)
   339  		//if err != nil {
   340  		//	log.Fatalf("Can't get termios on fd 1: %v", err)
   341  		//}
   342  		if err := c.Run(); err != nil {
   343  			log.Printf(err.Error())
   344  		}
   345  		//if err := termios.SetTermios(1, t); err != nil {
   346  		//	log.Printf("Can't reset termios on fd1: %v", err)
   347  		//}
   348  		os.Exit(1)
   349  	}
   350  
   351  	unprivileged := a[len(a)-1] == "#u"
   352  
   353  	// unlike pflask, we require that you set a chroot.
   354  	// If you make it /, strange things are bound to happen.
   355  	// if that is too limiting we'll have to change this.
   356  	if *chroot == "" {
   357  		log.Fatalf("you are required to set the chroot via --chroot")
   358  	}
   359  
   360  	a = flag.Args()
   361  	//log.Printf("greetings %v\n", a)
   362  	a = a[:len(a)-1]
   363  
   364  	controlPTY, processTTY, sname, err := ptsopen()
   365  	if err != nil {
   366  		log.Fatalf(err.Error())
   367  	}
   368  
   369  	// child code. Not really. What really happens here is we set
   370  	// ourselves into the container, and spawn the child. It's a bit odd
   371  	// but we're the parent, but we'll run in the container? I don't know
   372  	// how else to do it. This may require we set some things up first,
   373  	// esp. the network. But, it's all fun and games until someone loses
   374  	// an eye.
   375  	MountAll(*chroot, unprivileged)
   376  
   377  	if !unprivileged {
   378  		copyNodes(*chroot)
   379  	}
   380  
   381  	makePtmx(*chroot)
   382  
   383  	makeSymlinks(*chroot)
   384  
   385  	makeConsole(*chroot, sname, unprivileged)
   386  
   387  	//umask(0022);
   388  
   389  	/* TODO: drop capabilities */
   390  
   391  	//do_user(user);
   392  
   393  	e := make(map[string]string)
   394  	if *keepenv {
   395  		for _, v := range os.Environ() {
   396  			k := strings.SplitN(v, "=", 2)
   397  			e[k[0]] = k[1]
   398  		}
   399  	}
   400  
   401  	term := os.Getenv("TERM")
   402  	e["TERM"] = term
   403  	e["PATH"] = "/usr/sbin:/usr/bin:/sbin:/bin"
   404  	e["USER"] = *user
   405  	e["LOGNAME"] = *user
   406  	e["HOME"] = "/root"
   407  
   408  	if *env != "" {
   409  		for _, c := range strings.Split(*env, ",") {
   410  			k := strings.SplitN(c, "=", 2)
   411  			if len(k) != 2 {
   412  				log.Printf("Bogus environment string %v", c)
   413  				continue
   414  			}
   415  			e[k[0]] = k[1]
   416  		}
   417  	}
   418  	e["container"] = "pflask"
   419  
   420  	if *cgroup == "" {
   421  		var envs []string
   422  		for k, v := range e {
   423  			envs = append(envs, k+"="+v)
   424  		}
   425  		if err := syscall.Chroot(*chroot); err != nil {
   426  			log.Fatal(err)
   427  		}
   428  		if err := syscall.Chdir(*chdir); err != nil {
   429  			log.Fatal(err)
   430  		}
   431  		log.Fatal(syscall.Exec(a[0], a[1:], envs))
   432  	}
   433  
   434  	c := exec.Command(a[0], a[1:]...)
   435  	c.Env = nil
   436  	for k, v := range e {
   437  		c.Env = append(c.Env, k+"="+v)
   438  	}
   439  
   440  	c.SysProcAttr = &syscall.SysProcAttr{
   441  		Chroot:  *chroot,
   442  		Setctty: true,
   443  		Setsid:  true,
   444  	}
   445  	c.Stdout = processTTY
   446  	c.Stdin = processTTY
   447  	c.Stderr = c.Stdout
   448  	c.SysProcAttr.Setctty = true
   449  	c.SysProcAttr.Setsid = true
   450  	c.SysProcAttr.Ptrace = true
   451  	c.Dir = *chdir
   452  	err = c.Start()
   453  	if err != nil {
   454  		panic(err)
   455  	}
   456  	kid := c.Process.Pid
   457  	log.Printf("Started %d\n", kid)
   458  
   459  	// set up the containers, then resume the process.
   460  	// Its children will get the containers as it clones.
   461  
   462  	cg := cgroupname(*cgpath)
   463  	cg.Do(*cgroup, kid)
   464  
   465  	// sometimes the detach fails. Looks like a race condition: we're
   466  	// sending the detach before the child has hit the TRACE_ME point.
   467  	// Experimentally, when it fails, even one seconds it too short to
   468  	// sleep. Sleep for 5 seconds.
   469  	// Oh well it's not that. It's that there is some one of these
   470  	// processes not in the PID namespace of the child? Who knows, sigh.
   471  	// This is an aspect of the Go runtime that is seriously broken.
   472  
   473  	for i := 0; ; i++ {
   474  		if err = syscall.PtraceDetach(kid); err != nil {
   475  			log.Printf("Could not detach %v, sleeping 250 milliseconds", kid)
   476  			time.Sleep(250 * time.Millisecond)
   477  			continue
   478  		}
   479  		if i > 100 {
   480  			log.Fatalf("Tried for 10 seconds to get a DETACH. Let's fix the go runtime someday")
   481  		}
   482  		break
   483  	}
   484  
   485  	raw()
   486  
   487  	go func() {
   488  		io.Copy(os.Stdout, controlPTY)
   489  		os.Exit(1)
   490  	}()
   491  	io.Copy(controlPTY, os.Stdin)
   492  }