github.com/jlowellwofford/u-root@v1.0.0/xcmds/pflask/pflask.go (about) 1 // Copyright 2015-2017 the u-root Authors. All rights reserved 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "flag" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "log" 13 "os" 14 "os/exec" 15 "path/filepath" 16 "strconv" 17 "strings" 18 "syscall" 19 "time" 20 21 "unsafe" 22 23 // "github.com/u-root/u-root/pkg/termios" 24 "golang.org/x/sys/unix" 25 ) 26 27 // pty support. We used to import github.com/kr/pty but what we need is not that complex. 28 // Thanks to keith rarick for these functions. 29 30 func ptsopen() (pty, tty *os.File, slavename string, err error) { 31 p, err := os.OpenFile("/dev/ptmx", os.O_RDWR, 0) 32 if err != nil { 33 return 34 } 35 36 slavename, err = ptsname(p) 37 if err != nil { 38 return 39 } 40 41 err = ptsunlock(p) 42 if err != nil { 43 return 44 } 45 46 t, err := os.OpenFile(slavename, os.O_RDWR|syscall.O_NOCTTY, 0) 47 if err != nil { 48 return 49 } 50 return p, t, slavename, nil 51 } 52 53 func ptsname(f *os.File) (string, error) { 54 n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) 55 if err != nil { 56 return "", err 57 } 58 return "/dev/pts/" + strconv.Itoa(n), nil 59 } 60 61 func ptsunlock(f *os.File) error { 62 var u uintptr 63 // use TIOCSPTLCK with a zero valued arg to clear the slave pty lock 64 _, _, err := syscall.Syscall(syscall.SYS_IOCTL, f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&u))) 65 if err != 0 { 66 return err 67 } 68 return nil 69 } 70 71 type cgroupname string 72 73 func (c cgroupname) apply(s string, f func(s string)) { 74 // range of strings.Split("",",") is 1. 75 // not exactly what we might expect. 76 if s == "" { 77 return 78 } 79 for _, g := range strings.Split(s, ",") { 80 p := filepath.Join(g) 81 f(p) 82 } 83 } 84 85 func (c cgroupname) Validate(s string) { 86 c.apply(s, func(s string) { 87 if st, err := os.Stat(filepath.Join(string(c), s)); err != nil { 88 log.Fatalf("%v", err) 89 } else if !st.IsDir() { 90 log.Fatalf("%s: not a directory", s) 91 } 92 }) 93 } 94 95 func (c cgroupname) Create(s, name string) { 96 if err := os.MkdirAll(filepath.Join(string(c), s, name), 0755); err != nil { 97 log.Fatal(err) 98 } 99 } 100 101 func (c cgroupname) Attach(s, name string, pid int) { 102 t := filepath.Join(string(c), s, name, "tasks") 103 b := []byte(fmt.Sprintf("%v", pid)) 104 if err := ioutil.WriteFile(t, b, 0600); err != nil { 105 log.Fatal(err) 106 } 107 } 108 109 func (c cgroupname) Destroy(s, n string) { 110 if err := os.RemoveAll(filepath.Join(string(c), s, n)); err != nil { 111 log.Fatal(err) 112 } 113 } 114 115 func (c cgroupname) Do(groups string, pid int) { 116 cgn := fmt.Sprintf("pflask.%d", pid) 117 c.apply(groups, func(s string) { 118 c.Create(s, cgn) 119 c.Attach(s, cgn, pid) 120 }) 121 } 122 123 type mount struct { 124 src, dst, mtype, opts string 125 flags uintptr 126 dir bool 127 needPrivilege bool 128 } 129 130 // Add adds a mount to the global mountlist. Don't know if we need it, but we might for additional volumes? 131 func Add(src, dst, mtype, opts string, flags uintptr, dir bool) { 132 mounts = append(mounts, mount{src: src, dst: dst, mtype: mtype, flags: flags, opts: opts, dir: dir}) 133 134 } 135 136 // One mounts one mountpoint, using base as a prefix for the destination. 137 // If anything goes wrong, we just bail out; we've privatized the namespace 138 // so there is no cleanup we need to do. 139 func (m *mount) One(base string) { 140 dst := filepath.Join(base, m.dst) 141 if m.dir { 142 if err := os.MkdirAll(dst, 0755); err != nil { 143 log.Fatalf("One: mkdirall %v: %v", m.dst, err) 144 } 145 } 146 if err := syscall.Mount(m.src, dst, m.mtype, m.flags, m.opts); err != nil { 147 log.Fatalf("Mount :%s: on :%s: type :%s: flags %x: opts :%v: %v\n", 148 m.src, m.dst, m.mtype, m.flags, m.opts, err) 149 } 150 } 151 152 // MountAll mounts all the mount points. root is a bit special in that it just sets 153 // needed flags for non-shared mounts. 154 func MountAll(base string, unprivileged bool) { 155 root.One("") 156 for _, m := range mounts { 157 if m.needPrivilege && unprivileged { 158 continue 159 } 160 m.One(base) 161 } 162 } 163 164 // modedev returns a mode and dev suitable for use in mknod. 165 // It's very odd, but the Dev either needs to be byteswapped 166 // or comes back byteswapped. I just love it that the world 167 // has fixed on a 45-year-old ABI (stat in this case) 168 // that was abandoned by its designers 30 years ago. 169 // Oh well. 170 func modedev(st os.FileInfo) (uint32, int) { 171 // Weird. The Dev is byte-swapped for some reason. 172 dev := int(st.Sys().(*syscall.Stat_t).Dev) 173 devlo := dev & 0xff 174 dev >>= 8 175 dev |= (devlo << 8) 176 return uint32(st.Sys().(*syscall.Stat_t).Mode), dev 177 } 178 179 // makeConsole sets the right modes for the real console, then creates 180 // a /dev/console in the chroot. 181 func makeConsole(base, console string, unprivileged bool) { 182 if err := os.Chmod(console, 0600); err != nil { 183 log.Printf("%v", err) 184 } 185 if err := os.Chown(console, 0, 0); err != nil { 186 log.Printf("%v", err) 187 } 188 189 st, err := os.Stat(console) 190 if err != nil { 191 log.Printf("%v", err) 192 } 193 194 nn := filepath.Join(base, "/dev/console") 195 mode, dev := modedev(st) 196 if unprivileged { 197 // In unprivileged uses, we can't mknod /dev/console, however, 198 // we can just create a file /dev/console and use bind mount on file. 199 if _, err := os.Stat(nn); err != nil { 200 ioutil.WriteFile(nn, []byte{}, 0600) // best effort, ignore error 201 } 202 } else { 203 if err := syscall.Mknod(nn, mode, dev); err != nil { 204 log.Printf("%v", err) 205 } 206 } 207 208 // if any previous steps failed, this one will too, so we can bail here. 209 if err := syscall.Mount(console, nn, "", syscall.MS_BIND, ""); err != nil { 210 log.Fatalf("Mount :%s: on :%s: flags %v: %v", 211 console, nn, syscall.MS_BIND, err) 212 } 213 214 } 215 216 // copyNodes makes copies of needed nodes in the chroot. 217 func copyNodes(base string) { 218 nodes := []string{ 219 "/dev/tty", 220 "/dev/full", 221 "/dev/null", 222 "/dev/zero", 223 "/dev/random", 224 "/dev/urandom"} 225 226 for _, n := range nodes { 227 st, err := os.Stat(n) 228 if err != nil { 229 log.Printf("%v", err) 230 } 231 nn := filepath.Join(base, n) 232 mode, dev := modedev(st) 233 if err := syscall.Mknod(nn, mode, dev); err != nil { 234 log.Printf("%v", err) 235 } 236 } 237 } 238 239 // makePtmx creates /dev/ptmx in the root. Because of order of operations 240 // it has to happen at a different time than copyNodes. 241 func makePtmx(base string) { 242 dst := filepath.Join(base, "/dev/ptmx") 243 244 if _, err := os.Stat(dst); err == nil { 245 return 246 } 247 248 if err := os.Symlink("/dev/pts/ptmx", dst); err != nil { 249 log.Printf("%v", err) 250 } 251 } 252 253 // makeSymlinks sets up standard symlinks as found in /dev. 254 func makeSymlinks(base string) { 255 linkit := []struct { 256 src, dst string 257 }{ 258 {"/dev/pts/ptmx", "/dev/ptmx"}, 259 {"/proc/kcore", "/dev/core"}, 260 {"/proc/self/fd", "/dev/fd"}, 261 {"/proc/self/fd/0", "/dev/stdin"}, 262 {"/proc/self/fd/1", "/dev/stdout"}, 263 {"/proc/self/fd/2", "/dev/stderr"}, 264 } 265 266 for i := range linkit { 267 dst := filepath.Join(base, linkit[i].dst) 268 269 if _, err := os.Stat(dst); err == nil { 270 continue 271 } 272 273 if err := os.Symlink(linkit[i].src, dst); err != nil { 274 log.Printf("%v", err) 275 } 276 } 277 } 278 279 var ( 280 cgpath = flag.String("cgpath", "/sys/fs/cgroup", "set the cgroups") 281 cgroup = flag.String("cgroup", "", "set the cgroups") 282 mnt = flag.String("mount", "", "define mounts") 283 chroot = flag.String("chroot", "", "where to chroot to") 284 chdir = flag.String("chdir", "/", "where to chrdir to in the chroot") 285 console = flag.String("console", "/dev/console", "where the console is") 286 keepenv = flag.Bool("keepenv", false, "Keep the environment") 287 env = flag.String("env", "", "other environment variables") 288 user = flag.String("user", "root" /*user.User.Username*/, "User name") 289 root = &mount{"", "/", "", "", syscall.MS_SLAVE | syscall.MS_REC, false, false} 290 mounts = []mount{ 291 {"proc", "/proc", "proc", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV, true, false}, 292 {"/proc/sys", "/proc/sys", "", "", syscall.MS_BIND, true, true}, 293 {"", "/proc/sys", "", "", syscall.MS_BIND | syscall.MS_RDONLY | syscall.MS_REMOUNT, true, true}, 294 {"sysfs", "/sys", "sysfs", "", syscall.MS_NOSUID | syscall.MS_NOEXEC | syscall.MS_NODEV | syscall.MS_RDONLY, true, true}, 295 {"tmpfs", "/dev", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_STRICTATIME, true, true}, // unprivileged system needs a pre-populated /dev 296 {"devpts", "/dev/pts", "devpts", "newinstance,ptmxmode=0660,mode=0620", syscall.MS_NOSUID | syscall.MS_NOEXEC, true, false}, 297 {"tmpfs", "/dev/shm", "tmpfs", "mode=1777", syscall.MS_NOSUID | syscall.MS_STRICTATIME | syscall.MS_NODEV, true, false}, 298 {"tmpfs", "/run", "tmpfs", "mode=755", syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_STRICTATIME, true, false}, 299 } 300 ) 301 302 func main() { 303 flag.Parse() 304 305 if len(flag.Args()) < 1 { 306 os.Exit(1) 307 } 308 309 // note the unshare system call worketh not for Go. 310 // So do it ourselves. We have to start ourselves up again, 311 // after having spawned ourselves with lots of clone 312 // flags sets. To know that we spawned ourselves we add '#' 313 // as the last arg. # was chosen because shells normally filter 314 // it out, so its presence as our last arg is highly indicative 315 // that we really spawned us. Also, for testing, you can always 316 // pass it by hand to see what the namespace looks like. 317 a := os.Args 318 if a[len(a)-1][0] != '#' { 319 a = append(a, "#") 320 if syscall.Geteuid() != 0 { 321 a[len(a)-1] = "#u" 322 } 323 // spawn ourselves with the right unsharing settings. 324 c := exec.Command(a[0], a[1:]...) 325 c.SysProcAttr = &syscall.SysProcAttr{Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID} 326 // c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWNET 327 328 if syscall.Geteuid() != 0 { 329 c.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUSER 330 // Interesting. Won't build statically? 331 //c.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getuid(), Size: 1}} 332 //c.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{ContainerID: 0, HostID: syscall.Getgid(), Size: 1}} 333 } 334 335 c.Stdin = os.Stdin 336 c.Stdout = os.Stdout 337 c.Stderr = os.Stderr 338 //t, err := termios.GetTermios(1) 339 //if err != nil { 340 // log.Fatalf("Can't get termios on fd 1: %v", err) 341 //} 342 if err := c.Run(); err != nil { 343 log.Printf(err.Error()) 344 } 345 //if err := termios.SetTermios(1, t); err != nil { 346 // log.Printf("Can't reset termios on fd1: %v", err) 347 //} 348 os.Exit(1) 349 } 350 351 unprivileged := a[len(a)-1] == "#u" 352 353 // unlike pflask, we require that you set a chroot. 354 // If you make it /, strange things are bound to happen. 355 // if that is too limiting we'll have to change this. 356 if *chroot == "" { 357 log.Fatalf("you are required to set the chroot via --chroot") 358 } 359 360 a = flag.Args() 361 //log.Printf("greetings %v\n", a) 362 a = a[:len(a)-1] 363 364 ptm, pts, sname, err := ptsopen() 365 if err != nil { 366 log.Fatalf(err.Error()) 367 } 368 369 // child code. Not really. What really happens here is we set 370 // ourselves into the container, and spawn the child. It's a bit odd 371 // but we're the master, but we'll run in the container? I don't know 372 // how else to do it. This may require we set some things up first, 373 // esp. the network. But, it's all fun and games until someone loses 374 // an eye. 375 MountAll(*chroot, unprivileged) 376 377 if !unprivileged { 378 copyNodes(*chroot) 379 } 380 381 makePtmx(*chroot) 382 383 makeSymlinks(*chroot) 384 385 makeConsole(*chroot, sname, unprivileged) 386 387 //umask(0022); 388 389 /* TODO: drop capabilities */ 390 391 //do_user(user); 392 393 e := make(map[string]string) 394 if *keepenv { 395 for _, v := range os.Environ() { 396 k := strings.SplitN(v, "=", 2) 397 e[k[0]] = k[1] 398 } 399 } 400 401 term := os.Getenv("TERM") 402 e["TERM"] = term 403 e["PATH"] = "/usr/sbin:/usr/bin:/sbin:/bin" 404 e["USER"] = *user 405 e["LOGNAME"] = *user 406 e["HOME"] = "/root" 407 408 if *env != "" { 409 for _, c := range strings.Split(*env, ",") { 410 k := strings.SplitN(c, "=", 2) 411 if len(k) != 2 { 412 log.Printf("Bogus environment string %v", c) 413 continue 414 } 415 e[k[0]] = k[1] 416 } 417 } 418 e["container"] = "pflask" 419 420 if *cgroup == "" { 421 var envs []string 422 for k, v := range e { 423 envs = append(envs, k+"="+v) 424 } 425 if err := syscall.Chroot(*chroot); err != nil { 426 log.Fatal(err) 427 } 428 if err := syscall.Chdir(*chdir); err != nil { 429 log.Fatal(err) 430 } 431 log.Fatal(syscall.Exec(a[0], a[1:], envs)) 432 } 433 434 c := exec.Command(a[0], a[1:]...) 435 c.Env = nil 436 for k, v := range e { 437 c.Env = append(c.Env, k+"="+v) 438 } 439 440 c.SysProcAttr = &syscall.SysProcAttr{ 441 Chroot: *chroot, 442 Setctty: true, 443 Setsid: true, 444 } 445 c.Stdout = pts 446 c.Stdin = pts 447 c.Stderr = c.Stdout 448 c.SysProcAttr.Setctty = true 449 c.SysProcAttr.Setsid = true 450 c.SysProcAttr.Ptrace = true 451 c.Dir = *chdir 452 err = c.Start() 453 if err != nil { 454 panic(err) 455 } 456 kid := c.Process.Pid 457 log.Printf("Started %d\n", kid) 458 459 // set up the containers, then resume the process. 460 // Its children will get the containers as it clones. 461 462 cg := cgroupname(*cgpath) 463 cg.Do(*cgroup, kid) 464 465 // sometimes the detach fails. Looks like a race condition: we're 466 // sending the detach before the child has hit the TRACE_ME point. 467 // Experimentally, when it fails, even one seconds it too short to 468 // sleep. Sleep for 5 seconds. 469 // Oh well it's not that. It's that there is some one of these 470 // processes not in the PID namespace of the child? Who knows, sigh. 471 // This is an aspect of the Go runtime that is seriously broken. 472 473 for i := 0; ; i++ { 474 if err = syscall.PtraceDetach(kid); err != nil { 475 log.Printf("Could not detach %v, sleeping 250 milliseconds", kid) 476 time.Sleep(250 * time.Millisecond) 477 continue 478 } 479 if i > 100 { 480 log.Fatalf("Tried for 10 seconds to get a DETACH. Let's fix the go runtime someday") 481 } 482 break 483 } 484 485 raw() 486 487 go func() { 488 io.Copy(os.Stdout, ptm) 489 os.Exit(1) 490 }() 491 io.Copy(ptm, os.Stdin) 492 }