github.com/hanks177/podman/v4@v4.1.3-0.20220613032544-16d90015bc83/pkg/rootless/rootless_linux.go (about) 1 //go:build linux && cgo 2 // +build linux,cgo 3 4 package rootless 5 6 import ( 7 "bufio" 8 "bytes" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "os/exec" 14 gosignal "os/signal" 15 "os/user" 16 "runtime" 17 "strconv" 18 "strings" 19 "sync" 20 "unsafe" 21 22 "github.com/hanks177/podman/v4/pkg/errorhandling" 23 "github.com/containers/storage/pkg/idtools" 24 pmount "github.com/containers/storage/pkg/mount" 25 "github.com/containers/storage/pkg/unshare" 26 "github.com/pkg/errors" 27 "github.com/sirupsen/logrus" 28 "github.com/syndtr/gocapability/capability" 29 "golang.org/x/sys/unix" 30 ) 31 32 /* 33 #cgo remote CFLAGS: -Wall -Werror -DDISABLE_JOIN_SHORTCUT 34 #include <stdlib.h> 35 #include <sys/types.h> 36 extern uid_t rootless_uid(); 37 extern uid_t rootless_gid(); 38 extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd); 39 extern int reexec_in_user_namespace_wait(int pid, int options); 40 extern int reexec_userns_join(int pid, char *pause_pid_file_path); 41 extern int is_fd_inherited(int fd); 42 */ 43 import "C" 44 45 const ( 46 numSig = 65 // max number of signals 47 ) 48 49 func runInUser() error { 50 return os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done") 51 } 52 53 var ( 54 isRootlessOnce sync.Once 55 isRootless bool 56 ) 57 58 // IsRootless tells us if we are running in rootless mode 59 func IsRootless() bool { 60 isRootlessOnce.Do(func() { 61 rootlessUIDInit := int(C.rootless_uid()) 62 rootlessGIDInit := int(C.rootless_gid()) 63 if rootlessUIDInit != 0 { 64 // This happens if we joined the user+mount namespace as part of 65 if err := os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done"); err != nil { 66 logrus.Errorf("Failed to set environment variable %s as %s", "_CONTAINERS_USERNS_CONFIGURED", "done") 67 } 68 if err := os.Setenv("_CONTAINERS_ROOTLESS_UID", fmt.Sprintf("%d", rootlessUIDInit)); err != nil { 69 logrus.Errorf("Failed to set environment variable %s as %d", "_CONTAINERS_ROOTLESS_UID", rootlessUIDInit) 70 } 71 if err := os.Setenv("_CONTAINERS_ROOTLESS_GID", fmt.Sprintf("%d", rootlessGIDInit)); err != nil { 72 logrus.Errorf("Failed to set environment variable %s as %d", "_CONTAINERS_ROOTLESS_GID", rootlessGIDInit) 73 } 74 } 75 isRootless = os.Geteuid() != 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" 76 if !isRootless { 77 hasCapSysAdmin, err := unshare.HasCapSysAdmin() 78 if err != nil { 79 logrus.Warnf("Failed to read CAP_SYS_ADMIN presence for the current process") 80 } 81 if err == nil && !hasCapSysAdmin { 82 isRootless = true 83 } 84 } 85 }) 86 return isRootless 87 } 88 89 // GetRootlessUID returns the UID of the user in the parent userNS 90 func GetRootlessUID() int { 91 uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID") 92 if uidEnv != "" { 93 u, _ := strconv.Atoi(uidEnv) 94 return u 95 } 96 return os.Geteuid() 97 } 98 99 // GetRootlessGID returns the GID of the user in the parent userNS 100 func GetRootlessGID() int { 101 gidEnv := os.Getenv("_CONTAINERS_ROOTLESS_GID") 102 if gidEnv != "" { 103 u, _ := strconv.Atoi(gidEnv) 104 return u 105 } 106 107 /* If the _CONTAINERS_ROOTLESS_UID is set, assume the gid==uid. */ 108 uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID") 109 if uidEnv != "" { 110 u, _ := strconv.Atoi(uidEnv) 111 return u 112 } 113 return os.Getegid() 114 } 115 116 func tryMappingTool(uid bool, pid int, hostID int, mappings []idtools.IDMap) error { 117 var tool = "newuidmap" 118 mode := os.ModeSetuid 119 cap := capability.CAP_SETUID 120 idtype := "setuid" 121 if !uid { 122 tool = "newgidmap" 123 mode = os.ModeSetgid 124 cap = capability.CAP_SETGID 125 idtype = "setgid" 126 } 127 path, err := exec.LookPath(tool) 128 if err != nil { 129 return errors.Wrapf(err, "command required for rootless mode with multiple IDs") 130 } 131 132 appendTriplet := func(l []string, a, b, c int) []string { 133 return append(l, strconv.Itoa(a), strconv.Itoa(b), strconv.Itoa(c)) 134 } 135 136 args := []string{path, fmt.Sprintf("%d", pid)} 137 args = appendTriplet(args, 0, hostID, 1) 138 for _, i := range mappings { 139 if hostID >= i.HostID && hostID < i.HostID+i.Size { 140 what := "UID" 141 where := "/etc/subuid" 142 if !uid { 143 what = "GID" 144 where = "/etc/subgid" 145 } 146 return errors.Errorf("invalid configuration: the specified mapping %d:%d in %q includes the user %s", i.HostID, i.Size, where, what) 147 } 148 args = appendTriplet(args, i.ContainerID+1, i.HostID, i.Size) 149 } 150 cmd := exec.Cmd{ 151 Path: path, 152 Args: args, 153 } 154 155 if output, err := cmd.CombinedOutput(); err != nil { 156 logrus.Errorf("running `%s`: %s", strings.Join(args, " "), output) 157 errorStr := fmt.Sprintf("cannot setup namespace using %q", path) 158 if isSet, err := unshare.IsSetID(cmd.Path, mode, cap); err != nil { 159 logrus.Errorf("Failed to check for %s on %s: %v", idtype, path, err) 160 } else if !isSet { 161 errorStr = fmt.Sprintf("%s: should have %s or have filecaps %s", errorStr, idtype, idtype) 162 } 163 return errors.Wrapf(err, errorStr) 164 } 165 return nil 166 } 167 168 // joinUserAndMountNS re-exec podman in a new userNS and join the user and mount 169 // namespace of the specified PID without looking up its parent. Useful to join directly 170 // the conmon process. 171 func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { 172 hasCapSysAdmin, err := unshare.HasCapSysAdmin() 173 if err != nil { 174 return false, 0, err 175 } 176 if hasCapSysAdmin || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { 177 return false, 0, nil 178 } 179 180 cPausePid := C.CString(pausePid) 181 defer C.free(unsafe.Pointer(cPausePid)) 182 183 pidC := C.reexec_userns_join(C.int(pid), cPausePid) 184 if int(pidC) < 0 { 185 return false, -1, errors.Errorf("cannot re-exec process") 186 } 187 188 ret := C.reexec_in_user_namespace_wait(pidC, 0) 189 if ret < 0 { 190 return false, -1, errors.New("waiting for the re-exec process") 191 } 192 193 return true, int(ret), nil 194 } 195 196 // GetConfiguredMappings returns the additional IDs configured for the current user. 197 func GetConfiguredMappings() ([]idtools.IDMap, []idtools.IDMap, error) { 198 var uids, gids []idtools.IDMap 199 username := os.Getenv("USER") 200 if username == "" { 201 var id string 202 if os.Geteuid() == 0 { 203 id = strconv.Itoa(GetRootlessUID()) 204 } else { 205 id = strconv.Itoa(os.Geteuid()) 206 } 207 userID, err := user.LookupId(id) 208 if err == nil { 209 username = userID.Username 210 } 211 } 212 mappings, err := idtools.NewIDMappings(username, username) 213 if err != nil { 214 logLevel := logrus.ErrorLevel 215 if os.Geteuid() == 0 && GetRootlessUID() == 0 { 216 logLevel = logrus.DebugLevel 217 } 218 logrus.StandardLogger().Logf(logLevel, "cannot find UID/GID for user %s: %v - check rootless mode in man pages.", username, err) 219 } else { 220 uids = mappings.UIDs() 221 gids = mappings.GIDs() 222 } 223 return uids, gids, nil 224 } 225 226 func copyMappings(from, to string) error { 227 content, err := ioutil.ReadFile(from) 228 if err != nil { 229 return err 230 } 231 // Both runc and crun check whether the current process is in a user namespace 232 // by looking up 4294967295 in /proc/self/uid_map. If the mappings would be 233 // copied as they are, the check in the OCI runtimes would fail. So just split 234 // it in two different ranges. 235 if bytes.Contains(content, []byte("4294967295")) { 236 content = []byte("0 0 1\n1 1 4294967294\n") 237 } 238 return ioutil.WriteFile(to, content, 0600) 239 } 240 241 func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) { 242 hasCapSysAdmin, err := unshare.HasCapSysAdmin() 243 if err != nil { 244 return false, 0, err 245 } 246 247 if hasCapSysAdmin || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { 248 if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { 249 return false, 0, runInUser() 250 } 251 return false, 0, nil 252 } 253 254 if mounts, err := pmount.GetMounts(); err == nil { 255 for _, m := range mounts { 256 if m.Mountpoint == "/" { 257 isShared := false 258 for _, o := range strings.Split(m.Optional, ",") { 259 if strings.HasPrefix(o, "shared:") { 260 isShared = true 261 break 262 } 263 } 264 if !isShared { 265 logrus.Warningf("%q is not a shared mount, this could cause issues or missing mounts with rootless containers", m.Mountpoint) 266 } 267 break 268 } 269 } 270 } 271 272 cPausePid := C.CString(pausePid) 273 defer C.free(unsafe.Pointer(cPausePid)) 274 275 cFileToRead := C.CString(fileToRead) 276 defer C.free(unsafe.Pointer(cFileToRead)) 277 var fileOutputFD C.int 278 if fileOutput != nil { 279 fileOutputFD = C.int(fileOutput.Fd()) 280 } 281 282 runtime.LockOSThread() 283 defer runtime.UnlockOSThread() 284 285 fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_DGRAM, 0) 286 if err != nil { 287 return false, -1, err 288 } 289 r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child") 290 291 var pid int 292 293 defer errorhandling.CloseQuiet(r) 294 defer errorhandling.CloseQuiet(w) 295 defer func() { 296 toWrite := []byte("0") 297 if retErr != nil { 298 toWrite = []byte("1") 299 } 300 if _, err := w.Write(toWrite); err != nil { 301 logrus.Errorf("Failed to write byte 0: %q", err) 302 } 303 if retErr != nil && pid > 0 { 304 if err := unix.Kill(pid, unix.SIGKILL); err != nil { 305 if err != unix.ESRCH { 306 logrus.Errorf("Failed to cleanup process %d: %v", pid, err) 307 } 308 } 309 C.reexec_in_user_namespace_wait(C.int(pid), 0) 310 } 311 }() 312 313 pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD) 314 pid = int(pidC) 315 if pid < 0 { 316 return false, -1, errors.Errorf("cannot re-exec process") 317 } 318 319 uids, gids, err := GetConfiguredMappings() 320 if err != nil { 321 return false, -1, err 322 } 323 324 uidMap := fmt.Sprintf("/proc/%d/uid_map", pid) 325 gidMap := fmt.Sprintf("/proc/%d/gid_map", pid) 326 327 uidsMapped := false 328 329 if err := copyMappings("/proc/self/uid_map", uidMap); err == nil { 330 uidsMapped = true 331 } 332 333 if uids != nil && !uidsMapped { 334 err := tryMappingTool(true, pid, os.Geteuid(), uids) 335 // If some mappings were specified, do not ignore the error 336 if err != nil && len(uids) > 0 { 337 return false, -1, err 338 } 339 uidsMapped = err == nil 340 } 341 if !uidsMapped { 342 logrus.Warnf("Using rootless single mapping into the namespace. This might break some images. Check /etc/subuid and /etc/subgid for adding sub*ids if not using a network user") 343 setgroups := fmt.Sprintf("/proc/%d/setgroups", pid) 344 err = ioutil.WriteFile(setgroups, []byte("deny\n"), 0666) 345 if err != nil { 346 return false, -1, errors.Wrapf(err, "cannot write setgroups file") 347 } 348 logrus.Debugf("write setgroups file exited with 0") 349 350 err = ioutil.WriteFile(uidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Geteuid())), 0666) 351 if err != nil { 352 return false, -1, errors.Wrapf(err, "cannot write uid_map") 353 } 354 logrus.Debugf("write uid_map exited with 0") 355 } 356 357 gidsMapped := false 358 if err := copyMappings("/proc/self/gid_map", gidMap); err == nil { 359 gidsMapped = true 360 } 361 if gids != nil && !gidsMapped { 362 err := tryMappingTool(false, pid, os.Getegid(), gids) 363 // If some mappings were specified, do not ignore the error 364 if err != nil && len(gids) > 0 { 365 return false, -1, err 366 } 367 gidsMapped = err == nil 368 } 369 if !gidsMapped { 370 err = ioutil.WriteFile(gidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getegid())), 0666) 371 if err != nil { 372 return false, -1, errors.Wrapf(err, "cannot write gid_map") 373 } 374 } 375 376 _, err = w.Write([]byte("0")) 377 if err != nil { 378 return false, -1, errors.Wrapf(err, "write to sync pipe") 379 } 380 381 b := make([]byte, 1) 382 _, err = w.Read(b) 383 if err != nil { 384 return false, -1, errors.Wrapf(err, "read from sync pipe") 385 } 386 387 if fileOutput != nil { 388 ret := C.reexec_in_user_namespace_wait(pidC, 0) 389 if ret < 0 { 390 return false, -1, errors.New("waiting for the re-exec process") 391 } 392 393 return true, 0, nil 394 } 395 396 if b[0] == '2' { 397 // We have lost the race for writing the PID file, as probably another 398 // process created a namespace and wrote the PID. 399 // Try to join it. 400 data, err := ioutil.ReadFile(pausePid) 401 if err == nil { 402 pid, err := strconv.ParseUint(string(data), 10, 0) 403 if err == nil { 404 return joinUserAndMountNS(uint(pid), "") 405 } 406 } 407 return false, -1, errors.New("setting up the process") 408 } 409 410 if b[0] != '0' { 411 return false, -1, errors.New("setting up the process") 412 } 413 414 signals := []os.Signal{} 415 for sig := 0; sig < numSig; sig++ { 416 if sig == int(unix.SIGTSTP) { 417 continue 418 } 419 signals = append(signals, unix.Signal(sig)) 420 } 421 422 c := make(chan os.Signal, len(signals)) 423 gosignal.Notify(c, signals...) 424 defer gosignal.Reset() 425 go func() { 426 for s := range c { 427 if s == unix.SIGCHLD || s == unix.SIGPIPE { 428 continue 429 } 430 431 if err := unix.Kill(int(pidC), s.(unix.Signal)); err != nil { 432 if err != unix.ESRCH { 433 logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pidC), err) 434 } 435 } 436 } 437 }() 438 439 ret := C.reexec_in_user_namespace_wait(pidC, 0) 440 if ret < 0 { 441 return false, -1, errors.New("waiting for the re-exec process") 442 } 443 444 return true, int(ret), nil 445 } 446 447 // BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed 448 // into a new user namespace and the return code from the re-executed podman process. 449 // If podman was re-executed the caller needs to propagate the error code returned by the child 450 // process. 451 func BecomeRootInUserNS(pausePid string) (bool, int, error) { 452 return becomeRootInUserNS(pausePid, "", nil) 453 } 454 455 // TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. 456 // This is useful when there are already running containers and we 457 // don't have a pause process yet. We can use the paths to the conmon 458 // processes to attempt joining their namespaces. 459 // If needNewNamespace is set, the file is read from a temporary user 460 // namespace, this is useful for containers that are running with a 461 // different uidmap and the unprivileged user has no way to read the 462 // file owned by the root in the container. 463 func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { 464 if len(paths) == 0 { 465 return BecomeRootInUserNS(pausePidPath) 466 } 467 468 var lastErr error 469 var pausePid int 470 foundProcess := false 471 472 for _, path := range paths { 473 if !needNewNamespace { 474 data, err := ioutil.ReadFile(path) 475 if err != nil { 476 lastErr = err 477 continue 478 } 479 480 pausePid, err = strconv.Atoi(string(data)) 481 if err != nil { 482 lastErr = errors.Wrapf(err, "cannot parse file %s", path) 483 continue 484 } 485 486 lastErr = nil 487 break 488 } else { 489 r, w, err := os.Pipe() 490 if err != nil { 491 lastErr = err 492 continue 493 } 494 495 defer errorhandling.CloseQuiet(r) 496 497 if _, _, err := becomeRootInUserNS("", path, w); err != nil { 498 w.Close() 499 lastErr = err 500 continue 501 } 502 503 if err := w.Close(); err != nil { 504 return false, 0, err 505 } 506 defer func() { 507 C.reexec_in_user_namespace_wait(-1, 0) 508 }() 509 510 b := make([]byte, 32) 511 512 n, err := r.Read(b) 513 if err != nil { 514 lastErr = errors.Wrapf(err, "cannot read %s\n", path) 515 continue 516 } 517 518 pausePid, err = strconv.Atoi(string(b[:n])) 519 if err == nil && unix.Kill(pausePid, 0) == nil { 520 foundProcess = true 521 lastErr = nil 522 break 523 } 524 } 525 } 526 if !foundProcess && pausePidPath != "" { 527 return BecomeRootInUserNS(pausePidPath) 528 } 529 if lastErr != nil { 530 return false, 0, lastErr 531 } 532 533 return joinUserAndMountNS(uint(pausePid), pausePidPath) 534 } 535 536 // ReadMappingsProc parses and returns the ID mappings at the specified path. 537 func ReadMappingsProc(path string) ([]idtools.IDMap, error) { 538 file, err := os.Open(path) 539 if err != nil { 540 return nil, err 541 } 542 defer file.Close() 543 544 mappings := []idtools.IDMap{} 545 546 buf := bufio.NewReader(file) 547 for { 548 line, _, err := buf.ReadLine() 549 if err != nil { 550 if err == io.EOF { 551 return mappings, nil 552 } 553 return nil, errors.Wrapf(err, "cannot read line from %s", path) 554 } 555 if line == nil { 556 return mappings, nil 557 } 558 559 containerID, hostID, size := 0, 0, 0 560 if _, err := fmt.Sscanf(string(line), "%d %d %d", &containerID, &hostID, &size); err != nil { 561 return nil, errors.Wrapf(err, "cannot parse %s", string(line)) 562 } 563 mappings = append(mappings, idtools.IDMap{ContainerID: containerID, HostID: hostID, Size: size}) 564 } 565 } 566 567 func matches(id int, configuredIDs []idtools.IDMap, currentIDs []idtools.IDMap) bool { 568 // The first mapping is the host user, handle it separately. 569 if currentIDs[0].HostID != id || currentIDs[0].Size != 1 { 570 return false 571 } 572 573 currentIDs = currentIDs[1:] 574 if len(currentIDs) != len(configuredIDs) { 575 return false 576 } 577 578 // It is fine to iterate sequentially as both slices are sorted. 579 for i := range currentIDs { 580 if currentIDs[i].HostID != configuredIDs[i].HostID { 581 return false 582 } 583 if currentIDs[i].Size != configuredIDs[i].Size { 584 return false 585 } 586 } 587 588 return true 589 } 590 591 // ConfigurationMatches checks whether the additional uids/gids configured for the user 592 // match the current user namespace. 593 func ConfigurationMatches() (bool, error) { 594 if !IsRootless() || os.Geteuid() != 0 { 595 return true, nil 596 } 597 598 uids, gids, err := GetConfiguredMappings() 599 if err != nil { 600 return false, err 601 } 602 603 currentUIDs, err := ReadMappingsProc("/proc/self/uid_map") 604 if err != nil { 605 return false, err 606 } 607 608 if !matches(GetRootlessUID(), uids, currentUIDs) { 609 return false, err 610 } 611 612 currentGIDs, err := ReadMappingsProc("/proc/self/gid_map") 613 if err != nil { 614 return false, err 615 } 616 617 return matches(GetRootlessGID(), gids, currentGIDs), nil 618 } 619 620 // IsFdInherited checks whether the fd is opened and valid to use 621 func IsFdInherited(fd int) bool { 622 return int(C.is_fd_inherited(C.int(fd))) > 0 623 }