github.com/hanks177/podman/v4@v4.1.3-0.20220613032544-16d90015bc83/pkg/rootless/rootless_linux.go (about)

     1  //go:build linux && cgo
     2  // +build linux,cgo
     3  
     4  package rootless
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"os/exec"
    14  	gosignal "os/signal"
    15  	"os/user"
    16  	"runtime"
    17  	"strconv"
    18  	"strings"
    19  	"sync"
    20  	"unsafe"
    21  
    22  	"github.com/hanks177/podman/v4/pkg/errorhandling"
    23  	"github.com/containers/storage/pkg/idtools"
    24  	pmount "github.com/containers/storage/pkg/mount"
    25  	"github.com/containers/storage/pkg/unshare"
    26  	"github.com/pkg/errors"
    27  	"github.com/sirupsen/logrus"
    28  	"github.com/syndtr/gocapability/capability"
    29  	"golang.org/x/sys/unix"
    30  )
    31  
    32  /*
    33  #cgo remote CFLAGS: -Wall -Werror -DDISABLE_JOIN_SHORTCUT
    34  #include <stdlib.h>
    35  #include <sys/types.h>
    36  extern uid_t rootless_uid();
    37  extern uid_t rootless_gid();
    38  extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
    39  extern int reexec_in_user_namespace_wait(int pid, int options);
    40  extern int reexec_userns_join(int pid, char *pause_pid_file_path);
    41  extern int is_fd_inherited(int fd);
    42  */
    43  import "C"
    44  
const (
	// numSig is the exclusive upper bound of the signal numbers that
	// becomeRootInUserNS iterates over when it registers the signals to
	// forward to the re-executed child process.
	numSig = 65 // max number of signals
)
    48  
    49  func runInUser() error {
    50  	return os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done")
    51  }
    52  
var (
	// isRootlessOnce ensures the detection logic inside IsRootless runs a
	// single time; isRootless holds the result of that single evaluation and
	// is what every call to IsRootless returns.
	isRootlessOnce sync.Once
	isRootless     bool
)
    57  
    58  // IsRootless tells us if we are running in rootless mode
    59  func IsRootless() bool {
    60  	isRootlessOnce.Do(func() {
    61  		rootlessUIDInit := int(C.rootless_uid())
    62  		rootlessGIDInit := int(C.rootless_gid())
    63  		if rootlessUIDInit != 0 {
    64  			// This happens if we joined the user+mount namespace as part of
    65  			if err := os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done"); err != nil {
    66  				logrus.Errorf("Failed to set environment variable %s as %s", "_CONTAINERS_USERNS_CONFIGURED", "done")
    67  			}
    68  			if err := os.Setenv("_CONTAINERS_ROOTLESS_UID", fmt.Sprintf("%d", rootlessUIDInit)); err != nil {
    69  				logrus.Errorf("Failed to set environment variable %s as %d", "_CONTAINERS_ROOTLESS_UID", rootlessUIDInit)
    70  			}
    71  			if err := os.Setenv("_CONTAINERS_ROOTLESS_GID", fmt.Sprintf("%d", rootlessGIDInit)); err != nil {
    72  				logrus.Errorf("Failed to set environment variable %s as %d", "_CONTAINERS_ROOTLESS_GID", rootlessGIDInit)
    73  			}
    74  		}
    75  		isRootless = os.Geteuid() != 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != ""
    76  		if !isRootless {
    77  			hasCapSysAdmin, err := unshare.HasCapSysAdmin()
    78  			if err != nil {
    79  				logrus.Warnf("Failed to read CAP_SYS_ADMIN presence for the current process")
    80  			}
    81  			if err == nil && !hasCapSysAdmin {
    82  				isRootless = true
    83  			}
    84  		}
    85  	})
    86  	return isRootless
    87  }
    88  
    89  // GetRootlessUID returns the UID of the user in the parent userNS
    90  func GetRootlessUID() int {
    91  	uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID")
    92  	if uidEnv != "" {
    93  		u, _ := strconv.Atoi(uidEnv)
    94  		return u
    95  	}
    96  	return os.Geteuid()
    97  }
    98  
    99  // GetRootlessGID returns the GID of the user in the parent userNS
   100  func GetRootlessGID() int {
   101  	gidEnv := os.Getenv("_CONTAINERS_ROOTLESS_GID")
   102  	if gidEnv != "" {
   103  		u, _ := strconv.Atoi(gidEnv)
   104  		return u
   105  	}
   106  
   107  	/* If the _CONTAINERS_ROOTLESS_UID is set, assume the gid==uid.  */
   108  	uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID")
   109  	if uidEnv != "" {
   110  		u, _ := strconv.Atoi(uidEnv)
   111  		return u
   112  	}
   113  	return os.Getegid()
   114  }
   115  
   116  func tryMappingTool(uid bool, pid int, hostID int, mappings []idtools.IDMap) error {
   117  	var tool = "newuidmap"
   118  	mode := os.ModeSetuid
   119  	cap := capability.CAP_SETUID
   120  	idtype := "setuid"
   121  	if !uid {
   122  		tool = "newgidmap"
   123  		mode = os.ModeSetgid
   124  		cap = capability.CAP_SETGID
   125  		idtype = "setgid"
   126  	}
   127  	path, err := exec.LookPath(tool)
   128  	if err != nil {
   129  		return errors.Wrapf(err, "command required for rootless mode with multiple IDs")
   130  	}
   131  
   132  	appendTriplet := func(l []string, a, b, c int) []string {
   133  		return append(l, strconv.Itoa(a), strconv.Itoa(b), strconv.Itoa(c))
   134  	}
   135  
   136  	args := []string{path, fmt.Sprintf("%d", pid)}
   137  	args = appendTriplet(args, 0, hostID, 1)
   138  	for _, i := range mappings {
   139  		if hostID >= i.HostID && hostID < i.HostID+i.Size {
   140  			what := "UID"
   141  			where := "/etc/subuid"
   142  			if !uid {
   143  				what = "GID"
   144  				where = "/etc/subgid"
   145  			}
   146  			return errors.Errorf("invalid configuration: the specified mapping %d:%d in %q includes the user %s", i.HostID, i.Size, where, what)
   147  		}
   148  		args = appendTriplet(args, i.ContainerID+1, i.HostID, i.Size)
   149  	}
   150  	cmd := exec.Cmd{
   151  		Path: path,
   152  		Args: args,
   153  	}
   154  
   155  	if output, err := cmd.CombinedOutput(); err != nil {
   156  		logrus.Errorf("running `%s`: %s", strings.Join(args, " "), output)
   157  		errorStr := fmt.Sprintf("cannot setup namespace using %q", path)
   158  		if isSet, err := unshare.IsSetID(cmd.Path, mode, cap); err != nil {
   159  			logrus.Errorf("Failed to check for %s on %s: %v", idtype, path, err)
   160  		} else if !isSet {
   161  			errorStr = fmt.Sprintf("%s: should have %s or have filecaps %s", errorStr, idtype, idtype)
   162  		}
   163  		return errors.Wrapf(err, errorStr)
   164  	}
   165  	return nil
   166  }
   167  
   168  // joinUserAndMountNS re-exec podman in a new userNS and join the user and mount
   169  // namespace of the specified PID without looking up its parent.  Useful to join directly
   170  // the conmon process.
   171  func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
   172  	hasCapSysAdmin, err := unshare.HasCapSysAdmin()
   173  	if err != nil {
   174  		return false, 0, err
   175  	}
   176  	if hasCapSysAdmin || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
   177  		return false, 0, nil
   178  	}
   179  
   180  	cPausePid := C.CString(pausePid)
   181  	defer C.free(unsafe.Pointer(cPausePid))
   182  
   183  	pidC := C.reexec_userns_join(C.int(pid), cPausePid)
   184  	if int(pidC) < 0 {
   185  		return false, -1, errors.Errorf("cannot re-exec process")
   186  	}
   187  
   188  	ret := C.reexec_in_user_namespace_wait(pidC, 0)
   189  	if ret < 0 {
   190  		return false, -1, errors.New("waiting for the re-exec process")
   191  	}
   192  
   193  	return true, int(ret), nil
   194  }
   195  
   196  // GetConfiguredMappings returns the additional IDs configured for the current user.
   197  func GetConfiguredMappings() ([]idtools.IDMap, []idtools.IDMap, error) {
   198  	var uids, gids []idtools.IDMap
   199  	username := os.Getenv("USER")
   200  	if username == "" {
   201  		var id string
   202  		if os.Geteuid() == 0 {
   203  			id = strconv.Itoa(GetRootlessUID())
   204  		} else {
   205  			id = strconv.Itoa(os.Geteuid())
   206  		}
   207  		userID, err := user.LookupId(id)
   208  		if err == nil {
   209  			username = userID.Username
   210  		}
   211  	}
   212  	mappings, err := idtools.NewIDMappings(username, username)
   213  	if err != nil {
   214  		logLevel := logrus.ErrorLevel
   215  		if os.Geteuid() == 0 && GetRootlessUID() == 0 {
   216  			logLevel = logrus.DebugLevel
   217  		}
   218  		logrus.StandardLogger().Logf(logLevel, "cannot find UID/GID for user %s: %v - check rootless mode in man pages.", username, err)
   219  	} else {
   220  		uids = mappings.UIDs()
   221  		gids = mappings.GIDs()
   222  	}
   223  	return uids, gids, nil
   224  }
   225  
   226  func copyMappings(from, to string) error {
   227  	content, err := ioutil.ReadFile(from)
   228  	if err != nil {
   229  		return err
   230  	}
   231  	// Both runc and crun check whether the current process is in a user namespace
   232  	// by looking up 4294967295 in /proc/self/uid_map.  If the mappings would be
   233  	// copied as they are, the check in the OCI runtimes would fail.  So just split
   234  	// it in two different ranges.
   235  	if bytes.Contains(content, []byte("4294967295")) {
   236  		content = []byte("0 0 1\n1 1 4294967294\n")
   237  	}
   238  	return ioutil.WriteFile(to, content, 0600)
   239  }
   240  
// becomeRootInUserNS starts a child process through the C helper
// reexec_in_user_namespace, writes the UID/GID mappings of that child, and then
// waits for it.
//
// Parameters:
//   - pausePid: path handed to the C helper; it is also the file that is read
//     back when the child reports that the race for writing the PID file was
//     lost.
//   - fileToRead: path handed to the C helper (empty when unused).
//   - fileOutput: when not nil, its descriptor is handed to the C helper and
//     the function returns (true, 0, nil) as soon as the child has exited.
//
// Returns whether a child was run, the exit status of that child, and an
// error.  (false, 0, nil) is returned without doing anything when the process
// already has CAP_SYS_ADMIN or when _CONTAINERS_USERNS_CONFIGURED is set; if
// that variable is "init" it is changed to "done" through runInUser.
//
// The named result retErr is read by the deferred cleanup to decide which byte
// to send to the child and whether the child must be killed.
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
	hasCapSysAdmin, err := unshare.HasCapSysAdmin()
	if err != nil {
		return false, 0, err
	}

	if hasCapSysAdmin || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
		if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
			return false, 0, runInUser()
		}
		return false, 0, nil
	}

	// Best-effort diagnostic: warn when "/" is not a shared mount.  A failure
	// to list the mounts is silently ignored.
	if mounts, err := pmount.GetMounts(); err == nil {
		for _, m := range mounts {
			if m.Mountpoint == "/" {
				isShared := false
				for _, o := range strings.Split(m.Optional, ",") {
					if strings.HasPrefix(o, "shared:") {
						isShared = true
						break
					}
				}
				if !isShared {
					logrus.Warningf("%q is not a shared mount, this could cause issues or missing mounts with rootless containers", m.Mountpoint)
				}
				break
			}
		}
	}

	cPausePid := C.CString(pausePid)
	defer C.free(unsafe.Pointer(cPausePid))

	cFileToRead := C.CString(fileToRead)
	defer C.free(unsafe.Pointer(cFileToRead))
	// fileOutputFD stays 0 when no output file was supplied.
	var fileOutputFD C.int
	if fileOutput != nil {
		fileOutputFD = C.int(fileOutput.Fd())
	}

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Socket pair used to synchronize with the child: the descriptor of r is
	// handed to the C helper, while this process writes to and reads from w.
	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_DGRAM, 0)
	if err != nil {
		return false, -1, err
	}
	r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child")

	var pid int

	defer errorhandling.CloseQuiet(r)
	defer errorhandling.CloseQuiet(w)
	// Runs on every return path after this point (before the two closes
	// above): it sends "1" when the function is failing and "0" otherwise,
	// and on failure kills and reaps the child if one was started.
	// NOTE(review): the log message below says "byte 0" even when "1" was the
	// byte being written.
	defer func() {
		toWrite := []byte("0")
		if retErr != nil {
			toWrite = []byte("1")
		}
		if _, err := w.Write(toWrite); err != nil {
			logrus.Errorf("Failed to write byte 0: %q", err)
		}
		if retErr != nil && pid > 0 {
			if err := unix.Kill(pid, unix.SIGKILL); err != nil {
				if err != unix.ESRCH {
					logrus.Errorf("Failed to cleanup process %d: %v", pid, err)
				}
			}
			C.reexec_in_user_namespace_wait(C.int(pid), 0)
		}
	}()

	pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
	pid = int(pidC)
	if pid < 0 {
		return false, -1, errors.Errorf("cannot re-exec process")
	}

	uids, gids, err := GetConfiguredMappings()
	if err != nil {
		return false, -1, err
	}

	uidMap := fmt.Sprintf("/proc/%d/uid_map", pid)
	gidMap := fmt.Sprintf("/proc/%d/gid_map", pid)

	uidsMapped := false

	// UID mappings are attempted in three ways, in order: copy the mappings
	// of the current process, run the mapping tool with the configured
	// ranges, and finally fall back to a single mapping of the effective UID.
	if err := copyMappings("/proc/self/uid_map", uidMap); err == nil {
		uidsMapped = true
	}

	if uids != nil && !uidsMapped {
		err := tryMappingTool(true, pid, os.Geteuid(), uids)
		// If some mappings were specified, do not ignore the error
		if err != nil && len(uids) > 0 {
			return false, -1, err
		}
		uidsMapped = err == nil
	}
	if !uidsMapped {
		logrus.Warnf("Using rootless single mapping into the namespace. This might break some images. Check /etc/subuid and /etc/subgid for adding sub*ids if not using a network user")
		// "deny" is written to the setgroups file of the child before its
		// uid_map in the single-mapping case.
		setgroups := fmt.Sprintf("/proc/%d/setgroups", pid)
		err = ioutil.WriteFile(setgroups, []byte("deny\n"), 0666)
		if err != nil {
			return false, -1, errors.Wrapf(err, "cannot write setgroups file")
		}
		logrus.Debugf("write setgroups file exited with 0")

		err = ioutil.WriteFile(uidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Geteuid())), 0666)
		if err != nil {
			return false, -1, errors.Wrapf(err, "cannot write uid_map")
		}
		logrus.Debugf("write uid_map exited with 0")
	}

	// Same three-step strategy for the GID mappings.
	gidsMapped := false
	if err := copyMappings("/proc/self/gid_map", gidMap); err == nil {
		gidsMapped = true
	}
	if gids != nil && !gidsMapped {
		err := tryMappingTool(false, pid, os.Getegid(), gids)
		// If some mappings were specified, do not ignore the error
		if err != nil && len(gids) > 0 {
			return false, -1, err
		}
		gidsMapped = err == nil
	}
	if !gidsMapped {
		err = ioutil.WriteFile(gidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getegid())), 0666)
		if err != nil {
			return false, -1, errors.Wrapf(err, "cannot write gid_map")
		}
	}

	// The mappings are in place: send "0" over the sync socket, then read the
	// one-byte answer back from the same socket.
	_, err = w.Write([]byte("0"))
	if err != nil {
		return false, -1, errors.Wrapf(err, "write to sync pipe")
	}

	b := make([]byte, 1)
	_, err = w.Read(b)
	if err != nil {
		return false, -1, errors.Wrapf(err, "read from sync pipe")
	}

	// With an output file the answer byte is not inspected: the child is
	// simply waited for.
	if fileOutput != nil {
		ret := C.reexec_in_user_namespace_wait(pidC, 0)
		if ret < 0 {
			return false, -1, errors.New("waiting for the re-exec process")
		}

		return true, 0, nil
	}

	if b[0] == '2' {
		// We have lost the race for writing the PID file, as probably another
		// process created a namespace and wrote the PID.
		// Try to join it.
		data, err := ioutil.ReadFile(pausePid)
		if err == nil {
			pid, err := strconv.ParseUint(string(data), 10, 0)
			if err == nil {
				return joinUserAndMountNS(uint(pid), "")
			}
		}
		return false, -1, errors.New("setting up the process")
	}

	// Any answer other than '0' (and '2', handled above) is a failure.
	if b[0] != '0' {
		return false, -1, errors.New("setting up the process")
	}

	// Register every signal number below numSig except SIGTSTP so that they
	// can be forwarded to the child.
	signals := []os.Signal{}
	for sig := 0; sig < numSig; sig++ {
		if sig == int(unix.SIGTSTP) {
			continue
		}
		signals = append(signals, unix.Signal(sig))
	}

	c := make(chan os.Signal, len(signals))
	gosignal.Notify(c, signals...)
	defer gosignal.Reset()
	// Forward the received signals to the child.  SIGCHLD and SIGPIPE are
	// dropped, and a child that no longer exists (ESRCH) is not reported.
	go func() {
		for s := range c {
			if s == unix.SIGCHLD || s == unix.SIGPIPE {
				continue
			}

			if err := unix.Kill(int(pidC), s.(unix.Signal)); err != nil {
				if err != unix.ESRCH {
					logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pidC), err)
				}
			}
		}
	}()

	// Block until the child terminates and hand its exit status back.
	ret := C.reexec_in_user_namespace_wait(pidC, 0)
	if ret < 0 {
		return false, -1, errors.New("waiting for the re-exec process")
	}

	return true, int(ret), nil
}
   446  
   447  // BecomeRootInUserNS re-exec podman in a new userNS.  It returns whether podman was re-executed
   448  // into a new user namespace and the return code from the re-executed podman process.
   449  // If podman was re-executed the caller needs to propagate the error code returned by the child
   450  // process.
   451  func BecomeRootInUserNS(pausePid string) (bool, int, error) {
   452  	return becomeRootInUserNS(pausePid, "", nil)
   453  }
   454  
   455  // TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
   456  // This is useful when there are already running containers and we
   457  // don't have a pause process yet.  We can use the paths to the conmon
   458  // processes to attempt joining their namespaces.
   459  // If needNewNamespace is set, the file is read from a temporary user
   460  // namespace, this is useful for containers that are running with a
   461  // different uidmap and the unprivileged user has no way to read the
   462  // file owned by the root in the container.
   463  func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
   464  	if len(paths) == 0 {
   465  		return BecomeRootInUserNS(pausePidPath)
   466  	}
   467  
   468  	var lastErr error
   469  	var pausePid int
   470  	foundProcess := false
   471  
   472  	for _, path := range paths {
   473  		if !needNewNamespace {
   474  			data, err := ioutil.ReadFile(path)
   475  			if err != nil {
   476  				lastErr = err
   477  				continue
   478  			}
   479  
   480  			pausePid, err = strconv.Atoi(string(data))
   481  			if err != nil {
   482  				lastErr = errors.Wrapf(err, "cannot parse file %s", path)
   483  				continue
   484  			}
   485  
   486  			lastErr = nil
   487  			break
   488  		} else {
   489  			r, w, err := os.Pipe()
   490  			if err != nil {
   491  				lastErr = err
   492  				continue
   493  			}
   494  
   495  			defer errorhandling.CloseQuiet(r)
   496  
   497  			if _, _, err := becomeRootInUserNS("", path, w); err != nil {
   498  				w.Close()
   499  				lastErr = err
   500  				continue
   501  			}
   502  
   503  			if err := w.Close(); err != nil {
   504  				return false, 0, err
   505  			}
   506  			defer func() {
   507  				C.reexec_in_user_namespace_wait(-1, 0)
   508  			}()
   509  
   510  			b := make([]byte, 32)
   511  
   512  			n, err := r.Read(b)
   513  			if err != nil {
   514  				lastErr = errors.Wrapf(err, "cannot read %s\n", path)
   515  				continue
   516  			}
   517  
   518  			pausePid, err = strconv.Atoi(string(b[:n]))
   519  			if err == nil && unix.Kill(pausePid, 0) == nil {
   520  				foundProcess = true
   521  				lastErr = nil
   522  				break
   523  			}
   524  		}
   525  	}
   526  	if !foundProcess && pausePidPath != "" {
   527  		return BecomeRootInUserNS(pausePidPath)
   528  	}
   529  	if lastErr != nil {
   530  		return false, 0, lastErr
   531  	}
   532  
   533  	return joinUserAndMountNS(uint(pausePid), pausePidPath)
   534  }
   535  
   536  // ReadMappingsProc parses and returns the ID mappings at the specified path.
   537  func ReadMappingsProc(path string) ([]idtools.IDMap, error) {
   538  	file, err := os.Open(path)
   539  	if err != nil {
   540  		return nil, err
   541  	}
   542  	defer file.Close()
   543  
   544  	mappings := []idtools.IDMap{}
   545  
   546  	buf := bufio.NewReader(file)
   547  	for {
   548  		line, _, err := buf.ReadLine()
   549  		if err != nil {
   550  			if err == io.EOF {
   551  				return mappings, nil
   552  			}
   553  			return nil, errors.Wrapf(err, "cannot read line from %s", path)
   554  		}
   555  		if line == nil {
   556  			return mappings, nil
   557  		}
   558  
   559  		containerID, hostID, size := 0, 0, 0
   560  		if _, err := fmt.Sscanf(string(line), "%d %d %d", &containerID, &hostID, &size); err != nil {
   561  			return nil, errors.Wrapf(err, "cannot parse %s", string(line))
   562  		}
   563  		mappings = append(mappings, idtools.IDMap{ContainerID: containerID, HostID: hostID, Size: size})
   564  	}
   565  }
   566  
   567  func matches(id int, configuredIDs []idtools.IDMap, currentIDs []idtools.IDMap) bool {
   568  	// The first mapping is the host user, handle it separately.
   569  	if currentIDs[0].HostID != id || currentIDs[0].Size != 1 {
   570  		return false
   571  	}
   572  
   573  	currentIDs = currentIDs[1:]
   574  	if len(currentIDs) != len(configuredIDs) {
   575  		return false
   576  	}
   577  
   578  	// It is fine to iterate sequentially as both slices are sorted.
   579  	for i := range currentIDs {
   580  		if currentIDs[i].HostID != configuredIDs[i].HostID {
   581  			return false
   582  		}
   583  		if currentIDs[i].Size != configuredIDs[i].Size {
   584  			return false
   585  		}
   586  	}
   587  
   588  	return true
   589  }
   590  
   591  // ConfigurationMatches checks whether the additional uids/gids configured for the user
   592  // match the current user namespace.
   593  func ConfigurationMatches() (bool, error) {
   594  	if !IsRootless() || os.Geteuid() != 0 {
   595  		return true, nil
   596  	}
   597  
   598  	uids, gids, err := GetConfiguredMappings()
   599  	if err != nil {
   600  		return false, err
   601  	}
   602  
   603  	currentUIDs, err := ReadMappingsProc("/proc/self/uid_map")
   604  	if err != nil {
   605  		return false, err
   606  	}
   607  
   608  	if !matches(GetRootlessUID(), uids, currentUIDs) {
   609  		return false, err
   610  	}
   611  
   612  	currentGIDs, err := ReadMappingsProc("/proc/self/gid_map")
   613  	if err != nil {
   614  		return false, err
   615  	}
   616  
   617  	return matches(GetRootlessGID(), gids, currentGIDs), nil
   618  }
   619  
   620  // IsFdInherited checks whether the fd is opened and valid to use
   621  func IsFdInherited(fd int) bool {
   622  	return int(C.is_fd_inherited(C.int(fd))) > 0
   623  }