github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/dmz/cloned_binary_linux.go (about)

     1  package dmz
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"strconv"
     9  
    10  	"github.com/sirupsen/logrus"
    11  	"golang.org/x/sys/unix"
    12  
    13  	"github.com/opencontainers/runc/libcontainer/system"
    14  )
    15  
    16  type SealFunc func(**os.File) error
    17  
    18  var (
    19  	_ SealFunc = sealMemfd
    20  	_ SealFunc = sealFile
    21  )
    22  
    23  func isExecutable(f *os.File) bool {
    24  	if err := unix.Faccessat(int(f.Fd()), "", unix.X_OK, unix.AT_EACCESS|unix.AT_EMPTY_PATH); err == nil {
    25  		return true
    26  	} else if err == unix.EACCES {
    27  		return false
    28  	}
    29  	path := "/proc/self/fd/" + strconv.Itoa(int(f.Fd()))
    30  	if err := unix.Access(path, unix.X_OK); err == nil {
    31  		return true
    32  	} else if err == unix.EACCES {
    33  		return false
    34  	}
    35  	// Cannot check -- assume it's executable (if not, exec will fail).
    36  	logrus.Debugf("cannot do X_OK check on binary %s -- assuming it's executable", f.Name())
    37  	return true
    38  }
    39  
    40  const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE
    41  
    42  func sealMemfd(f **os.File) error {
    43  	if err := (*f).Chmod(0o511); err != nil {
    44  		return err
    45  	}
    46  	// Try to set the newer memfd sealing flags, but we ignore
    47  	// errors because they are not needed and we want to continue
    48  	// to work on older kernels.
    49  	fd := (*f).Fd()
    50  	// F_SEAL_FUTURE_WRITE -- Linux 5.1
    51  	_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE)
    52  	// F_SEAL_EXEC -- Linux 6.3
    53  	const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name
    54  	_, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC)
    55  	// Apply all original memfd seals.
    56  	_, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals)
    57  	return os.NewSyscallError("fcntl(F_ADD_SEALS)", err)
    58  }
    59  
    60  // Memfd creates a sealable executable memfd (supported since Linux 3.17).
    61  func Memfd(comment string) (*os.File, SealFunc, error) {
    62  	file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC)
    63  	return file, sealMemfd, err
    64  }
    65  
    66  func sealFile(f **os.File) error {
    67  	if err := (*f).Chmod(0o511); err != nil {
    68  		return err
    69  	}
    70  	// When sealing an O_TMPFILE-style descriptor we need to
    71  	// re-open the path as O_PATH to clear the existing write
    72  	// handle we have.
    73  	opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0)
    74  	if err != nil {
    75  		return fmt.Errorf("reopen tmpfile: %w", err)
    76  	}
    77  	_ = (*f).Close()
    78  	*f = opath
    79  	return nil
    80  }
    81  
    82  // otmpfile creates an open(O_TMPFILE) file in the given directory (supported
    83  // since Linux 3.11).
    84  func otmpfile(dir string) (*os.File, SealFunc, error) {
    85  	file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700)
    86  	if err != nil {
    87  		return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err)
    88  	}
    89  	// Make sure we actually got an unlinked O_TMPFILE descriptor.
    90  	var stat unix.Stat_t
    91  	if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
    92  		file.Close()
    93  		return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err)
    94  	} else if stat.Nlink != 0 {
    95  		file.Close()
    96  		return nil, nil, errors.New("O_TMPFILE has non-zero nlink")
    97  	}
    98  	return file, sealFile, err
    99  }
   100  
   101  // mktemp creates a classic unlinked file in the given directory.
   102  func mktemp(dir string) (*os.File, SealFunc, error) {
   103  	file, err := os.CreateTemp(dir, "runc.")
   104  	if err != nil {
   105  		return nil, nil, err
   106  	}
   107  	// Unlink the file and verify it was unlinked.
   108  	if err := os.Remove(file.Name()); err != nil {
   109  		return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err)
   110  	}
   111  	var stat unix.Stat_t
   112  	if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
   113  		return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err)
   114  	} else if stat.Nlink != 0 {
   115  		return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name())
   116  	}
   117  	return file, sealFile, err
   118  }
   119  
   120  func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) {
   121  	// First, try an executable memfd (supported since Linux 3.17).
   122  	file, sealFn, err = Memfd(comment)
   123  	if err == nil {
   124  		return
   125  	}
   126  	logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err)
   127  
   128  	// The tmpDir here (c.root) might be mounted noexec, so we need a couple of
   129  	// fallbacks to try. It's possible that none of these are writable and
   130  	// executable, in which case there's nothing we can practically do (other
   131  	// than mounting our own executable tmpfs, which would have its own
   132  	// issues).
   133  	tmpDirs := []string{
   134  		tmpDir,
   135  		os.TempDir(),
   136  		"/tmp",
   137  		".",
   138  		"/bin",
   139  		"/",
   140  	}
   141  
   142  	// Try to fallback to O_TMPFILE (supported since Linux 3.11).
   143  	for _, dir := range tmpDirs {
   144  		file, sealFn, err = otmpfile(dir)
   145  		if err != nil {
   146  			continue
   147  		}
   148  		if !isExecutable(file) {
   149  			logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
   150  			file.Close()
   151  			continue
   152  		}
   153  		return
   154  	}
   155  	logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err)
   156  	// Finally, try a classic unlinked temporary file.
   157  	for _, dir := range tmpDirs {
   158  		file, sealFn, err = mktemp(dir)
   159  		if err != nil {
   160  			continue
   161  		}
   162  		if !isExecutable(file) {
   163  			logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
   164  			file.Close()
   165  			continue
   166  		}
   167  		return
   168  	}
   169  	return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err)
   170  }
   171  
   172  // CloneBinary creates a "sealed" clone of a given binary, which can be used to
   173  // thwart attempts by the container process to gain access to host binaries
   174  // through procfs magic-link shenanigans. For more details on why this is
   175  // necessary, see CVE-2019-5736.
   176  func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) {
   177  	logrus.Debugf("cloning %s binary (%d bytes)", name, size)
   178  	file, sealFn, err := getSealableFile(name, tmpDir)
   179  	if err != nil {
   180  		return nil, err
   181  	}
   182  	copied, err := system.Copy(file, src)
   183  	if err != nil {
   184  		file.Close()
   185  		return nil, fmt.Errorf("copy binary: %w", err)
   186  	} else if copied != size {
   187  		file.Close()
   188  		return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size)
   189  	}
   190  	if err := sealFn(&file); err != nil {
   191  		file.Close()
   192  		return nil, fmt.Errorf("could not seal fd: %w", err)
   193  	}
   194  	return file, nil
   195  }
   196  
   197  // IsCloned returns whether the given file can be guaranteed to be a safe exe.
   198  func IsCloned(exe *os.File) bool {
   199  	seals, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0)
   200  	if err != nil {
   201  		// /proc/self/exe is probably not a memfd
   202  		logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err)
   203  		return false
   204  	}
   205  	// The memfd must have all of the base seals applied.
   206  	logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), seals)
   207  	return seals&baseMemfdSeals == baseMemfdSeals
   208  }
   209  
   210  // CloneSelfExe makes a clone of the current process's binary (through
   211  // /proc/self/exe). This binary can then be used for "runc init" in order to
   212  // make sure the container process can never resolve the original runc binary.
   213  // For more details on why this is necessary, see CVE-2019-5736.
   214  func CloneSelfExe(tmpDir string) (*os.File, error) {
   215  	selfExe, err := os.Open("/proc/self/exe")
   216  	if err != nil {
   217  		return nil, fmt.Errorf("opening current binary: %w", err)
   218  	}
   219  	defer selfExe.Close()
   220  
   221  	stat, err := selfExe.Stat()
   222  	if err != nil {
   223  		return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
   224  	}
   225  	size := stat.Size()
   226  
   227  	return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir)
   228  }
   229  
   230  // IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can
   231  // be guaranteed to be safe. This means that it must be a sealed memfd. Other
   232  // types of clones cannot be completely verified as safe.
   233  func IsSelfExeCloned() bool {
   234  	selfExe, err := os.Open("/proc/self/exe")
   235  	if err != nil {
   236  		logrus.Debugf("open /proc/self/exe failed: %v", err)
   237  		return false
   238  	}
   239  	defer selfExe.Close()
   240  	return IsCloned(selfExe)
   241  }