github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/contrib/cmd/memfd-bind/memfd-bind.go (about)

     1  /*
     2   * Copyright (c) 2023 SUSE LLC
     3   * Copyright (c) 2023 Aleksa Sarai <cyphar@cyphar.com>
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package main
    19  
    20  import (
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"os/signal"
    26  	"runtime"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/opencontainers/runc/libcontainer/dmz"
    31  
    32  	"github.com/sirupsen/logrus"
    33  	"github.com/urfave/cli"
    34  	"golang.org/x/sys/unix"
    35  )
    36  
    37  // version will be populated by the Makefile, read from
    38  // VERSION file of the source code.
    39  var version = ""
    40  
    41  // gitCommit will be the hash that the binary was built from
    42  // and will be populated by the Makefile.
    43  var gitCommit = ""
    44  
    45  const (
    46  	usage = `Open Container Initiative contrib/cmd/memfd-bind
    47  
    48  In order to protect against certain container attacks, every runc invocation
    49  that involves creating or joining a container will cause runc to make a copy of
    50  the runc binary in memory (usually to a memfd). While "runc init" is very
    51  short-lived, this extra memory usage can cause problems for containers with
    52  very small memory limits (or containers that have many "runc exec" invocations
    53  applied to them at the same time).
    54  
    55  memfd-bind is a tool to create a persistent memfd-sealed-copy of the runc binary,
    56  which will cause runc to not make its own copy. This means you can get the
    57  benefits of using a sealed memfd as runc's binary (even in a container breakout
    58  attack to get write access to the runc binary, neither the underlying binary
    59  nor the memfd copy can be changed).
    60  
    61  To use memfd-bind, just specify which path you want to create a socket path at
    62  which you want to receive terminals:
    63  
    64      $ sudo memfd-bind /usr/bin/runc
    65  
    66  Note that (due to kernel restrictions on bind-mounts), this program must remain
    67  running on the host in order for the binary to be readable (it is recommended
    68  you use a systemd unit to keep this process around).
    69  
    70  If this program dies, there will be a leftover mountpoint that always returns
    71  -EINVAL when attempting to access it. You need to use memfd-bind --cleanup on the
    72  path in order to unmount the path (regular umount(8) will not work):
    73  
    74      $ sudo memfd-bind --cleanup /usr/bin/runc
    75  
    76  Note that (due to restrictions on /proc/$pid/fd/$fd magic-link resolution),
    77  only privileged users (specifically, those that have ptrace privileges over the
    78  memfd-bind daemon) can access the memfd bind-mount. This means that using this
    79  tool to harden your /usr/bin/runc binary would result in unprivileged users
    80  being unable to execute the binary. If this is an issue, you could make all
    81  privileged process use a different copy of runc (by making a copy in somewhere
    82  like /usr/sbin/runc) and only using memfd-bind for the version used by
    83  privileged users.
    84  `
    85  )
    86  
    87  func cleanup(path string) error {
    88  	file, err := os.OpenFile(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
    89  	if err != nil {
    90  		return fmt.Errorf("cleanup: failed to open runc binary path: %w", err)
    91  	}
    92  	defer file.Close()
    93  	fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd())
    94  
    95  	// Keep umounting until we hit a umount error.
    96  	for unix.Unmount(fdPath, unix.MNT_DETACH) == nil {
    97  		// loop...
    98  		logrus.Debugf("memfd-bind: path %q unmount succeeded...", path)
    99  	}
   100  	logrus.Infof("memfd-bind: path %q has been cleared of all old bind-mounts", path)
   101  	return nil
   102  }
   103  
   104  // memfdClone is a memfd-only implementation of dmz.CloneBinary.
   105  func memfdClone(path string) (*os.File, error) {
   106  	binFile, err := os.Open(path)
   107  	if err != nil {
   108  		return nil, fmt.Errorf("failed to open runc binary path: %w", err)
   109  	}
   110  	defer binFile.Close()
   111  	stat, err := binFile.Stat()
   112  	if err != nil {
   113  		return nil, fmt.Errorf("checking %s size: %w", path, err)
   114  	}
   115  	size := stat.Size()
   116  	memfd, sealFn, err := dmz.Memfd("/proc/self/exe")
   117  	if err != nil {
   118  		return nil, fmt.Errorf("creating memfd failed: %w", err)
   119  	}
   120  	copied, err := io.Copy(memfd, binFile)
   121  	if err != nil {
   122  		return nil, fmt.Errorf("copy binary: %w", err)
   123  	} else if copied != size {
   124  		return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size)
   125  	}
   126  	if err := sealFn(&memfd); err != nil {
   127  		return nil, fmt.Errorf("could not seal fd: %w", err)
   128  	}
   129  	if !dmz.IsCloned(memfd) {
   130  		return nil, fmt.Errorf("cloned memfd is not properly sealed")
   131  	}
   132  	return memfd, nil
   133  }
   134  
   135  func mount(path string) error {
   136  	memfdFile, err := memfdClone(path)
   137  	if err != nil {
   138  		return fmt.Errorf("memfd clone: %w", err)
   139  	}
   140  	defer memfdFile.Close()
   141  	memfdPath := fmt.Sprintf("/proc/self/fd/%d", memfdFile.Fd())
   142  
   143  	// We have to open an O_NOFOLLOW|O_PATH to the memfd magic-link because we
   144  	// cannot bind-mount the memfd itself (it's in the internal kernel mount
   145  	// namespace and cross-mount-namespace bind-mounts are not allowed). This
   146  	// also requires that this program stay alive continuously for the
   147  	// magic-link to stay alive...
   148  	memfdLink, err := os.OpenFile(memfdPath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
   149  	if err != nil {
   150  		return fmt.Errorf("mount: failed to /proc/self/fd magic-link for memfd: %w", err)
   151  	}
   152  	defer memfdLink.Close()
   153  	memfdLinkFdPath := fmt.Sprintf("/proc/self/fd/%d", memfdLink.Fd())
   154  
   155  	exeFile, err := os.OpenFile(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
   156  	if err != nil {
   157  		return fmt.Errorf("mount: failed to open target runc binary path: %w", err)
   158  	}
   159  	defer exeFile.Close()
   160  	exeFdPath := fmt.Sprintf("/proc/self/fd/%d", exeFile.Fd())
   161  
   162  	err = unix.Mount(memfdLinkFdPath, exeFdPath, "", unix.MS_BIND, "")
   163  	if err != nil {
   164  		return fmt.Errorf("mount: failed to mount memfd on top of runc binary path target: %w", err)
   165  	}
   166  
   167  	// If there is a signal we want to do cleanup.
   168  	sigCh := make(chan os.Signal, 1)
   169  	signal.Notify(sigCh, os.Interrupt, unix.SIGTERM, unix.SIGINT)
   170  	go func() {
   171  		<-sigCh
   172  		logrus.Infof("memfd-bind: exit signal caught! cleaning up the bind-mount on %q...", path)
   173  		_ = cleanup(path)
   174  		os.Exit(0)
   175  	}()
   176  
   177  	// Clean up things we don't need...
   178  	_ = exeFile.Close()
   179  	_ = memfdLink.Close()
   180  
   181  	// We now have to stay alive to keep the magic-link alive...
   182  	logrus.Infof("memfd-bind: bind-mount of memfd over %q created -- looping forever!", path)
   183  	for {
   184  		// loop forever...
   185  		time.Sleep(time.Duration(1<<63 - 1))
   186  		// make sure the memfd isn't gc'd
   187  		runtime.KeepAlive(memfdFile)
   188  	}
   189  }
   190  
   191  func main() {
   192  	app := cli.NewApp()
   193  	app.Name = "memfd-bind"
   194  	app.Usage = usage
   195  
   196  	// Set version to be the same as runC.
   197  	var v []string
   198  	if version != "" {
   199  		v = append(v, version)
   200  	}
   201  	if gitCommit != "" {
   202  		v = append(v, "commit: "+gitCommit)
   203  	}
   204  	app.Version = strings.Join(v, "\n")
   205  
   206  	// Set the flags.
   207  	app.Flags = []cli.Flag{
   208  		cli.BoolFlag{
   209  			Name:  "cleanup",
   210  			Usage: "Do not create a new memfd-sealed file, only clean up an existing one at <path>.",
   211  		},
   212  		cli.BoolFlag{
   213  			Name:  "debug",
   214  			Usage: "Enable debug logging.",
   215  		},
   216  	}
   217  
   218  	app.Action = func(ctx *cli.Context) error {
   219  		args := ctx.Args()
   220  		if len(args) != 1 {
   221  			return errors.New("need to specify a single path to the runc binary")
   222  		}
   223  		path := ctx.Args()[0]
   224  
   225  		if ctx.Bool("debug") {
   226  			logrus.SetLevel(logrus.DebugLevel)
   227  		}
   228  
   229  		err := cleanup(path)
   230  		// We only care about cleanup errors when doing --cleanup.
   231  		if ctx.Bool("cleanup") {
   232  			return err
   233  		}
   234  		return mount(path)
   235  	}
   236  	if err := app.Run(os.Args); err != nil {
   237  		fmt.Fprintf(os.Stderr, "memfd-bind: %v\n", err)
   238  		os.Exit(1)
   239  	}
   240  }