github.com/rawahars/moby@v24.0.4+incompatible/pkg/rootless/specconv/specconv_linux.go (about)

     1  package specconv // import "github.com/docker/docker/pkg/rootless/specconv"
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path"
     7  	"path/filepath"
     8  	"strconv"
     9  	"strings"
    10  
    11  	specs "github.com/opencontainers/runtime-spec/specs-go"
    12  	"github.com/sirupsen/logrus"
    13  )
    14  
    15  // ToRootless converts spec to be compatible with "rootless" runc.
    16  // * Remove non-supported cgroups
    17  // * Fix up OOMScoreAdj
    18  // * Fix up /proc if --pid=host
    19  // * Fix up /dev/shm and /dev/mqueue if --ipc=host
    20  //
    21  // v2Controllers should be non-nil only if running with v2 and systemd.
    22  func ToRootless(spec *specs.Spec, v2Controllers []string) error {
    23  	return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj())
    24  }
    25  
    26  func getCurrentOOMScoreAdj() int {
    27  	b, err := os.ReadFile("/proc/self/oom_score_adj")
    28  	if err != nil {
    29  		logrus.WithError(err).Warn("failed to read /proc/self/oom_score_adj")
    30  		return 0
    31  	}
    32  	s := string(b)
    33  	i, err := strconv.Atoi(strings.TrimSpace(s))
    34  	if err != nil {
    35  		logrus.WithError(err).Warnf("failed to parse /proc/self/oom_score_adj (%q)", s)
    36  		return 0
    37  	}
    38  	return i
    39  }
    40  
    41  func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error {
    42  	if len(v2Controllers) == 0 {
    43  		if spec.Linux != nil {
    44  			// Remove cgroup settings.
    45  			spec.Linux.Resources = nil
    46  			spec.Linux.CgroupsPath = ""
    47  		}
    48  	} else {
    49  		if spec.Linux != nil && spec.Linux.Resources != nil {
    50  			m := make(map[string]struct{})
    51  			for _, s := range v2Controllers {
    52  				m[s] = struct{}{}
    53  			}
    54  			// Remove devices: https://github.com/containers/crun/issues/255
    55  			spec.Linux.Resources.Devices = nil
    56  			if _, ok := m["memory"]; !ok {
    57  				spec.Linux.Resources.Memory = nil
    58  			}
    59  			if _, ok := m["cpu"]; !ok {
    60  				spec.Linux.Resources.CPU = nil
    61  			}
    62  			if _, ok := m["cpuset"]; !ok {
    63  				if spec.Linux.Resources.CPU != nil {
    64  					spec.Linux.Resources.CPU.Cpus = ""
    65  					spec.Linux.Resources.CPU.Mems = ""
    66  				}
    67  			}
    68  			if _, ok := m["pids"]; !ok {
    69  				spec.Linux.Resources.Pids = nil
    70  			}
    71  			if _, ok := m["io"]; !ok {
    72  				spec.Linux.Resources.BlockIO = nil
    73  			}
    74  			if _, ok := m["rdma"]; !ok {
    75  				spec.Linux.Resources.Rdma = nil
    76  			}
    77  			spec.Linux.Resources.HugepageLimits = nil
    78  			spec.Linux.Resources.Network = nil
    79  		}
    80  	}
    81  
    82  	if spec.Process != nil && spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
    83  		*spec.Process.OOMScoreAdj = currentOOMScoreAdj
    84  	}
    85  
    86  	// Fix up /proc if --pid=host
    87  	pidHost, err := isHostNS(spec, specs.PIDNamespace)
    88  	if err != nil {
    89  		return err
    90  	}
    91  	if pidHost {
    92  		if err := bindMountHostProcfs(spec); err != nil {
    93  			return err
    94  		}
    95  	}
    96  
    97  	// Fix up /dev/shm and /dev/mqueue if --ipc=host
    98  	ipcHost, err := isHostNS(spec, specs.IPCNamespace)
    99  	if err != nil {
   100  		return err
   101  	}
   102  	if ipcHost {
   103  		if err := bindMountHostIPC(spec); err != nil {
   104  			return err
   105  		}
   106  	}
   107  
   108  	return nil
   109  }
   110  
   111  func isHostNS(spec *specs.Spec, nsType specs.LinuxNamespaceType) (bool, error) {
   112  	if strings.Contains(string(nsType), string(os.PathSeparator)) {
   113  		return false, fmt.Errorf("unexpected namespace type %q", nsType)
   114  	}
   115  	if spec.Linux == nil {
   116  		return false, nil
   117  	}
   118  	for _, ns := range spec.Linux.Namespaces {
   119  		if ns.Type == nsType {
   120  			if ns.Path == "" {
   121  				return false, nil
   122  			}
   123  			ns, err := os.Readlink(ns.Path)
   124  			if err != nil {
   125  				return false, err
   126  			}
   127  			selfNS, err := os.Readlink(filepath.Join("/proc/self/ns", string(nsType)))
   128  			if err != nil {
   129  				return false, err
   130  			}
   131  			return ns == selfNS, nil
   132  		}
   133  	}
   134  	return true, nil
   135  }
   136  
   137  func bindMountHostProcfs(spec *specs.Spec) error {
   138  	// Replace procfs mount with rbind
   139  	// https://github.com/containers/podman/blob/v3.0.0-rc1/pkg/specgen/generate/oci.go#L248-L257
   140  	for i, m := range spec.Mounts {
   141  		if path.Clean(m.Destination) == "/proc" {
   142  			newM := specs.Mount{
   143  				Destination: "/proc",
   144  				Type:        "bind",
   145  				Source:      "/proc",
   146  				Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   147  			}
   148  			spec.Mounts[i] = newM
   149  		}
   150  	}
   151  
   152  	if spec.Linux != nil {
   153  		// Remove ReadonlyPaths for /proc/*
   154  		newROP := spec.Linux.ReadonlyPaths[:0]
   155  		for _, s := range spec.Linux.ReadonlyPaths {
   156  			s = path.Clean(s)
   157  			if !strings.HasPrefix(s, "/proc/") {
   158  				newROP = append(newROP, s)
   159  			}
   160  		}
   161  		spec.Linux.ReadonlyPaths = newROP
   162  	}
   163  
   164  	return nil
   165  }
   166  
   167  // withBindMountHostIPC replaces /dev/shm and /dev/mqueue mount with rbind.
   168  // Required for --ipc=host on rootless.
   169  //
   170  // Based on https://github.com/containerd/nerdctl/blob/v1.1.0/cmd/nerdctl/run.go#L836-L860
   171  func bindMountHostIPC(s *specs.Spec) error {
   172  	for i, m := range s.Mounts {
   173  		switch p := path.Clean(m.Destination); p {
   174  		case "/dev/shm", "/dev/mqueue":
   175  			s.Mounts[i] = specs.Mount{
   176  				Destination: p,
   177  				Type:        "bind",
   178  				Source:      p,
   179  				Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   180  			}
   181  		}
   182  	}
   183  	return nil
   184  }