github.com/adityamillind98/moby@v23.0.0-rc.4+incompatible/pkg/rootless/specconv/specconv_linux.go (about)

     1  package specconv // import "github.com/docker/docker/pkg/rootless/specconv"
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path"
     7  	"path/filepath"
     8  	"strconv"
     9  	"strings"
    10  
    11  	specs "github.com/opencontainers/runtime-spec/specs-go"
    12  	"github.com/sirupsen/logrus"
    13  )
    14  
    15  // ToRootless converts spec to be compatible with "rootless" runc.
    16  // * Remove non-supported cgroups
    17  // * Fix up OOMScoreAdj
    18  // * Fix up /proc if --pid=host
    19  // * Fix up /dev/shm and /dev/mqueue if --ipc=host
    20  //
    21  // v2Controllers should be non-nil only if running with v2 and systemd.
    22  func ToRootless(spec *specs.Spec, v2Controllers []string) error {
    23  	return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj())
    24  }
    25  
    26  func getCurrentOOMScoreAdj() int {
    27  	b, err := os.ReadFile("/proc/self/oom_score_adj")
    28  	if err != nil {
    29  		logrus.WithError(err).Warn("failed to read /proc/self/oom_score_adj")
    30  		return 0
    31  	}
    32  	s := string(b)
    33  	i, err := strconv.Atoi(strings.TrimSpace(s))
    34  	if err != nil {
    35  		logrus.WithError(err).Warnf("failed to parse /proc/self/oom_score_adj (%q)", s)
    36  		return 0
    37  	}
    38  	return i
    39  }
    40  
    41  func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error {
    42  	if len(v2Controllers) == 0 {
    43  		// Remove cgroup settings.
    44  		spec.Linux.Resources = nil
    45  		spec.Linux.CgroupsPath = ""
    46  	} else {
    47  		if spec.Linux.Resources != nil {
    48  			m := make(map[string]struct{})
    49  			for _, s := range v2Controllers {
    50  				m[s] = struct{}{}
    51  			}
    52  			// Remove devices: https://github.com/containers/crun/issues/255
    53  			spec.Linux.Resources.Devices = nil
    54  			if _, ok := m["memory"]; !ok {
    55  				spec.Linux.Resources.Memory = nil
    56  			}
    57  			if _, ok := m["cpu"]; !ok {
    58  				spec.Linux.Resources.CPU = nil
    59  			}
    60  			if _, ok := m["cpuset"]; !ok {
    61  				if spec.Linux.Resources.CPU != nil {
    62  					spec.Linux.Resources.CPU.Cpus = ""
    63  					spec.Linux.Resources.CPU.Mems = ""
    64  				}
    65  			}
    66  			if _, ok := m["pids"]; !ok {
    67  				spec.Linux.Resources.Pids = nil
    68  			}
    69  			if _, ok := m["io"]; !ok {
    70  				spec.Linux.Resources.BlockIO = nil
    71  			}
    72  			if _, ok := m["rdma"]; !ok {
    73  				spec.Linux.Resources.Rdma = nil
    74  			}
    75  			spec.Linux.Resources.HugepageLimits = nil
    76  			spec.Linux.Resources.Network = nil
    77  		}
    78  	}
    79  
    80  	if spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
    81  		*spec.Process.OOMScoreAdj = currentOOMScoreAdj
    82  	}
    83  
    84  	// Fix up /proc if --pid=host
    85  	pidHost, err := isHostNS(spec, specs.PIDNamespace)
    86  	if err != nil {
    87  		return err
    88  	}
    89  	if pidHost {
    90  		if err := bindMountHostProcfs(spec); err != nil {
    91  			return err
    92  		}
    93  	}
    94  
    95  	// Fix up /dev/shm and /dev/mqueue if --ipc=host
    96  	ipcHost, err := isHostNS(spec, specs.IPCNamespace)
    97  	if err != nil {
    98  		return err
    99  	}
   100  	if ipcHost {
   101  		if err := bindMountHostIPC(spec); err != nil {
   102  			return err
   103  		}
   104  	}
   105  
   106  	return nil
   107  }
   108  
   109  func isHostNS(spec *specs.Spec, nsType specs.LinuxNamespaceType) (bool, error) {
   110  	if strings.Contains(string(nsType), string(os.PathSeparator)) {
   111  		return false, fmt.Errorf("unexpected namespace type %q", nsType)
   112  	}
   113  	for _, ns := range spec.Linux.Namespaces {
   114  		if ns.Type == nsType {
   115  			if ns.Path == "" {
   116  				return false, nil
   117  			}
   118  			ns, err := os.Readlink(ns.Path)
   119  			if err != nil {
   120  				return false, err
   121  			}
   122  			selfNS, err := os.Readlink(filepath.Join("/proc/self/ns", string(nsType)))
   123  			if err != nil {
   124  				return false, err
   125  			}
   126  			return ns == selfNS, nil
   127  		}
   128  	}
   129  	return true, nil
   130  }
   131  
   132  func bindMountHostProcfs(spec *specs.Spec) error {
   133  	// Replace procfs mount with rbind
   134  	// https://github.com/containers/podman/blob/v3.0.0-rc1/pkg/specgen/generate/oci.go#L248-L257
   135  	for i, m := range spec.Mounts {
   136  		if path.Clean(m.Destination) == "/proc" {
   137  			newM := specs.Mount{
   138  				Destination: "/proc",
   139  				Type:        "bind",
   140  				Source:      "/proc",
   141  				Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   142  			}
   143  			spec.Mounts[i] = newM
   144  		}
   145  	}
   146  
   147  	// Remove ReadonlyPaths for /proc/*
   148  	newROP := spec.Linux.ReadonlyPaths[:0]
   149  	for _, s := range spec.Linux.ReadonlyPaths {
   150  		s = path.Clean(s)
   151  		if !strings.HasPrefix(s, "/proc/") {
   152  			newROP = append(newROP, s)
   153  		}
   154  	}
   155  	spec.Linux.ReadonlyPaths = newROP
   156  
   157  	return nil
   158  }
   159  
   160  // withBindMountHostIPC replaces /dev/shm and /dev/mqueue mount with rbind.
   161  // Required for --ipc=host on rootless.
   162  //
   163  // Based on https://github.com/containerd/nerdctl/blob/v1.1.0/cmd/nerdctl/run.go#L836-L860
   164  func bindMountHostIPC(s *specs.Spec) error {
   165  	for i, m := range s.Mounts {
   166  		switch p := path.Clean(m.Destination); p {
   167  		case "/dev/shm", "/dev/mqueue":
   168  			s.Mounts[i] = specs.Mount{
   169  				Destination: p,
   170  				Type:        "bind",
   171  				Source:      p,
   172  				Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   173  			}
   174  		}
   175  	}
   176  	return nil
   177  }