github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/pkg/rootless/specconv/specconv_linux.go (about)

     1  package specconv // import "github.com/Prakhar-Agarwal-byte/moby/pkg/rootless/specconv"
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path"
     8  	"path/filepath"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/containerd/log"
    13  	specs "github.com/opencontainers/runtime-spec/specs-go"
    14  )
    15  
    16  // ToRootfulInRootless is used for "rootful-in-rootless" dind;
    17  // the daemon is running in UserNS but has no access to RootlessKit API socket, host filesystem, etc.
    18  //
    19  // This fuction does:
    20  // * Fix up OOMScoreAdj (needed since systemd v250: https://github.com/moby/moby/issues/46563)
    21  func ToRootfulInRootless(spec *specs.Spec) {
    22  	if spec.Process == nil || spec.Process.OOMScoreAdj == nil {
    23  		return
    24  	}
    25  	if currentOOMScoreAdj := getCurrentOOMScoreAdj(); *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
    26  		*spec.Process.OOMScoreAdj = currentOOMScoreAdj
    27  	}
    28  }
    29  
    30  // ToRootless converts spec to be compatible with "rootless" runc.
    31  // * Remove non-supported cgroups
    32  // * Fix up OOMScoreAdj
    33  // * Fix up /proc if --pid=host
    34  // * Fix up /dev/shm and /dev/mqueue if --ipc=host
    35  //
    36  // v2Controllers should be non-nil only if running with v2 and systemd.
    37  func ToRootless(spec *specs.Spec, v2Controllers []string) error {
    38  	return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj())
    39  }
    40  
    41  func getCurrentOOMScoreAdj() int {
    42  	b, err := os.ReadFile("/proc/self/oom_score_adj")
    43  	if err != nil {
    44  		log.G(context.TODO()).WithError(err).Warn("failed to read /proc/self/oom_score_adj")
    45  		return 0
    46  	}
    47  	s := string(b)
    48  	i, err := strconv.Atoi(strings.TrimSpace(s))
    49  	if err != nil {
    50  		log.G(context.TODO()).WithError(err).Warnf("failed to parse /proc/self/oom_score_adj (%q)", s)
    51  		return 0
    52  	}
    53  	return i
    54  }
    55  
    56  func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error {
    57  	if len(v2Controllers) == 0 {
    58  		if spec.Linux != nil {
    59  			// Remove cgroup settings.
    60  			spec.Linux.Resources = nil
    61  			spec.Linux.CgroupsPath = ""
    62  		}
    63  	} else {
    64  		if spec.Linux != nil && spec.Linux.Resources != nil {
    65  			m := make(map[string]struct{})
    66  			for _, s := range v2Controllers {
    67  				m[s] = struct{}{}
    68  			}
    69  			// Remove devices: https://github.com/containers/crun/issues/255
    70  			spec.Linux.Resources.Devices = nil
    71  			if _, ok := m["memory"]; !ok {
    72  				spec.Linux.Resources.Memory = nil
    73  			}
    74  			if _, ok := m["cpu"]; !ok {
    75  				spec.Linux.Resources.CPU = nil
    76  			}
    77  			if _, ok := m["cpuset"]; !ok {
    78  				if spec.Linux.Resources.CPU != nil {
    79  					spec.Linux.Resources.CPU.Cpus = ""
    80  					spec.Linux.Resources.CPU.Mems = ""
    81  				}
    82  			}
    83  			if _, ok := m["pids"]; !ok {
    84  				spec.Linux.Resources.Pids = nil
    85  			}
    86  			if _, ok := m["io"]; !ok {
    87  				spec.Linux.Resources.BlockIO = nil
    88  			}
    89  			if _, ok := m["rdma"]; !ok {
    90  				spec.Linux.Resources.Rdma = nil
    91  			}
    92  			spec.Linux.Resources.HugepageLimits = nil
    93  			spec.Linux.Resources.Network = nil
    94  		}
    95  	}
    96  
    97  	if spec.Process != nil && spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj {
    98  		*spec.Process.OOMScoreAdj = currentOOMScoreAdj
    99  	}
   100  
   101  	// Fix up /proc if --pid=host
   102  	pidHost, err := isHostNS(spec, specs.PIDNamespace)
   103  	if err != nil {
   104  		return err
   105  	}
   106  	if pidHost {
   107  		if err := bindMountHostProcfs(spec); err != nil {
   108  			return err
   109  		}
   110  	}
   111  
   112  	// Fix up /dev/shm and /dev/mqueue if --ipc=host
   113  	ipcHost, err := isHostNS(spec, specs.IPCNamespace)
   114  	if err != nil {
   115  		return err
   116  	}
   117  	if ipcHost {
   118  		if err := bindMountHostIPC(spec); err != nil {
   119  			return err
   120  		}
   121  	}
   122  
   123  	return nil
   124  }
   125  
   126  func isHostNS(spec *specs.Spec, nsType specs.LinuxNamespaceType) (bool, error) {
   127  	if strings.Contains(string(nsType), string(os.PathSeparator)) {
   128  		return false, fmt.Errorf("unexpected namespace type %q", nsType)
   129  	}
   130  	if spec.Linux == nil {
   131  		return false, nil
   132  	}
   133  	for _, ns := range spec.Linux.Namespaces {
   134  		if ns.Type == nsType {
   135  			if ns.Path == "" {
   136  				return false, nil
   137  			}
   138  			ns, err := os.Readlink(ns.Path)
   139  			if err != nil {
   140  				return false, err
   141  			}
   142  			selfNS, err := os.Readlink(filepath.Join("/proc/self/ns", string(nsType)))
   143  			if err != nil {
   144  				return false, err
   145  			}
   146  			return ns == selfNS, nil
   147  		}
   148  	}
   149  	return true, nil
   150  }
   151  
   152  func bindMountHostProcfs(spec *specs.Spec) error {
   153  	// Replace procfs mount with rbind
   154  	// https://github.com/containers/podman/blob/v3.0.0-rc1/pkg/specgen/generate/oci.go#L248-L257
   155  	for i, m := range spec.Mounts {
   156  		if path.Clean(m.Destination) == "/proc" {
   157  			newM := specs.Mount{
   158  				Destination: "/proc",
   159  				Type:        "bind",
   160  				Source:      "/proc",
   161  				Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   162  			}
   163  			spec.Mounts[i] = newM
   164  		}
   165  	}
   166  
   167  	if spec.Linux != nil {
   168  		// Remove ReadonlyPaths for /proc/*
   169  		newROP := spec.Linux.ReadonlyPaths[:0]
   170  		for _, s := range spec.Linux.ReadonlyPaths {
   171  			s = path.Clean(s)
   172  			if !strings.HasPrefix(s, "/proc/") {
   173  				newROP = append(newROP, s)
   174  			}
   175  		}
   176  		spec.Linux.ReadonlyPaths = newROP
   177  	}
   178  
   179  	return nil
   180  }
   181  
   182  // withBindMountHostIPC replaces /dev/shm and /dev/mqueue mount with rbind.
   183  // Required for --ipc=host on rootless.
   184  //
   185  // Based on https://github.com/containerd/nerdctl/blob/v1.1.0/cmd/nerdctl/run.go#L836-L860
   186  func bindMountHostIPC(s *specs.Spec) error {
   187  	for i, m := range s.Mounts {
   188  		switch p := path.Clean(m.Destination); p {
   189  		case "/dev/shm", "/dev/mqueue":
   190  			s.Mounts[i] = specs.Mount{
   191  				Destination: p,
   192  				Type:        "bind",
   193  				Source:      p,
   194  				Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   195  			}
   196  		}
   197  	}
   198  	return nil
   199  }