github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/pkg/rootless/specconv/specconv_linux.go (about) 1 package specconv // import "github.com/Prakhar-Agarwal-byte/moby/pkg/rootless/specconv" 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path" 8 "path/filepath" 9 "strconv" 10 "strings" 11 12 "github.com/containerd/log" 13 specs "github.com/opencontainers/runtime-spec/specs-go" 14 ) 15 16 // ToRootfulInRootless is used for "rootful-in-rootless" dind; 17 // the daemon is running in UserNS but has no access to RootlessKit API socket, host filesystem, etc. 18 // 19 // This fuction does: 20 // * Fix up OOMScoreAdj (needed since systemd v250: https://github.com/moby/moby/issues/46563) 21 func ToRootfulInRootless(spec *specs.Spec) { 22 if spec.Process == nil || spec.Process.OOMScoreAdj == nil { 23 return 24 } 25 if currentOOMScoreAdj := getCurrentOOMScoreAdj(); *spec.Process.OOMScoreAdj < currentOOMScoreAdj { 26 *spec.Process.OOMScoreAdj = currentOOMScoreAdj 27 } 28 } 29 30 // ToRootless converts spec to be compatible with "rootless" runc. 31 // * Remove non-supported cgroups 32 // * Fix up OOMScoreAdj 33 // * Fix up /proc if --pid=host 34 // * Fix up /dev/shm and /dev/mqueue if --ipc=host 35 // 36 // v2Controllers should be non-nil only if running with v2 and systemd. 37 func ToRootless(spec *specs.Spec, v2Controllers []string) error { 38 return toRootless(spec, v2Controllers, getCurrentOOMScoreAdj()) 39 } 40 41 func getCurrentOOMScoreAdj() int { 42 b, err := os.ReadFile("/proc/self/oom_score_adj") 43 if err != nil { 44 log.G(context.TODO()).WithError(err).Warn("failed to read /proc/self/oom_score_adj") 45 return 0 46 } 47 s := string(b) 48 i, err := strconv.Atoi(strings.TrimSpace(s)) 49 if err != nil { 50 log.G(context.TODO()).WithError(err).Warnf("failed to parse /proc/self/oom_score_adj (%q)", s) 51 return 0 52 } 53 return i 54 } 55 56 func toRootless(spec *specs.Spec, v2Controllers []string, currentOOMScoreAdj int) error { 57 if len(v2Controllers) == 0 { 58 if spec.Linux != nil { 59 // Remove cgroup settings. 60 spec.Linux.Resources = nil 61 spec.Linux.CgroupsPath = "" 62 } 63 } else { 64 if spec.Linux != nil && spec.Linux.Resources != nil { 65 m := make(map[string]struct{}) 66 for _, s := range v2Controllers { 67 m[s] = struct{}{} 68 } 69 // Remove devices: https://github.com/containers/crun/issues/255 70 spec.Linux.Resources.Devices = nil 71 if _, ok := m["memory"]; !ok { 72 spec.Linux.Resources.Memory = nil 73 } 74 if _, ok := m["cpu"]; !ok { 75 spec.Linux.Resources.CPU = nil 76 } 77 if _, ok := m["cpuset"]; !ok { 78 if spec.Linux.Resources.CPU != nil { 79 spec.Linux.Resources.CPU.Cpus = "" 80 spec.Linux.Resources.CPU.Mems = "" 81 } 82 } 83 if _, ok := m["pids"]; !ok { 84 spec.Linux.Resources.Pids = nil 85 } 86 if _, ok := m["io"]; !ok { 87 spec.Linux.Resources.BlockIO = nil 88 } 89 if _, ok := m["rdma"]; !ok { 90 spec.Linux.Resources.Rdma = nil 91 } 92 spec.Linux.Resources.HugepageLimits = nil 93 spec.Linux.Resources.Network = nil 94 } 95 } 96 97 if spec.Process != nil && spec.Process.OOMScoreAdj != nil && *spec.Process.OOMScoreAdj < currentOOMScoreAdj { 98 *spec.Process.OOMScoreAdj = currentOOMScoreAdj 99 } 100 101 // Fix up /proc if --pid=host 102 pidHost, err := isHostNS(spec, specs.PIDNamespace) 103 if err != nil { 104 return err 105 } 106 if pidHost { 107 if err := bindMountHostProcfs(spec); err != nil { 108 return err 109 } 110 } 111 112 // Fix up /dev/shm and /dev/mqueue if --ipc=host 113 ipcHost, err := isHostNS(spec, specs.IPCNamespace) 114 if err != nil { 115 return err 116 } 117 if ipcHost { 118 if err := bindMountHostIPC(spec); err != nil { 119 return err 120 } 121 } 122 123 return nil 124 } 125 126 func isHostNS(spec *specs.Spec, nsType specs.LinuxNamespaceType) (bool, error) { 127 if strings.Contains(string(nsType), string(os.PathSeparator)) { 128 return false, fmt.Errorf("unexpected namespace type %q", nsType) 129 } 130 if spec.Linux == nil { 131 return false, nil 132 } 133 for _, ns := range spec.Linux.Namespaces { 134 if ns.Type == nsType { 135 if ns.Path == "" { 136 return false, nil 137 } 138 ns, err := os.Readlink(ns.Path) 139 if err != nil { 140 return false, err 141 } 142 selfNS, err := os.Readlink(filepath.Join("/proc/self/ns", string(nsType))) 143 if err != nil { 144 return false, err 145 } 146 return ns == selfNS, nil 147 } 148 } 149 return true, nil 150 } 151 152 func bindMountHostProcfs(spec *specs.Spec) error { 153 // Replace procfs mount with rbind 154 // https://github.com/containers/podman/blob/v3.0.0-rc1/pkg/specgen/generate/oci.go#L248-L257 155 for i, m := range spec.Mounts { 156 if path.Clean(m.Destination) == "/proc" { 157 newM := specs.Mount{ 158 Destination: "/proc", 159 Type: "bind", 160 Source: "/proc", 161 Options: []string{"rbind", "nosuid", "noexec", "nodev"}, 162 } 163 spec.Mounts[i] = newM 164 } 165 } 166 167 if spec.Linux != nil { 168 // Remove ReadonlyPaths for /proc/* 169 newROP := spec.Linux.ReadonlyPaths[:0] 170 for _, s := range spec.Linux.ReadonlyPaths { 171 s = path.Clean(s) 172 if !strings.HasPrefix(s, "/proc/") { 173 newROP = append(newROP, s) 174 } 175 } 176 spec.Linux.ReadonlyPaths = newROP 177 } 178 179 return nil 180 } 181 182 // withBindMountHostIPC replaces /dev/shm and /dev/mqueue mount with rbind. 183 // Required for --ipc=host on rootless. 184 // 185 // Based on https://github.com/containerd/nerdctl/blob/v1.1.0/cmd/nerdctl/run.go#L836-L860 186 func bindMountHostIPC(s *specs.Spec) error { 187 for i, m := range s.Mounts { 188 switch p := path.Clean(m.Destination); p { 189 case "/dev/shm", "/dev/mqueue": 190 s.Mounts[i] = specs.Mount{ 191 Destination: p, 192 Type: "bind", 193 Source: p, 194 Options: []string{"rbind", "nosuid", "noexec", "nodev"}, 195 } 196 } 197 } 198 return nil 199 }