github.com/hanks177/podman/v4@v4.1.3-0.20220613032544-16d90015bc83/pkg/specgen/generate/oci.go (about) 1 package generate 2 3 import ( 4 "context" 5 "encoding/json" 6 "path" 7 "strings" 8 9 "github.com/containers/common/libimage" 10 "github.com/containers/common/pkg/cgroups" 11 "github.com/containers/common/pkg/config" 12 "github.com/hanks177/podman/v4/libpod" 13 "github.com/hanks177/podman/v4/libpod/define" 14 "github.com/hanks177/podman/v4/pkg/rootless" 15 "github.com/hanks177/podman/v4/pkg/specgen" 16 spec "github.com/opencontainers/runtime-spec/specs-go" 17 "github.com/opencontainers/runtime-tools/generate" 18 "github.com/pkg/errors" 19 "github.com/sirupsen/logrus" 20 "golang.org/x/sys/unix" 21 ) 22 23 func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) { 24 if s.ProcOpts == nil { 25 return 26 } 27 for i := range g.Config.Mounts { 28 if g.Config.Mounts[i].Destination == "/proc" { 29 g.Config.Mounts[i].Options = s.ProcOpts 30 return 31 } 32 } 33 } 34 35 func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) { 36 var ( 37 isRootless = rootless.IsRootless() 38 nofileSet = false 39 nprocSet = false 40 ) 41 42 if s.Rlimits == nil { 43 g.Config.Process.Rlimits = nil 44 return 45 } 46 47 for _, u := range s.Rlimits { 48 name := "RLIMIT_" + strings.ToUpper(u.Type) 49 if name == "RLIMIT_NOFILE" { 50 nofileSet = true 51 } else if name == "RLIMIT_NPROC" { 52 nprocSet = true 53 } 54 g.AddProcessRlimits(name, u.Hard, u.Soft) 55 } 56 57 // If not explicitly overridden by the user, default number of open 58 // files and number of processes to the maximum they can be set to 59 // (without overriding a sysctl) 60 if !nofileSet { 61 max := define.RLimitDefaultValue 62 current := define.RLimitDefaultValue 63 if isRootless { 64 var rlimit unix.Rlimit 65 if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil { 66 logrus.Warnf("Failed to return RLIMIT_NOFILE ulimit %q", err) 67 } 68 if rlimit.Cur < current { 69 current = rlimit.Cur 70 } 71 if rlimit.Max < max { 72 max = rlimit.Max 73 } 74 } 75 g.AddProcessRlimits("RLIMIT_NOFILE", max, current) 76 } 77 if !nprocSet { 78 max := define.RLimitDefaultValue 79 current := define.RLimitDefaultValue 80 if isRootless { 81 var rlimit unix.Rlimit 82 if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil { 83 logrus.Warnf("Failed to return RLIMIT_NPROC ulimit %q", err) 84 } 85 if rlimit.Cur < current { 86 current = rlimit.Cur 87 } 88 if rlimit.Max < max { 89 max = rlimit.Max 90 } 91 } 92 g.AddProcessRlimits("RLIMIT_NPROC", max, current) 93 } 94 } 95 96 // Produce the final command for the container. 97 func makeCommand(s *specgen.SpecGenerator, imageData *libimage.ImageData, rtc *config.Config) ([]string, error) { 98 finalCommand := []string{} 99 100 entrypoint := s.Entrypoint 101 if entrypoint == nil && imageData != nil { 102 entrypoint = imageData.Config.Entrypoint 103 } 104 105 // Don't append the entrypoint if it is [""] 106 if len(entrypoint) != 1 || entrypoint[0] != "" { 107 finalCommand = append(finalCommand, entrypoint...) 108 } 109 110 // Only use image command if the user did not manually set an 111 // entrypoint. 112 command := s.Command 113 if len(command) == 0 && imageData != nil && len(s.Entrypoint) == 0 { 114 command = imageData.Config.Cmd 115 } 116 117 finalCommand = append(finalCommand, command...) 118 119 if len(finalCommand) == 0 { 120 return nil, errors.Errorf("no command or entrypoint provided, and no CMD or ENTRYPOINT from image") 121 } 122 123 if s.Init { 124 initPath := s.InitPath 125 if initPath == "" && rtc != nil { 126 initPath = rtc.Engine.InitPath 127 } 128 if initPath == "" { 129 return nil, errors.Errorf("no path to init binary found but container requested an init") 130 } 131 finalCommand = append([]string{define.ContainerInitPath, "--"}, finalCommand...) 132 } 133 134 return finalCommand, nil 135 } 136 137 // canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container 138 func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool { 139 if s.NetNS.IsHost() && (isRootless || isNewUserns) { 140 return false 141 } 142 if isNewUserns { 143 switch s.NetNS.NSMode { 144 case specgen.Slirp, specgen.Private, specgen.NoNetwork, specgen.Bridge: 145 return true 146 default: 147 return false 148 } 149 } 150 return true 151 } 152 153 func getCgroupPermissons(unmask []string) string { 154 ro := "ro" 155 rw := "rw" 156 cgroup := "/sys/fs/cgroup" 157 158 cgroupv2, _ := cgroups.IsCgroup2UnifiedMode() 159 if !cgroupv2 { 160 return ro 161 } 162 163 if unmask != nil && unmask[0] == "ALL" { 164 return rw 165 } 166 167 for _, p := range unmask { 168 if path.Clean(p) == cgroup { 169 return rw 170 } 171 } 172 return ro 173 } 174 175 // SpecGenToOCI returns the base configuration for the container. 176 func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) { 177 cgroupPerm := getCgroupPermissons(s.Unmask) 178 179 g, err := generate.New("linux") 180 if err != nil { 181 return nil, err 182 } 183 // Remove the default /dev/shm mount to ensure we overwrite it 184 g.RemoveMount("/dev/shm") 185 g.HostSpecific = true 186 addCgroup := true 187 188 isRootless := rootless.IsRootless() 189 isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate() 190 191 canMountSys := canMountSys(isRootless, isNewUserns, s) 192 193 if s.Privileged && canMountSys { 194 cgroupPerm = "rw" 195 g.RemoveMount("/sys") 196 sysMnt := spec.Mount{ 197 Destination: "/sys", 198 Type: "sysfs", 199 Source: "sysfs", 200 Options: []string{"rprivate", "nosuid", "noexec", "nodev", "rw"}, 201 } 202 g.AddMount(sysMnt) 203 } 204 if !canMountSys { 205 addCgroup = false 206 g.RemoveMount("/sys") 207 r := "ro" 208 if s.Privileged { 209 r = "rw" 210 } 211 sysMnt := spec.Mount{ 212 Destination: "/sys", 213 Type: "bind", // should we use a constant for this, like createconfig? 214 Source: "/sys", 215 Options: []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"}, 216 } 217 g.AddMount(sysMnt) 218 if !s.Privileged && isRootless { 219 g.AddLinuxMaskedPaths("/sys/kernel") 220 } 221 } 222 gid5Available := true 223 if isRootless { 224 nGids, err := rootless.GetAvailableGids() 225 if err != nil { 226 return nil, err 227 } 228 gid5Available = nGids >= 5 229 } 230 // When using a different user namespace, check that the GID 5 is mapped inside 231 // the container. 232 if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) { 233 mappingFound := false 234 for _, r := range s.IDMappings.GIDMap { 235 if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size { 236 mappingFound = true 237 break 238 } 239 } 240 if !mappingFound { 241 gid5Available = false 242 } 243 } 244 if !gid5Available { 245 // If we have no GID mappings, the gid=5 default option would fail, so drop it. 246 g.RemoveMount("/dev/pts") 247 devPts := spec.Mount{ 248 Destination: "/dev/pts", 249 Type: "devpts", 250 Source: "devpts", 251 Options: []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"}, 252 } 253 g.AddMount(devPts) 254 } 255 256 inUserNS := isRootless || isNewUserns 257 258 if inUserNS && s.IpcNS.IsHost() { 259 g.RemoveMount("/dev/mqueue") 260 devMqueue := spec.Mount{ 261 Destination: "/dev/mqueue", 262 Type: "bind", // constant ? 263 Source: "/dev/mqueue", 264 Options: []string{"bind", "nosuid", "noexec", "nodev"}, 265 } 266 g.AddMount(devMqueue) 267 } 268 if inUserNS && s.PidNS.IsHost() { 269 g.RemoveMount("/proc") 270 procMount := spec.Mount{ 271 Destination: "/proc", 272 Type: define.TypeBind, 273 Source: "/proc", 274 Options: []string{"rbind", "nosuid", "noexec", "nodev"}, 275 } 276 g.AddMount(procMount) 277 } 278 279 if addCgroup { 280 cgroupMnt := spec.Mount{ 281 Destination: "/sys/fs/cgroup", 282 Type: "cgroup", 283 Source: "cgroup", 284 Options: []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm}, 285 } 286 g.AddMount(cgroupMnt) 287 } 288 289 g.Config.Linux.Personality = s.Personality 290 291 g.SetProcessCwd(s.WorkDir) 292 293 g.SetProcessArgs(finalCmd) 294 295 g.SetProcessTerminal(s.Terminal) 296 297 for key, val := range s.Annotations { 298 g.AddAnnotation(key, val) 299 } 300 301 switch { 302 case compatibleOptions.InfraResources == nil && s.ResourceLimits != nil: 303 out, err := json.Marshal(s.ResourceLimits) 304 if err != nil { 305 return nil, err 306 } 307 err = json.Unmarshal(out, g.Config.Linux.Resources) 308 if err != nil { 309 return nil, err 310 } 311 case s.ResourceLimits != nil: // if we have predefined resource limits we need to make sure we keep the infra and container limits 312 originalResources, err := json.Marshal(s.ResourceLimits) 313 if err != nil { 314 return nil, err 315 } 316 infraResources, err := json.Marshal(compatibleOptions.InfraResources) 317 if err != nil { 318 return nil, err 319 } 320 err = json.Unmarshal(infraResources, s.ResourceLimits) // put infra's resource limits in the container 321 if err != nil { 322 return nil, err 323 } 324 err = json.Unmarshal(originalResources, s.ResourceLimits) // make sure we did not override anything 325 if err != nil { 326 return nil, err 327 } 328 g.Config.Linux.Resources = s.ResourceLimits 329 default: 330 g.Config.Linux.Resources = compatibleOptions.InfraResources 331 } 332 // Devices 333 334 // set the default rule at the beginning of device configuration 335 if !inUserNS && !s.Privileged { 336 g.AddLinuxResourcesDevice(false, "", nil, nil, "rwm") 337 } 338 339 var userDevices []spec.LinuxDevice 340 341 if !s.Privileged { 342 // add default devices from containers.conf 343 for _, device := range rtc.Containers.Devices { 344 if err = DevicesFromPath(&g, device); err != nil { 345 return nil, err 346 } 347 } 348 if len(compatibleOptions.HostDeviceList) > 0 && len(s.Devices) == 0 { 349 userDevices = compatibleOptions.HostDeviceList 350 } else { 351 userDevices = s.Devices 352 } 353 // add default devices specified by caller 354 for _, device := range userDevices { 355 if err = DevicesFromPath(&g, device.Path); err != nil { 356 return nil, err 357 } 358 } 359 } 360 s.HostDeviceList = userDevices 361 362 // set the devices cgroup when not running in a user namespace 363 if !inUserNS && !s.Privileged { 364 for _, dev := range s.DeviceCgroupRule { 365 g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access) 366 } 367 } 368 369 for k, v := range s.WeightDevice { 370 statT := unix.Stat_t{} 371 if err := unix.Stat(k, &statT); err != nil { 372 return nil, errors.Wrapf(err, "failed to inspect '%s' in --blkio-weight-device", k) 373 } 374 g.AddLinuxResourcesBlockIOWeightDevice((int64(unix.Major(uint64(statT.Rdev)))), (int64(unix.Minor(uint64(statT.Rdev)))), *v.Weight) // nolint: unconvert 375 } 376 377 BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g) 378 379 g.ClearProcessEnv() 380 for name, val := range s.Env { 381 g.AddProcessEnv(name, val) 382 } 383 384 addRlimits(s, &g) 385 386 // NAMESPACES 387 if err := specConfigureNamespaces(s, &g, rt, pod); err != nil { 388 return nil, err 389 } 390 configSpec := g.Config 391 392 if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil { 393 return nil, err 394 } 395 396 // BIND MOUNTS 397 configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts) 398 // Process mounts to ensure correct options 399 if err := InitFSMounts(configSpec.Mounts); err != nil { 400 return nil, err 401 } 402 403 // Add annotations 404 if configSpec.Annotations == nil { 405 configSpec.Annotations = make(map[string]string) 406 } 407 408 if s.Remove { 409 configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue 410 } else { 411 configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse 412 } 413 414 if len(s.VolumesFrom) > 0 { 415 configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",") 416 } 417 418 if s.Privileged { 419 configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue 420 } else { 421 configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse 422 } 423 424 if s.Init { 425 configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue 426 } else { 427 configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse 428 } 429 430 if s.OOMScoreAdj != nil { 431 g.SetProcessOOMScoreAdj(*s.OOMScoreAdj) 432 } 433 setProcOpts(s, &g) 434 435 return configSpec, nil 436 }