github.com/hanks177/podman/v4@v4.1.3-0.20220613032544-16d90015bc83/pkg/specgen/generate/oci.go (about)

     1  package generate
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"path"
     7  	"strings"
     8  
     9  	"github.com/containers/common/libimage"
    10  	"github.com/containers/common/pkg/cgroups"
    11  	"github.com/containers/common/pkg/config"
    12  	"github.com/hanks177/podman/v4/libpod"
    13  	"github.com/hanks177/podman/v4/libpod/define"
    14  	"github.com/hanks177/podman/v4/pkg/rootless"
    15  	"github.com/hanks177/podman/v4/pkg/specgen"
    16  	spec "github.com/opencontainers/runtime-spec/specs-go"
    17  	"github.com/opencontainers/runtime-tools/generate"
    18  	"github.com/pkg/errors"
    19  	"github.com/sirupsen/logrus"
    20  	"golang.org/x/sys/unix"
    21  )
    22  
    23  func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) {
    24  	if s.ProcOpts == nil {
    25  		return
    26  	}
    27  	for i := range g.Config.Mounts {
    28  		if g.Config.Mounts[i].Destination == "/proc" {
    29  			g.Config.Mounts[i].Options = s.ProcOpts
    30  			return
    31  		}
    32  	}
    33  }
    34  
    35  func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) {
    36  	var (
    37  		isRootless = rootless.IsRootless()
    38  		nofileSet  = false
    39  		nprocSet   = false
    40  	)
    41  
    42  	if s.Rlimits == nil {
    43  		g.Config.Process.Rlimits = nil
    44  		return
    45  	}
    46  
    47  	for _, u := range s.Rlimits {
    48  		name := "RLIMIT_" + strings.ToUpper(u.Type)
    49  		if name == "RLIMIT_NOFILE" {
    50  			nofileSet = true
    51  		} else if name == "RLIMIT_NPROC" {
    52  			nprocSet = true
    53  		}
    54  		g.AddProcessRlimits(name, u.Hard, u.Soft)
    55  	}
    56  
    57  	// If not explicitly overridden by the user, default number of open
    58  	// files and number of processes to the maximum they can be set to
    59  	// (without overriding a sysctl)
    60  	if !nofileSet {
    61  		max := define.RLimitDefaultValue
    62  		current := define.RLimitDefaultValue
    63  		if isRootless {
    64  			var rlimit unix.Rlimit
    65  			if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil {
    66  				logrus.Warnf("Failed to return RLIMIT_NOFILE ulimit %q", err)
    67  			}
    68  			if rlimit.Cur < current {
    69  				current = rlimit.Cur
    70  			}
    71  			if rlimit.Max < max {
    72  				max = rlimit.Max
    73  			}
    74  		}
    75  		g.AddProcessRlimits("RLIMIT_NOFILE", max, current)
    76  	}
    77  	if !nprocSet {
    78  		max := define.RLimitDefaultValue
    79  		current := define.RLimitDefaultValue
    80  		if isRootless {
    81  			var rlimit unix.Rlimit
    82  			if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil {
    83  				logrus.Warnf("Failed to return RLIMIT_NPROC ulimit %q", err)
    84  			}
    85  			if rlimit.Cur < current {
    86  				current = rlimit.Cur
    87  			}
    88  			if rlimit.Max < max {
    89  				max = rlimit.Max
    90  			}
    91  		}
    92  		g.AddProcessRlimits("RLIMIT_NPROC", max, current)
    93  	}
    94  }
    95  
    96  // Produce the final command for the container.
    97  func makeCommand(s *specgen.SpecGenerator, imageData *libimage.ImageData, rtc *config.Config) ([]string, error) {
    98  	finalCommand := []string{}
    99  
   100  	entrypoint := s.Entrypoint
   101  	if entrypoint == nil && imageData != nil {
   102  		entrypoint = imageData.Config.Entrypoint
   103  	}
   104  
   105  	// Don't append the entrypoint if it is [""]
   106  	if len(entrypoint) != 1 || entrypoint[0] != "" {
   107  		finalCommand = append(finalCommand, entrypoint...)
   108  	}
   109  
   110  	// Only use image command if the user did not manually set an
   111  	// entrypoint.
   112  	command := s.Command
   113  	if len(command) == 0 && imageData != nil && len(s.Entrypoint) == 0 {
   114  		command = imageData.Config.Cmd
   115  	}
   116  
   117  	finalCommand = append(finalCommand, command...)
   118  
   119  	if len(finalCommand) == 0 {
   120  		return nil, errors.Errorf("no command or entrypoint provided, and no CMD or ENTRYPOINT from image")
   121  	}
   122  
   123  	if s.Init {
   124  		initPath := s.InitPath
   125  		if initPath == "" && rtc != nil {
   126  			initPath = rtc.Engine.InitPath
   127  		}
   128  		if initPath == "" {
   129  			return nil, errors.Errorf("no path to init binary found but container requested an init")
   130  		}
   131  		finalCommand = append([]string{define.ContainerInitPath, "--"}, finalCommand...)
   132  	}
   133  
   134  	return finalCommand, nil
   135  }
   136  
   137  // canMountSys is a best-effort heuristic to detect whether mounting a new sysfs is permitted in the container
   138  func canMountSys(isRootless, isNewUserns bool, s *specgen.SpecGenerator) bool {
   139  	if s.NetNS.IsHost() && (isRootless || isNewUserns) {
   140  		return false
   141  	}
   142  	if isNewUserns {
   143  		switch s.NetNS.NSMode {
   144  		case specgen.Slirp, specgen.Private, specgen.NoNetwork, specgen.Bridge:
   145  			return true
   146  		default:
   147  			return false
   148  		}
   149  	}
   150  	return true
   151  }
   152  
   153  func getCgroupPermissons(unmask []string) string {
   154  	ro := "ro"
   155  	rw := "rw"
   156  	cgroup := "/sys/fs/cgroup"
   157  
   158  	cgroupv2, _ := cgroups.IsCgroup2UnifiedMode()
   159  	if !cgroupv2 {
   160  		return ro
   161  	}
   162  
   163  	if unmask != nil && unmask[0] == "ALL" {
   164  		return rw
   165  	}
   166  
   167  	for _, p := range unmask {
   168  		if path.Clean(p) == cgroup {
   169  			return rw
   170  		}
   171  	}
   172  	return ro
   173  }
   174  
   175  // SpecGenToOCI returns the base configuration for the container.
   176  func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *libimage.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string, compatibleOptions *libpod.InfraInherit) (*spec.Spec, error) {
   177  	cgroupPerm := getCgroupPermissons(s.Unmask)
   178  
   179  	g, err := generate.New("linux")
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  	// Remove the default /dev/shm mount to ensure we overwrite it
   184  	g.RemoveMount("/dev/shm")
   185  	g.HostSpecific = true
   186  	addCgroup := true
   187  
   188  	isRootless := rootless.IsRootless()
   189  	isNewUserns := s.UserNS.IsContainer() || s.UserNS.IsPath() || s.UserNS.IsPrivate()
   190  
   191  	canMountSys := canMountSys(isRootless, isNewUserns, s)
   192  
   193  	if s.Privileged && canMountSys {
   194  		cgroupPerm = "rw"
   195  		g.RemoveMount("/sys")
   196  		sysMnt := spec.Mount{
   197  			Destination: "/sys",
   198  			Type:        "sysfs",
   199  			Source:      "sysfs",
   200  			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "rw"},
   201  		}
   202  		g.AddMount(sysMnt)
   203  	}
   204  	if !canMountSys {
   205  		addCgroup = false
   206  		g.RemoveMount("/sys")
   207  		r := "ro"
   208  		if s.Privileged {
   209  			r = "rw"
   210  		}
   211  		sysMnt := spec.Mount{
   212  			Destination: "/sys",
   213  			Type:        "bind", // should we use a constant for this, like createconfig?
   214  			Source:      "/sys",
   215  			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"},
   216  		}
   217  		g.AddMount(sysMnt)
   218  		if !s.Privileged && isRootless {
   219  			g.AddLinuxMaskedPaths("/sys/kernel")
   220  		}
   221  	}
   222  	gid5Available := true
   223  	if isRootless {
   224  		nGids, err := rootless.GetAvailableGids()
   225  		if err != nil {
   226  			return nil, err
   227  		}
   228  		gid5Available = nGids >= 5
   229  	}
   230  	// When using a different user namespace, check that the GID 5 is mapped inside
   231  	// the container.
   232  	if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) {
   233  		mappingFound := false
   234  		for _, r := range s.IDMappings.GIDMap {
   235  			if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size {
   236  				mappingFound = true
   237  				break
   238  			}
   239  		}
   240  		if !mappingFound {
   241  			gid5Available = false
   242  		}
   243  	}
   244  	if !gid5Available {
   245  		// If we have no GID mappings, the gid=5 default option would fail, so drop it.
   246  		g.RemoveMount("/dev/pts")
   247  		devPts := spec.Mount{
   248  			Destination: "/dev/pts",
   249  			Type:        "devpts",
   250  			Source:      "devpts",
   251  			Options:     []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
   252  		}
   253  		g.AddMount(devPts)
   254  	}
   255  
   256  	inUserNS := isRootless || isNewUserns
   257  
   258  	if inUserNS && s.IpcNS.IsHost() {
   259  		g.RemoveMount("/dev/mqueue")
   260  		devMqueue := spec.Mount{
   261  			Destination: "/dev/mqueue",
   262  			Type:        "bind", // constant ?
   263  			Source:      "/dev/mqueue",
   264  			Options:     []string{"bind", "nosuid", "noexec", "nodev"},
   265  		}
   266  		g.AddMount(devMqueue)
   267  	}
   268  	if inUserNS && s.PidNS.IsHost() {
   269  		g.RemoveMount("/proc")
   270  		procMount := spec.Mount{
   271  			Destination: "/proc",
   272  			Type:        define.TypeBind,
   273  			Source:      "/proc",
   274  			Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   275  		}
   276  		g.AddMount(procMount)
   277  	}
   278  
   279  	if addCgroup {
   280  		cgroupMnt := spec.Mount{
   281  			Destination: "/sys/fs/cgroup",
   282  			Type:        "cgroup",
   283  			Source:      "cgroup",
   284  			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm},
   285  		}
   286  		g.AddMount(cgroupMnt)
   287  	}
   288  
   289  	g.Config.Linux.Personality = s.Personality
   290  
   291  	g.SetProcessCwd(s.WorkDir)
   292  
   293  	g.SetProcessArgs(finalCmd)
   294  
   295  	g.SetProcessTerminal(s.Terminal)
   296  
   297  	for key, val := range s.Annotations {
   298  		g.AddAnnotation(key, val)
   299  	}
   300  
   301  	switch {
   302  	case compatibleOptions.InfraResources == nil && s.ResourceLimits != nil:
   303  		out, err := json.Marshal(s.ResourceLimits)
   304  		if err != nil {
   305  			return nil, err
   306  		}
   307  		err = json.Unmarshal(out, g.Config.Linux.Resources)
   308  		if err != nil {
   309  			return nil, err
   310  		}
   311  	case s.ResourceLimits != nil: // if we have predefined resource limits we need to make sure we keep the infra and container limits
   312  		originalResources, err := json.Marshal(s.ResourceLimits)
   313  		if err != nil {
   314  			return nil, err
   315  		}
   316  		infraResources, err := json.Marshal(compatibleOptions.InfraResources)
   317  		if err != nil {
   318  			return nil, err
   319  		}
   320  		err = json.Unmarshal(infraResources, s.ResourceLimits) // put infra's resource limits in the container
   321  		if err != nil {
   322  			return nil, err
   323  		}
   324  		err = json.Unmarshal(originalResources, s.ResourceLimits) // make sure we did not override anything
   325  		if err != nil {
   326  			return nil, err
   327  		}
   328  		g.Config.Linux.Resources = s.ResourceLimits
   329  	default:
   330  		g.Config.Linux.Resources = compatibleOptions.InfraResources
   331  	}
   332  	// Devices
   333  
   334  	// set the default rule at the beginning of device configuration
   335  	if !inUserNS && !s.Privileged {
   336  		g.AddLinuxResourcesDevice(false, "", nil, nil, "rwm")
   337  	}
   338  
   339  	var userDevices []spec.LinuxDevice
   340  
   341  	if !s.Privileged {
   342  		// add default devices from containers.conf
   343  		for _, device := range rtc.Containers.Devices {
   344  			if err = DevicesFromPath(&g, device); err != nil {
   345  				return nil, err
   346  			}
   347  		}
   348  		if len(compatibleOptions.HostDeviceList) > 0 && len(s.Devices) == 0 {
   349  			userDevices = compatibleOptions.HostDeviceList
   350  		} else {
   351  			userDevices = s.Devices
   352  		}
   353  		// add default devices specified by caller
   354  		for _, device := range userDevices {
   355  			if err = DevicesFromPath(&g, device.Path); err != nil {
   356  				return nil, err
   357  			}
   358  		}
   359  	}
   360  	s.HostDeviceList = userDevices
   361  
   362  	// set the devices cgroup when not running in a user namespace
   363  	if !inUserNS && !s.Privileged {
   364  		for _, dev := range s.DeviceCgroupRule {
   365  			g.AddLinuxResourcesDevice(true, dev.Type, dev.Major, dev.Minor, dev.Access)
   366  		}
   367  	}
   368  
   369  	for k, v := range s.WeightDevice {
   370  		statT := unix.Stat_t{}
   371  		if err := unix.Stat(k, &statT); err != nil {
   372  			return nil, errors.Wrapf(err, "failed to inspect '%s' in --blkio-weight-device", k)
   373  		}
   374  		g.AddLinuxResourcesBlockIOWeightDevice((int64(unix.Major(uint64(statT.Rdev)))), (int64(unix.Minor(uint64(statT.Rdev)))), *v.Weight) // nolint: unconvert
   375  	}
   376  
   377  	BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g)
   378  
   379  	g.ClearProcessEnv()
   380  	for name, val := range s.Env {
   381  		g.AddProcessEnv(name, val)
   382  	}
   383  
   384  	addRlimits(s, &g)
   385  
   386  	// NAMESPACES
   387  	if err := specConfigureNamespaces(s, &g, rt, pod); err != nil {
   388  		return nil, err
   389  	}
   390  	configSpec := g.Config
   391  
   392  	if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil {
   393  		return nil, err
   394  	}
   395  
   396  	// BIND MOUNTS
   397  	configSpec.Mounts = SupersedeUserMounts(mounts, configSpec.Mounts)
   398  	// Process mounts to ensure correct options
   399  	if err := InitFSMounts(configSpec.Mounts); err != nil {
   400  		return nil, err
   401  	}
   402  
   403  	// Add annotations
   404  	if configSpec.Annotations == nil {
   405  		configSpec.Annotations = make(map[string]string)
   406  	}
   407  
   408  	if s.Remove {
   409  		configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue
   410  	} else {
   411  		configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse
   412  	}
   413  
   414  	if len(s.VolumesFrom) > 0 {
   415  		configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",")
   416  	}
   417  
   418  	if s.Privileged {
   419  		configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue
   420  	} else {
   421  		configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse
   422  	}
   423  
   424  	if s.Init {
   425  		configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue
   426  	} else {
   427  		configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse
   428  	}
   429  
   430  	if s.OOMScoreAdj != nil {
   431  		g.SetProcessOOMScoreAdj(*s.OOMScoreAdj)
   432  	}
   433  	setProcOpts(s, &g)
   434  
   435  	return configSpec, nil
   436  }