github.com/containers/podman/v2@v2.2.2-0.20210501105131-c1e07d070c4c/pkg/specgen/generate/oci.go (about)

     1  package generate
     2  
     3  import (
     4  	"context"
     5  	"strings"
     6  
     7  	"github.com/containers/common/pkg/config"
     8  	"github.com/containers/podman/v2/libpod"
     9  	"github.com/containers/podman/v2/libpod/define"
    10  	"github.com/containers/podman/v2/libpod/image"
    11  	"github.com/containers/podman/v2/pkg/rootless"
    12  	"github.com/containers/podman/v2/pkg/specgen"
    13  	spec "github.com/opencontainers/runtime-spec/specs-go"
    14  	"github.com/opencontainers/runtime-tools/generate"
    15  	"github.com/pkg/errors"
    16  	"github.com/sirupsen/logrus"
    17  	"golang.org/x/sys/unix"
    18  )
    19  
    20  func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) {
    21  	if s.ProcOpts == nil {
    22  		return
    23  	}
    24  	for i := range g.Config.Mounts {
    25  		if g.Config.Mounts[i].Destination == "/proc" {
    26  			g.Config.Mounts[i].Options = s.ProcOpts
    27  			return
    28  		}
    29  	}
    30  }
    31  
    32  func addRlimits(s *specgen.SpecGenerator, g *generate.Generator) error {
    33  	var (
    34  		isRootless = rootless.IsRootless()
    35  		nofileSet  = false
    36  		nprocSet   = false
    37  	)
    38  
    39  	if s.Rlimits == nil {
    40  		g.Config.Process.Rlimits = nil
    41  		return nil
    42  	}
    43  
    44  	for _, u := range s.Rlimits {
    45  		name := "RLIMIT_" + strings.ToUpper(u.Type)
    46  		if name == "RLIMIT_NOFILE" {
    47  			nofileSet = true
    48  		} else if name == "RLIMIT_NPROC" {
    49  			nprocSet = true
    50  		}
    51  		g.AddProcessRlimits(name, u.Hard, u.Soft)
    52  	}
    53  
    54  	// If not explicitly overridden by the user, default number of open
    55  	// files and number of processes to the maximum they can be set to
    56  	// (without overriding a sysctl)
    57  	if !nofileSet {
    58  		max := define.RLimitDefaultValue
    59  		current := define.RLimitDefaultValue
    60  		if isRootless {
    61  			var rlimit unix.Rlimit
    62  			if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil {
    63  				logrus.Warnf("failed to return RLIMIT_NOFILE ulimit %q", err)
    64  			}
    65  			if rlimit.Cur < current {
    66  				current = rlimit.Cur
    67  			}
    68  			if rlimit.Max < max {
    69  				max = rlimit.Max
    70  			}
    71  		}
    72  		g.AddProcessRlimits("RLIMIT_NOFILE", max, current)
    73  	}
    74  	if !nprocSet {
    75  		max := define.RLimitDefaultValue
    76  		current := define.RLimitDefaultValue
    77  		if isRootless {
    78  			var rlimit unix.Rlimit
    79  			if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil {
    80  				logrus.Warnf("failed to return RLIMIT_NPROC ulimit %q", err)
    81  			}
    82  			if rlimit.Cur < current {
    83  				current = rlimit.Cur
    84  			}
    85  			if rlimit.Max < max {
    86  				max = rlimit.Max
    87  			}
    88  		}
    89  		g.AddProcessRlimits("RLIMIT_NPROC", max, current)
    90  	}
    91  
    92  	return nil
    93  }
    94  
    95  // Produce the final command for the container.
    96  func makeCommand(ctx context.Context, s *specgen.SpecGenerator, img *image.Image, rtc *config.Config) ([]string, error) {
    97  	finalCommand := []string{}
    98  
    99  	entrypoint := s.Entrypoint
   100  	if entrypoint == nil && img != nil {
   101  		newEntry, err := img.Entrypoint(ctx)
   102  		if err != nil {
   103  			return nil, err
   104  		}
   105  		entrypoint = newEntry
   106  	}
   107  
   108  	finalCommand = append(finalCommand, entrypoint...)
   109  
   110  	// Only use image command if the user did not manually set an
   111  	// entrypoint.
   112  	command := s.Command
   113  	if (command == nil || len(command) == 0) && img != nil && (s.Entrypoint == nil || len(s.Entrypoint) == 0) {
   114  		newCmd, err := img.Cmd(ctx)
   115  		if err != nil {
   116  			return nil, err
   117  		}
   118  		command = newCmd
   119  	}
   120  
   121  	finalCommand = append(finalCommand, command...)
   122  
   123  	if len(finalCommand) == 0 {
   124  		return nil, errors.Errorf("no command or entrypoint provided, and no CMD or ENTRYPOINT from image")
   125  	}
   126  
   127  	if s.Init {
   128  		initPath := s.InitPath
   129  		if initPath == "" && rtc != nil {
   130  			initPath = rtc.Engine.InitPath
   131  		}
   132  		if initPath == "" {
   133  			return nil, errors.Errorf("no path to init binary found but container requested an init")
   134  		}
   135  		finalCommand = append([]string{"/dev/init", "--"}, finalCommand...)
   136  	}
   137  
   138  	return finalCommand, nil
   139  }
   140  
   141  func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runtime, rtc *config.Config, newImage *image.Image, mounts []spec.Mount, pod *libpod.Pod, finalCmd []string) (*spec.Spec, error) {
   142  	var (
   143  		inUserNS bool
   144  	)
   145  	cgroupPerm := "ro"
   146  	g, err := generate.New("linux")
   147  	if err != nil {
   148  		return nil, err
   149  	}
   150  	// Remove the default /dev/shm mount to ensure we overwrite it
   151  	g.RemoveMount("/dev/shm")
   152  	g.HostSpecific = true
   153  	addCgroup := true
   154  	canMountSys := true
   155  
   156  	isRootless := rootless.IsRootless()
   157  	if isRootless {
   158  		inUserNS = true
   159  	}
   160  	if !s.UserNS.IsHost() {
   161  		if s.UserNS.IsContainer() || s.UserNS.IsPath() {
   162  			inUserNS = true
   163  		}
   164  		if s.UserNS.IsPrivate() {
   165  			inUserNS = true
   166  		}
   167  	}
   168  	if inUserNS && s.NetNS.NSMode != specgen.NoNetwork {
   169  		canMountSys = false
   170  	}
   171  
   172  	if s.Privileged && canMountSys {
   173  		cgroupPerm = "rw"
   174  		g.RemoveMount("/sys")
   175  		sysMnt := spec.Mount{
   176  			Destination: "/sys",
   177  			Type:        "sysfs",
   178  			Source:      "sysfs",
   179  			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "rw"},
   180  		}
   181  		g.AddMount(sysMnt)
   182  	} else if !canMountSys {
   183  		addCgroup = false
   184  		g.RemoveMount("/sys")
   185  		r := "ro"
   186  		if s.Privileged {
   187  			r = "rw"
   188  		}
   189  		sysMnt := spec.Mount{
   190  			Destination: "/sys",
   191  			Type:        "bind", // should we use a constant for this, like createconfig?
   192  			Source:      "/sys",
   193  			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", r, "rbind"},
   194  		}
   195  		g.AddMount(sysMnt)
   196  		if !s.Privileged && isRootless {
   197  			g.AddLinuxMaskedPaths("/sys/kernel")
   198  		}
   199  	}
   200  	gid5Available := true
   201  	if isRootless {
   202  		nGids, err := rootless.GetAvailableGids()
   203  		if err != nil {
   204  			return nil, err
   205  		}
   206  		gid5Available = nGids >= 5
   207  	}
   208  	// When using a different user namespace, check that the GID 5 is mapped inside
   209  	// the container.
   210  	if gid5Available && (s.IDMappings != nil && len(s.IDMappings.GIDMap) > 0) {
   211  		mappingFound := false
   212  		for _, r := range s.IDMappings.GIDMap {
   213  			if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size {
   214  				mappingFound = true
   215  				break
   216  			}
   217  		}
   218  		if !mappingFound {
   219  			gid5Available = false
   220  		}
   221  
   222  	}
   223  	if !gid5Available {
   224  		// If we have no GID mappings, the gid=5 default option would fail, so drop it.
   225  		g.RemoveMount("/dev/pts")
   226  		devPts := spec.Mount{
   227  			Destination: "/dev/pts",
   228  			Type:        "devpts",
   229  			Source:      "devpts",
   230  			Options:     []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
   231  		}
   232  		g.AddMount(devPts)
   233  	}
   234  
   235  	if inUserNS && s.IpcNS.IsHost() {
   236  		g.RemoveMount("/dev/mqueue")
   237  		devMqueue := spec.Mount{
   238  			Destination: "/dev/mqueue",
   239  			Type:        "bind", // constant ?
   240  			Source:      "/dev/mqueue",
   241  			Options:     []string{"bind", "nosuid", "noexec", "nodev"},
   242  		}
   243  		g.AddMount(devMqueue)
   244  	}
   245  	if inUserNS && s.PidNS.IsHost() {
   246  		g.RemoveMount("/proc")
   247  		procMount := spec.Mount{
   248  			Destination: "/proc",
   249  			Type:        TypeBind,
   250  			Source:      "/proc",
   251  			Options:     []string{"rbind", "nosuid", "noexec", "nodev"},
   252  		}
   253  		g.AddMount(procMount)
   254  	}
   255  
   256  	if addCgroup {
   257  		cgroupMnt := spec.Mount{
   258  			Destination: "/sys/fs/cgroup",
   259  			Type:        "cgroup",
   260  			Source:      "cgroup",
   261  			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", cgroupPerm},
   262  		}
   263  		g.AddMount(cgroupMnt)
   264  	}
   265  	g.SetProcessCwd(s.WorkDir)
   266  
   267  	g.SetProcessArgs(finalCmd)
   268  
   269  	g.SetProcessTerminal(s.Terminal)
   270  
   271  	for key, val := range s.Annotations {
   272  		g.AddAnnotation(key, val)
   273  	}
   274  	g.AddProcessEnv("container", "podman")
   275  
   276  	g.Config.Linux.Resources = s.ResourceLimits
   277  
   278  	// Devices
   279  	if s.Privileged {
   280  		// If privileged, we need to add all the host devices to the
   281  		// spec.  We do not add the user provided ones because we are
   282  		// already adding them all.
   283  		if err := addPrivilegedDevices(&g); err != nil {
   284  			return nil, err
   285  		}
   286  	} else {
   287  		// add default devices from containers.conf
   288  		for _, device := range rtc.Containers.Devices {
   289  			if err := DevicesFromPath(&g, device); err != nil {
   290  				return nil, err
   291  			}
   292  		}
   293  		// add default devices specified by caller
   294  		for _, device := range s.Devices {
   295  			if err := DevicesFromPath(&g, device.Path); err != nil {
   296  				return nil, err
   297  			}
   298  		}
   299  	}
   300  
   301  	BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), &g)
   302  
   303  	for name, val := range s.Env {
   304  		g.AddProcessEnv(name, val)
   305  	}
   306  
   307  	if err := addRlimits(s, &g); err != nil {
   308  		return nil, err
   309  	}
   310  
   311  	// NAMESPACES
   312  	if err := specConfigureNamespaces(s, &g, rt, pod); err != nil {
   313  		return nil, err
   314  	}
   315  	configSpec := g.Config
   316  
   317  	if err := securityConfigureGenerator(s, &g, newImage, rtc); err != nil {
   318  		return nil, err
   319  	}
   320  
   321  	// BIND MOUNTS
   322  	configSpec.Mounts = SupercedeUserMounts(mounts, configSpec.Mounts)
   323  	// Process mounts to ensure correct options
   324  	if err := InitFSMounts(configSpec.Mounts); err != nil {
   325  		return nil, err
   326  	}
   327  
   328  	// Add annotations
   329  	if configSpec.Annotations == nil {
   330  		configSpec.Annotations = make(map[string]string)
   331  	}
   332  
   333  	if s.Remove {
   334  		configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseTrue
   335  	} else {
   336  		configSpec.Annotations[define.InspectAnnotationAutoremove] = define.InspectResponseFalse
   337  	}
   338  
   339  	if len(s.VolumesFrom) > 0 {
   340  		configSpec.Annotations[define.InspectAnnotationVolumesFrom] = strings.Join(s.VolumesFrom, ",")
   341  	}
   342  
   343  	if s.Privileged {
   344  		configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseTrue
   345  	} else {
   346  		configSpec.Annotations[define.InspectAnnotationPrivileged] = define.InspectResponseFalse
   347  	}
   348  
   349  	if s.Init {
   350  		configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseTrue
   351  	} else {
   352  		configSpec.Annotations[define.InspectAnnotationInit] = define.InspectResponseFalse
   353  	}
   354  
   355  	if s.OOMScoreAdj != nil {
   356  		g.SetProcessOOMScoreAdj(*s.OOMScoreAdj)
   357  	}
   358  	setProcOpts(s, &g)
   359  
   360  	return configSpec, nil
   361  }