github.com/rkt/rkt@v1.30.1-0.20200224141603-171c416fac02/stage1/init/common/pod.go (about)

     1  // Copyright 2014 The rkt Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //+build linux
    16  
    17  package common
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"io/ioutil"
    24  	"os"
    25  	"path"
    26  	"path/filepath"
    27  	"regexp"
    28  	"strconv"
    29  	"strings"
    30  	"syscall"
    31  
    32  	"github.com/rkt/rkt/pkg/acl"
    33  	stage1commontypes "github.com/rkt/rkt/stage1/common/types"
    34  
    35  	"github.com/appc/spec/schema"
    36  	"github.com/appc/spec/schema/types"
    37  	"github.com/coreos/go-systemd/unit"
    38  	"github.com/hashicorp/errwrap"
    39  
    40  	"github.com/rkt/rkt/common"
    41  	"github.com/rkt/rkt/pkg/fileutil"
    42  	"github.com/rkt/rkt/pkg/user"
    43  )
    44  
const (
	// FlavorFile is the name of the file that stores the pod's flavor.
	FlavorFile = "flavor"
)
    49  
    50  // execEscape uses Golang's string quoting for ", \, \n, and regex for special cases
    51  func execEscape(i int, str string) string {
    52  	escapeMap := map[string]string{
    53  		`'`: `\`,
    54  	}
    55  
    56  	if i > 0 { // These are escaped only after the first argument
    57  		escapeMap[`$`] = `$`
    58  		escapeMap[`%`] = `%`
    59  	}
    60  
    61  	escArg := fmt.Sprintf("%q", str)
    62  	for k := range escapeMap {
    63  		reStr := `([` + regexp.QuoteMeta(k) + `])`
    64  		re := regexp.MustCompile(reStr)
    65  		escArg = re.ReplaceAllStringFunc(escArg, func(s string) string {
    66  			escaped := escapeMap[s] + s
    67  			return escaped
    68  		})
    69  	}
    70  	return escArg
    71  }
    72  
    73  // quoteExec returns an array of quoted strings appropriate for systemd execStart usage
    74  func quoteExec(exec []string) string {
    75  	if len(exec) == 0 {
    76  		// existing callers always include at least the binary so this shouldn't occur.
    77  		panic("empty exec")
    78  	}
    79  
    80  	var qexec []string
    81  	for i, arg := range exec {
    82  		escArg := execEscape(i, arg)
    83  		qexec = append(qexec, escArg)
    84  	}
    85  	return strings.Join(qexec, " ")
    86  }
    87  
    88  func writeAppReaper(p *stage1commontypes.Pod, appName string, appRootDirectory string, binPath string) error {
    89  	opts := []*unit.UnitOption{
    90  		unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)),
    91  		unit.NewUnitOption("Unit", "DefaultDependencies", "false"),
    92  		unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"),
    93  		unit.NewUnitOption("Unit", "Wants", "shutdown.service"),
    94  		unit.NewUnitOption("Unit", "After", "shutdown.service"),
    95  		unit.NewUnitOption("Unit", "Conflicts", "exit.target"),
    96  		unit.NewUnitOption("Unit", "Conflicts", "halt.target"),
    97  		unit.NewUnitOption("Unit", "Conflicts", "poweroff.target"),
    98  		unit.NewUnitOption("Service", "RemainAfterExit", "yes"),
    99  		unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf("/reaper.sh \"%s\" \"%s\" \"%s\"", appName, appRootDirectory, binPath)),
   100  	}
   101  
   102  	unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir)
   103  	file, err := os.OpenFile(filepath.Join(unitsPath, fmt.Sprintf("reaper-%s.service", appName)), os.O_WRONLY|os.O_CREATE, 0644)
   104  	if err != nil {
   105  		return errwrap.Wrap(errors.New("failed to create service unit file"), err)
   106  	}
   107  	defer file.Close()
   108  
   109  	if _, err = io.Copy(file, unit.Serialize(opts)); err != nil {
   110  		return errwrap.Wrap(errors.New("failed to write service unit file"), err)
   111  	}
   112  
   113  	return nil
   114  }
   115  
   116  // SetJournalPermissions sets ACLs and permissions so the rkt group can access
   117  // the pod's logs
   118  func SetJournalPermissions(p *stage1commontypes.Pod) error {
   119  	s1 := common.Stage1ImagePath(p.Root)
   120  
   121  	rktgid, err := common.LookupGid(common.RktGroup)
   122  	if err != nil {
   123  		return fmt.Errorf("group %q not found", common.RktGroup)
   124  	}
   125  
   126  	journalPath := filepath.Join(s1, "rootfs", "var", "log", "journal")
   127  	if err := os.MkdirAll(journalPath, os.FileMode(0755)); err != nil {
   128  		return errwrap.Wrap(errors.New("error creating journal dir"), err)
   129  	}
   130  
   131  	a, err := acl.InitACL()
   132  	if err != nil {
   133  		return err
   134  	}
   135  	defer a.Free()
   136  
   137  	if err := a.ParseACL(fmt.Sprintf("g:%d:r-x,m:r-x", rktgid)); err != nil {
   138  		return errwrap.Wrap(errors.New("error parsing ACL string"), err)
   139  	}
   140  
   141  	if err := a.AddBaseEntries(journalPath); err != nil {
   142  		return errwrap.Wrap(errors.New("error adding base ACL entries"), err)
   143  	}
   144  
   145  	if err := a.Valid(); err != nil {
   146  		return err
   147  	}
   148  
   149  	if err := a.SetFileACLDefault(journalPath); err != nil {
   150  		return errwrap.Wrap(fmt.Errorf("error setting default ACLs on %q", journalPath), err)
   151  	}
   152  
   153  	return nil
   154  }
   155  
   156  func generateGidArg(gid int, supplGid []int) string {
   157  	arg := []string{strconv.Itoa(gid)}
   158  	for _, sg := range supplGid {
   159  		arg = append(arg, strconv.Itoa(sg))
   160  	}
   161  	return strings.Join(arg, ",")
   162  }
   163  
   164  // findHostPort returns the port number on the host that corresponds to an
   165  // image manifest port identified by name
   166  func findHostPort(pm schema.PodManifest, name types.ACName) uint {
   167  	var port uint
   168  	for _, p := range pm.Ports {
   169  		if p.Name == name {
   170  			port = p.HostPort
   171  		}
   172  	}
   173  	return port
   174  }
   175  
   176  // generateSysusers generates systemd sysusers files for a given app so that
   177  // corresponding entries in /etc/passwd and /etc/group are created in stage1.
   178  // This is needed to use the "User=" and "Group=" options in the systemd
   179  // service files of apps.
   180  // If there're several apps defining the same UIDs/GIDs, systemd will take care
   181  // of only generating one /etc/{passwd,group} entry
   182  func generateSysusers(p *stage1commontypes.Pod, ra *schema.RuntimeApp, uid_ int, gid_ int, uidRange *user.UidRange) error {
   183  	var toShift []string
   184  
   185  	app := ra.App
   186  	appName := ra.Name
   187  
   188  	sysusersDir := path.Join(common.Stage1RootfsPath(p.Root), "usr/lib/sysusers.d")
   189  	toShift = append(toShift, sysusersDir)
   190  	if err := os.MkdirAll(sysusersDir, 0755); err != nil {
   191  		return err
   192  	}
   193  
   194  	gids := append(app.SupplementaryGIDs, gid_)
   195  
   196  	// Create the Unix user and group
   197  	var sysusersConf []string
   198  
   199  	for _, g := range gids {
   200  		groupname := "gen" + strconv.Itoa(g)
   201  		sysusersConf = append(sysusersConf, fmt.Sprintf("g %s %d\n", groupname, g))
   202  	}
   203  
   204  	username := "gen" + strconv.Itoa(uid_)
   205  	sysusersConf = append(sysusersConf, fmt.Sprintf("u %s %d \"%s\"\n", username, uid_, username))
   206  
   207  	sysusersFile := path.Join(common.Stage1RootfsPath(p.Root), "usr/lib/sysusers.d", ServiceUnitName(appName)+".conf")
   208  	toShift = append(toShift, sysusersFile)
   209  	if err := ioutil.WriteFile(sysusersFile, []byte(strings.Join(sysusersConf, "\n")), 0640); err != nil {
   210  		return err
   211  	}
   212  
   213  	if err := user.ShiftFiles(toShift, uidRange); err != nil {
   214  		return err
   215  	}
   216  
   217  	return nil
   218  }
   219  
   220  // lookupPathInsideApp returns the path (relative to the app rootfs) of the
   221  // given binary. It will look up on "paths" (also relative to the app rootfs)
   222  // and evaluate possible symlinks to check if the resulting path is actually
   223  // executable.
   224  func lookupPathInsideApp(bin string, paths string, appRootfs string, workDir string) (string, error) {
   225  	pathsArr := filepath.SplitList(paths)
   226  	var appPathsArr []string
   227  	for _, p := range pathsArr {
   228  		if !filepath.IsAbs(p) {
   229  			p = filepath.Join(workDir, p)
   230  		}
   231  		appPathsArr = append(appPathsArr, filepath.Join(appRootfs, p))
   232  	}
   233  	for _, path := range appPathsArr {
   234  		binPath := filepath.Join(path, bin)
   235  		stage2Path := strings.TrimPrefix(binPath, appRootfs)
   236  		binRealPath, err := EvaluateSymlinksInsideApp(appRootfs, stage2Path)
   237  		if err != nil {
   238  			return "", errwrap.Wrap(fmt.Errorf("could not evaluate path %v", stage2Path), err)
   239  		}
   240  		binRealPath = filepath.Join(appRootfs, binRealPath)
   241  		if fileutil.IsExecutable(binRealPath) {
   242  			// The real path is executable, return the path relative to the app
   243  			return stage2Path, nil
   244  		}
   245  	}
   246  	return "", fmt.Errorf("unable to find %q in %q", bin, paths)
   247  }
   248  
   249  // appSearchPaths returns a list of paths where we should search for
   250  // non-absolute exec binaries
   251  func appSearchPaths(p *stage1commontypes.Pod, workDir string, app types.App) []string {
   252  	appEnv := app.Environment
   253  
   254  	if imgPath, ok := appEnv.Get("PATH"); ok {
   255  		return strings.Split(imgPath, ":")
   256  	}
   257  
   258  	// emulate exec(3) behavior, first check working directory and then the
   259  	// list of directories returned by confstr(_CS_PATH). That's typically
   260  	// "/bin:/usr/bin" so let's use that.
   261  	return []string{workDir, "/bin", "/usr/bin"}
   262  }
   263  
   264  // FindBinPath takes a binary path and returns a the absolute path of the
   265  // binary relative to the app rootfs. This can be passed to ExecStart on the
   266  // app's systemd service file directly.
   267  func FindBinPath(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (string, error) {
   268  	if len(ra.App.Exec) == 0 {
   269  		return "", errors.New("app has no executable")
   270  	}
   271  
   272  	bin := ra.App.Exec[0]
   273  
   274  	var binPath string
   275  	switch {
   276  	// absolute path, just use it
   277  	case filepath.IsAbs(bin):
   278  		binPath = bin
   279  	// non-absolute path containing a slash, look in the working dir
   280  	case strings.Contains(bin, "/"):
   281  		binPath = filepath.Join(ra.App.WorkingDirectory, bin)
   282  	// filename, search in the app's $PATH
   283  	default:
   284  		absRoot, err := filepath.Abs(p.Root)
   285  		if err != nil {
   286  			return "", errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
   287  		}
   288  		appRootfs := common.AppRootfsPath(absRoot, ra.Name)
   289  		appPathDirs := appSearchPaths(p, ra.App.WorkingDirectory, *ra.App)
   290  		appPath := strings.Join(appPathDirs, ":")
   291  
   292  		binPath, err = lookupPathInsideApp(bin, appPath, appRootfs, ra.App.WorkingDirectory)
   293  		if err != nil {
   294  			return "", errwrap.Wrap(fmt.Errorf("error looking up %q", bin), err)
   295  		}
   296  	}
   297  
   298  	return binPath, nil
   299  }
   300  
   301  // generateDeviceAllows generates a DeviceAllow= line for an app.
   302  // To make it work, the path needs to start with "/dev" but the device won't
   303  // exist inside the container. So for a given mount, if the volume is a device
   304  // node, we create a symlink to its target in "/rkt/volumes". Later,
   305  // prepare-app will copy those to "/dev/.rkt/" so that's what we use in the
   306  // DeviceAllow= line.
   307  func generateDeviceAllows(root string, appName types.ACName, mountPoints []types.MountPoint, mounts []Mount, uidRange *user.UidRange) ([]string, error) {
   308  	var devAllow []string
   309  
   310  	rktVolumeLinksPath := filepath.Join(root, "rkt", "volumes")
   311  	if err := os.MkdirAll(rktVolumeLinksPath, 0600); err != nil {
   312  		return nil, err
   313  	}
   314  	if err := user.ShiftFiles([]string{rktVolumeLinksPath}, uidRange); err != nil {
   315  		return nil, err
   316  	}
   317  
   318  	for _, m := range mounts {
   319  		if m.Volume.Kind != "host" {
   320  			continue
   321  		}
   322  		if fileutil.IsDeviceNode(m.Volume.Source) {
   323  			mode := "r"
   324  			if !m.ReadOnly {
   325  				mode += "w"
   326  			}
   327  
   328  			tgt := filepath.Join(common.RelAppRootfsPath(appName), m.Mount.Path)
   329  			// the DeviceAllow= line needs the link path in /dev/.rkt/
   330  			linkRel := filepath.Join("/dev/.rkt", m.Volume.Name.String())
   331  			// the real link should be in /rkt/volumes for now
   332  			link := filepath.Join(rktVolumeLinksPath, m.Volume.Name.String())
   333  
   334  			err := os.Symlink(tgt, link)
   335  			// if the link already exists, we don't need to do anything
   336  			if err != nil && !os.IsExist(err) {
   337  				return nil, err
   338  			}
   339  
   340  			devAllow = append(devAllow, linkRel+" "+mode)
   341  		}
   342  	}
   343  
   344  	return devAllow, nil
   345  }
   346  
   347  // supportsNotify returns true if in the image manifest appc.io/executor/supports-systemd-notify is set to true
   348  func supportsNotify(p *stage1commontypes.Pod, appName string) bool {
   349  	appImg := p.Images[appName]
   350  	if appImg == nil {
   351  		return false
   352  	}
   353  	supportNotifyAnnotation, ok := appImg.Annotations.Get("appc.io/executor/supports-systemd-notify")
   354  	supportNotify, err := strconv.ParseBool(supportNotifyAnnotation)
   355  	if ok && supportNotify && err == nil {
   356  		return true
   357  	}
   358  	return false
   359  }
   360  
// ParseUserGroup parses the User and Group fields of an App and returns its
// UID and GID.
// The User and Group fields accept several formats:
//   1. the hardcoded string "root"
//   2. a path
//   3. a number
//   4. a name in reference to /etc/{group,passwd} in the image
// See https://github.com/appc/spec/blob/master/spec/aci.md#image-manifest-schema
//
// For each field the resolvers are tried in a fixed order: user.NumericIDs,
// then user.IDsFromStat, then user.IDsFromEtc; the first one that can be
// constructed is used. On failure it returns -1 for both IDs together with
// the error.
func ParseUserGroup(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (int, int, error) {
	var uidResolver, gidResolver user.Resolver
	var uid, gid int
	var err error

	// Paths and /etc lookups are relative to the app's rootfs.
	root := common.AppRootfsPath(p.Root, ra.Name)

	// Resolve the user: each fallback only runs if the previous attempt failed.
	uidResolver, err = user.NumericIDs(ra.App.User)
	if err != nil {
		uidResolver, err = user.IDsFromStat(root, ra.App.User, &p.UidRange)
	}

	if err != nil {
		uidResolver, err = user.IDsFromEtc(root, ra.App.User, "")
	}

	if err != nil { // give up
		return -1, -1, errwrap.Wrap(fmt.Errorf("invalid user %q", ra.App.User), err)
	}

	// Only the UID of the user resolver's result is kept.
	if uid, _, err = uidResolver.IDs(); err != nil {
		return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure user %q", ra.App.User), err)
	}

	// Resolve the group with the same fallback chain.
	gidResolver, err = user.NumericIDs(ra.App.Group)
	if err != nil {
		gidResolver, err = user.IDsFromStat(root, ra.App.Group, &p.UidRange)
	}

	if err != nil {
		gidResolver, err = user.IDsFromEtc(root, "", ra.App.Group)
	}

	if err != nil { // give up
		return -1, -1, errwrap.Wrap(fmt.Errorf("invalid group %q", ra.App.Group), err)
	}

	// Only the GID of the group resolver's result is kept.
	if _, gid, err = gidResolver.IDs(); err != nil {
		// If we can't resolve the GID, it might be an image converted from
		// docker.
		//
		// Docker uses the UID as GID if you only specify the "user". In that
		// case, docker2aci sets the group name to the user name because the
		// appc spec requires both user and group to be set. This will fail
		// because that group name won't be found in /etc/group. Let's detect
		// if the image was converted from docker and set the GID to the UID in
		// that case.
		//
		// We only do this if the group in RuntimeApp is the same as the one in
		// the image, otherwise we trust that the user knows what they're
		// doing.
		//
		// NOTE(review): img is dereferenced below without a nil check; this
		// presumably relies on every app having an entry in p.Images — confirm.
		img := p.Images[ra.Name.String()]
		if ConvertedFromDocker(img) && img.App.User == img.App.Group && ra.App.Group == img.App.Group {
			gid = uid
		} else {
			return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure group %q", ra.App.Group), err)
		}
	}

	return uid, gid, nil
}
   430  
   431  // EvaluateSymlinksInsideApp tries to resolve symlinks within the path.
   432  // It returns the actual path relative to the app rootfs for the given path.
   433  // This is needed for absolute symlinks - we are in a different rootfs.
   434  func EvaluateSymlinksInsideApp(appRootfs, path string) (string, error) {
   435  	chroot, err := newChroot(appRootfs)
   436  	if err != nil {
   437  		return "", errwrap.Wrapf(fmt.Sprintf("chroot to %q failed", appRootfs), err)
   438  	}
   439  
   440  	target, err := fileutil.EvalSymlinksAlways(path)
   441  	if err != nil {
   442  		return "", errwrap.Wrapf(fmt.Sprintf("evaluating symlinks of %q failed", path), err)
   443  	}
   444  
   445  	// EvalSymlinksAlways might return a relative path
   446  	abs, err := filepath.Abs(target)
   447  	if err != nil {
   448  		return "", errwrap.Wrapf(fmt.Sprintf("failed to get absolute representation of %q", target), err)
   449  	}
   450  
   451  	if err := chroot.escape(); err != nil {
   452  		return "", errwrap.Wrapf(fmt.Sprintf("escaping chroot %q failed", appRootfs), err)
   453  	}
   454  
   455  	return abs, nil
   456  }
   457  
   458  // appToNspawnArgs transforms the given app manifest, with the given associated
   459  // app name, into a subset of applicable systemd-nspawn argument
   460  func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) {
   461  	var args []string
   462  	appName := ra.Name
   463  	app := ra.App
   464  
   465  	sharedVolPath, err := common.CreateSharedVolumesPath(p.Root)
   466  	if err != nil {
   467  		return nil, err
   468  	}
   469  
   470  	vols := make(map[types.ACName]types.Volume)
   471  	for _, v := range p.Manifest.Volumes {
   472  		vols[v.Name] = v
   473  	}
   474  
   475  	imageManifest := p.Images[appName.String()]
   476  	mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest))
   477  	if err != nil {
   478  		return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err)
   479  	}
   480  	for _, m := range mounts {
   481  		shPath := filepath.Join(sharedVolPath, m.Volume.Name.String())
   482  
   483  		absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs.
   484  		if err != nil {
   485  			return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err)
   486  		}
   487  
   488  		appRootfs := common.AppRootfsPath(absRoot, appName)
   489  
   490  		// Evaluate symlinks within the app's rootfs. This is needed because symlinks
   491  		// within the container can be absolute, which will, of course, be wrong in our ns.
   492  		// Systemd also gets this wrong, see https://github.com/systemd/systemd/issues/2860
   493  		// When the above issue is fixed, we can pass the un-evaluated path to --bind instead.
   494  		mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path)
   495  		if err != nil {
   496  			return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err)
   497  		}
   498  		mntAbsPath := filepath.Join(appRootfs, mntPath)
   499  
   500  		if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil {
   501  			return nil, err
   502  		}
   503  
   504  		opt := make([]string, 6)
   505  
   506  		if m.ReadOnly {
   507  			opt[0] = "--bind-ro="
   508  		} else {
   509  			opt[0] = "--bind="
   510  		}
   511  
   512  		opt[1] = m.Source(absRoot)
   513  		opt[2] = ":"
   514  		opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath)
   515  		opt[4] = ":"
   516  
   517  		// If Recursive is not set, default to recursive.
   518  		recursive := true
   519  		if m.Volume.Recursive != nil {
   520  			recursive = *m.Volume.Recursive
   521  		}
   522  
   523  		// rbind/norbind options exist since systemd-nspawn v226
   524  		if recursive {
   525  			opt[5] = "rbind"
   526  		} else {
   527  			opt[5] = "norbind"
   528  		}
   529  		args = append(args, strings.Join(opt, ""))
   530  	}
   531  
   532  	if !p.InsecureOptions.DisableCapabilities {
   533  		capabilitiesStr, err := getAppCapabilities(app.Isolators)
   534  		if err != nil {
   535  			return nil, err
   536  		}
   537  		capList := strings.Join(capabilitiesStr, ",")
   538  		args = append(args, "--capability="+capList)
   539  	}
   540  
   541  	return args, nil
   542  }
   543  
   544  // PodToNspawnArgs renders a prepared Pod as a systemd-nspawn
   545  // argument list ready to be executed
   546  func PodToNspawnArgs(p *stage1commontypes.Pod) ([]string, error) {
   547  	args := []string{
   548  		"--uuid=" + p.UUID.String(),
   549  		"--machine=" + GetMachineID(p),
   550  		"--directory=" + common.Stage1RootfsPath(p.Root),
   551  	}
   552  
   553  	for i := range p.Manifest.Apps {
   554  		aa, err := appToNspawnArgs(p, &p.Manifest.Apps[i])
   555  		if err != nil {
   556  			return nil, err
   557  		}
   558  		args = append(args, aa...)
   559  	}
   560  
   561  	if p.InsecureOptions.DisableCapabilities {
   562  		args = append(args, "--capability=all")
   563  	}
   564  
   565  	return args, nil
   566  }
   567  
   568  // GetFlavor populates a flavor string based on the flavor itself and respectively the systemd version
   569  // If the systemd version couldn't be guessed, it will be set to 0.
   570  func GetFlavor(p *stage1commontypes.Pod) (flavor string, systemdVersion int, err error) {
   571  	flavor, err = os.Readlink(filepath.Join(common.Stage1RootfsPath(p.Root), "flavor"))
   572  	if err != nil {
   573  		return "", -1, errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err)
   574  	}
   575  
   576  	if flavor == "host" {
   577  		// This flavor does not contain systemd, parse "systemctl --version"
   578  		systemctlBin, err := common.LookupPath("systemctl", os.Getenv("PATH"))
   579  		if err != nil {
   580  			return "", -1, err
   581  		}
   582  
   583  		systemdVersion, err := common.SystemdVersion(systemctlBin)
   584  		if err != nil {
   585  			return "", -1, errwrap.Wrap(errors.New("error finding systemctl version"), err)
   586  		}
   587  
   588  		return flavor, systemdVersion, nil
   589  	}
   590  
   591  	systemdVersionBytes, err := ioutil.ReadFile(filepath.Join(common.Stage1RootfsPath(p.Root), "systemd-version"))
   592  	if err != nil {
   593  		return "", -1, errwrap.Wrap(errors.New("unable to determine stage1's systemd version"), err)
   594  	}
   595  	systemdVersionString := strings.Trim(string(systemdVersionBytes), " \n")
   596  
   597  	// systemdVersionString is either a tag name or a branch name. If it's a
   598  	// tag name it's of the form "v229", remove the first character to get the
   599  	// number.
   600  	systemdVersion, err = strconv.Atoi(systemdVersionString[1:])
   601  	if err != nil {
   602  		// If we get a syntax error, it means the parsing of the version string
   603  		// of the form "v229" failed, set it to 0 to indicate we couldn't guess
   604  		// it.
   605  		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrSyntax {
   606  			systemdVersion = 0
   607  		} else {
   608  			return "", -1, errwrap.Wrap(errors.New("error parsing stage1's systemd version"), err)
   609  		}
   610  	}
   611  	return flavor, systemdVersion, nil
   612  }
   613  
   614  // GetAppHashes returns a list of hashes of the apps in this pod
   615  func GetAppHashes(p *stage1commontypes.Pod) []types.Hash {
   616  	var names []types.Hash
   617  	for _, a := range p.Manifest.Apps {
   618  		names = append(names, a.Image.ID)
   619  	}
   620  
   621  	return names
   622  }
   623  
   624  // GetMachineID returns the machine id string of the pod to be passed to
   625  // systemd-nspawn
   626  func GetMachineID(p *stage1commontypes.Pod) string {
   627  	return "rkt-" + p.UUID.String()
   628  }
   629  
   630  // getAppCapabilities computes the set of Linux capabilities that an app
   631  // should have based on its isolators. Only the following capabalities matter:
   632  // - os/linux/capabilities-retain-set
   633  // - os/linux/capabilities-remove-set
   634  //
   635  // The resulting capabilities are generated following the rules from the spec:
   636  // See: https://github.com/appc/spec/blob/master/spec/ace.md#linux-isolators
   637  func getAppCapabilities(isolators types.Isolators) ([]string, error) {
   638  	var capsToRetain []string
   639  	var capsToRemove []string
   640  
   641  	// Default caps defined in
   642  	// https://github.com/appc/spec/blob/master/spec/ace.md#linux-isolators
   643  	appDefaultCapabilities := []string{
   644  		"CAP_AUDIT_WRITE",
   645  		"CAP_CHOWN",
   646  		"CAP_DAC_OVERRIDE",
   647  		"CAP_FSETID",
   648  		"CAP_FOWNER",
   649  		"CAP_KILL",
   650  		"CAP_MKNOD",
   651  		"CAP_NET_RAW",
   652  		"CAP_NET_BIND_SERVICE",
   653  		"CAP_SETUID",
   654  		"CAP_SETGID",
   655  		"CAP_SETPCAP",
   656  		"CAP_SETFCAP",
   657  		"CAP_SYS_CHROOT",
   658  	}
   659  
   660  	// Iterate over the isolators defined in
   661  	// https://github.com/appc/spec/blob/master/spec/ace.md#linux-isolators
   662  	// Only read the capababilities isolators:
   663  	// - os/linux/capabilities-retain-set
   664  	// - os/linux/capabilities-remove-set
   665  	for _, isolator := range isolators {
   666  		if capSet, ok := isolator.Value().(types.LinuxCapabilitiesSet); ok {
   667  			switch isolator.Name {
   668  			case types.LinuxCapabilitiesRetainSetName:
   669  				capsToRetain = append(capsToRetain, parseLinuxCapabilitiesSet(capSet)...)
   670  			case types.LinuxCapabilitiesRevokeSetName:
   671  				capsToRemove = append(capsToRemove, parseLinuxCapabilitiesSet(capSet)...)
   672  			}
   673  		}
   674  	}
   675  
   676  	// appc/spec does not allow to have both the retain set and the remove
   677  	// set defined.
   678  	if len(capsToRetain) > 0 && len(capsToRemove) > 0 {
   679  		return nil, errors.New("cannot have both os/linux/capabilities-retain-set and os/linux/capabilities-remove-set")
   680  	}
   681  
   682  	// Neither the retain set or the remove set are defined
   683  	if len(capsToRetain) == 0 && len(capsToRemove) == 0 {
   684  		return appDefaultCapabilities, nil
   685  	}
   686  
   687  	if len(capsToRetain) > 0 {
   688  		return capsToRetain, nil
   689  	}
   690  
   691  	if len(capsToRemove) == 0 {
   692  		panic("len(capsToRetain) is negative. This cannot happen.")
   693  	}
   694  
   695  	caps := appDefaultCapabilities
   696  	for _, rc := range capsToRemove {
   697  		// backward loop to be safe against deletion
   698  		for i := len(caps) - 1; i >= 0; i-- {
   699  			if caps[i] == rc {
   700  				caps = append(caps[:i], caps[i+1:]...)
   701  			}
   702  		}
   703  	}
   704  	return caps, nil
   705  }
   706  
   707  // parseLinuxCapabilitySet parses a LinuxCapabilitiesSet into string slice
   708  func parseLinuxCapabilitiesSet(capSet types.LinuxCapabilitiesSet) []string {
   709  	var capsStr []string
   710  	for _, cap := range capSet.Set() {
   711  		capsStr = append(capsStr, string(cap))
   712  	}
   713  	return capsStr
   714  }
   715  
   716  func getAppNoNewPrivileges(isolators types.Isolators) bool {
   717  	for _, isolator := range isolators {
   718  		noNewPrivileges, ok := isolator.Value().(*types.LinuxNoNewPrivileges)
   719  
   720  		if ok && bool(*noNewPrivileges) {
   721  			return true
   722  		}
   723  	}
   724  
   725  	return false
   726  }
   727  
// chroot is the struct that represents a chroot environment. It records what
// is needed to get back out of it: newChroot fills both fields before entering
// the chroot and escape consumes them.
type chroot struct {
	wd   string   // the working directory in the outer root
	root *os.File // the outer root directory
}
   733  
   734  // newChroot creates a new chroot environment for the given path.
   735  // Unless the caller calls Escape() all system operations will be invoked in that environment.
   736  // It stores the working directory at the point it was invoked.
   737  func newChroot(path string) (*chroot, error) {
   738  	var err error
   739  	var c chroot
   740  
   741  	c.wd, err = os.Getwd()
   742  	if err != nil {
   743  		return nil, errwrap.Wrapf("getwd before chroot failed", err)
   744  	}
   745  
   746  	c.root, err = os.Open("/")
   747  	if err != nil {
   748  		return nil, errwrap.Wrapf("error opening outer root", err)
   749  	}
   750  
   751  	if err := syscall.Chroot(path); err != nil {
   752  		return nil, errwrap.Wrapf("chroot to "+path+" failed", err)
   753  	}
   754  
   755  	if err := os.Chdir("/"); err != nil {
   756  		return nil, errwrap.Wrapf("chdir to \"/\" failed", err)
   757  	}
   758  
   759  	return &c, nil
   760  }
   761  
// escape leaves the chroot environment and changes back to the original
// working directory where newChroot was invoked.
//
// The order of the steps matters: the process first moves to the outer root
// through the directory handle opened by newChroot, then chroots to ".", and
// only then changes to the saved working directory. The handle is closed
// along the way, so a chroot must not be escaped twice.
func (c *chroot) escape() error {
	// change directory to outer root and close it
	if err := syscall.Fchdir(int(c.root.Fd())); err != nil {
		return errwrap.Wrapf("changing directory to outer root failed", err)
	}

	if err := c.root.Close(); err != nil {
		return errwrap.Wrapf("closing outer root failed", err)
	}

	// chroot to current directory aka "." being the outer root
	if err := syscall.Chroot("."); err != nil {
		return errwrap.Wrapf("chroot to current directory failed", err)
	}

	// chdir into previous working directory
	if err := os.Chdir(c.wd); err != nil {
		return errwrap.Wrapf("chdir to working directory failed", err)
	}

	return nil
}