github.com/stackdocker/rkt@v0.10.1-0.20151109095037-1aa827478248/stage0/run.go (about)

     1  // Copyright 2014 The rkt Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //+build linux
    16  
    17  package stage0
    18  
    19  //
    20  // rkt is a reference implementation of the app container specification.
    21  //
    22  // Execution on rkt is divided into a number of stages, and the `rkt`
    23  // binary implements the first stage (stage 0)
    24  //
    25  
    26  import (
    27  	"encoding/json"
    28  	"fmt"
    29  	"io/ioutil"
    30  	"log"
    31  	"os"
    32  	"path"
    33  	"path/filepath"
    34  	"runtime"
    35  	"strconv"
    36  	"strings"
    37  	"syscall"
    38  	"time"
    39  
    40  	"github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema"
    41  	"github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema/types"
    42  	"github.com/coreos/rkt/common"
    43  	"github.com/coreos/rkt/common/apps"
    44  	"github.com/coreos/rkt/pkg/aci"
    45  	"github.com/coreos/rkt/pkg/fileutil"
    46  	"github.com/coreos/rkt/pkg/label"
    47  	"github.com/coreos/rkt/pkg/sys"
    48  	"github.com/coreos/rkt/pkg/uid"
    49  	"github.com/coreos/rkt/store"
    50  	"github.com/coreos/rkt/version"
    51  )
    52  
    53  const (
    54  	// Default perm bits for the regular files
    55  	// within the stage1 directory. (e.g. image manifest,
    56  	// pod manifest, stage1ID, etc).
    57  	defaultRegularFilePerm = os.FileMode(0640)
    58  
    59  	// Default perm bits for the regular directories
    60  	// within the stage1 directory.
    61  	defaultRegularDirPerm = os.FileMode(0750)
    62  )
    63  
// debugEnabled gates the output of debug(); it is switched on by InitDebug.
var debugEnabled bool
    65  
// PrepareConfig holds the configuration parameters required by Prepare.
type PrepareConfig struct {
	CommonConfig
	Apps         *apps.Apps          // apps to prepare
	InheritEnv   bool                // inherit parent environment into apps
	ExplicitEnv  []string            // always set these environment variables for all the apps
	Ports        []types.ExposedPort // list of ports that rkt will expose on the host
	UseOverlay   bool                // prepare pod with overlay fs
	PodManifest  string              // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc.
	PrivateUsers *uid.UidRange       // User namespaces
}
    77  
// RunConfig holds the configuration parameters needed by Run.
type RunConfig struct {
	CommonConfig
	Net         common.NetList // pod should have its own network stack
	LockFd      int            // lock file descriptor
	Interactive bool           // whether the pod is interactive or not
	MDSRegister bool           // whether to register with metadata service or not
	Apps        schema.AppList // applications (prepare gets them via Apps)
	LocalConfig string         // Path to local configuration
	RktGid      int            // group id of the 'rkt' group, -1 if there's no rkt group.
}
    89  
// CommonConfig holds the configuration shared by both Run and Prepare; it is
// embedded in RunConfig and PrepareConfig.
type CommonConfig struct {
	Store        *store.Store // store containing all of the configured application images
	Stage1Image  types.Hash   // stage1 image containing usable /init and /enter entrypoints
	UUID         *types.UUID  // UUID of the pod
	// Debug, when set, makes Run pass "--debug" to the stage1 entrypoint.
	Debug        bool
	MountLabel   string // selinux label to use for fs
	ProcessLabel string // selinux label to use for process
}
    99  
// init pins the initializing goroutine to its current OS thread.
func init() {
	// this ensures that main runs only on main thread (thread group leader).
	// since namespace ops (unshare, setns) are done for a single thread, we
	// must ensure that the goroutine does not jump from OS thread to thread
	runtime.LockOSThread()
}
   106  
// InitDebug turns on debug logging for this package: once it has been called,
// messages passed to debug() are forwarded to the standard logger.
func InitDebug() {
	debugEnabled = true
}
   110  
   111  func debug(format string, i ...interface{}) {
   112  	if debugEnabled {
   113  		log.Printf(format, i...)
   114  	}
   115  }
   116  
   117  // MergeEnvs amends appEnv setting variables in setEnv before setting anything new from os.Environ if inheritEnv = true
   118  // setEnv is expected to be in the os.Environ() key=value format
   119  func MergeEnvs(appEnv *types.Environment, inheritEnv bool, setEnv []string) {
   120  	for _, ev := range setEnv {
   121  		pair := strings.SplitN(ev, "=", 2)
   122  		appEnv.Set(pair[0], pair[1])
   123  	}
   124  
   125  	if inheritEnv {
   126  		for _, ev := range os.Environ() {
   127  			pair := strings.SplitN(ev, "=", 2)
   128  			if _, exists := appEnv.Get(pair[0]); !exists {
   129  				appEnv.Set(pair[0], pair[1])
   130  			}
   131  		}
   132  	}
   133  }
   134  
   135  func imageNameToAppName(name types.ACIdentifier) (*types.ACName, error) {
   136  	parts := strings.Split(name.String(), "/")
   137  	last := parts[len(parts)-1]
   138  
   139  	sn, err := types.SanitizeACName(last)
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	return types.MustACName(sn), nil
   145  }
   146  
   147  // deduplicateMPs removes Mounts with duplicated paths. If there's more than
   148  // one Mount with the same path, it keeps the first one encountered.
   149  func deduplicateMPs(mounts []schema.Mount) []schema.Mount {
   150  	var res []schema.Mount
   151  	seen := make(map[string]struct{})
   152  	for _, m := range mounts {
   153  		if _, ok := seen[m.Path]; !ok {
   154  			res = append(res, m)
   155  			seen[m.Path] = struct{}{}
   156  		}
   157  	}
   158  	return res
   159  }
   160  
   161  // MergeMounts combines the global and per-app mount slices
   162  func MergeMounts(mounts []schema.Mount, appMounts []schema.Mount) []schema.Mount {
   163  	ml := append(appMounts, mounts...)
   164  	return deduplicateMPs(ml)
   165  }
   166  
// generatePodManifest creates the pod manifest from the command line input.
// It returns the pod manifest as []byte on success.
// This is invoked if no pod manifest is specified at the command line.
//
// For every app in cfg.Apps it looks up the image manifest in cfg.Store,
// derives the app name from the image name, prepares the app image under dir
// (via prepareAppImage) and appends a RuntimeApp entry to the manifest.
// Volumes and ports are taken from cfg.Apps.Volumes and cfg.Ports.
func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) {
	pm := schema.PodManifest{
		ACKind: "PodManifest",
		Apps:   make(schema.AppList, 0),
	}

	// The manifest's ACVersion is built from this package's version string.
	v, err := types.NewSemVer(version.Version)
	if err != nil {
		return nil, fmt.Errorf("error creating version: %v", err)
	}
	pm.ACVersion = *v

	if err := cfg.Apps.Walk(func(app *apps.App) error {
		img := app.ImageID

		am, err := cfg.Store.GetImageManifest(img.String())
		if err != nil {
			return fmt.Errorf("error getting the manifest: %v", err)
		}
		appName, err := imageNameToAppName(am.Name)
		if err != nil {
			return fmt.Errorf("error converting image name to app name: %v", err)
		}
		// NOTE: the image is prepared on disk before the duplicate-name and
		// missing-app-section checks below are performed.
		if err := prepareAppImage(cfg, *appName, img, dir, cfg.UseOverlay); err != nil {
			return fmt.Errorf("error setting up image %s: %v", img, err)
		}
		if pm.Apps.Get(*appName) != nil {
			return fmt.Errorf("error: multiple apps with name %s", am.Name)
		}
		// Without an app section in the image there is nothing to run
		// unless the user supplied an exec override.
		if am.App == nil && app.Exec == "" {
			return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", img)
		}
		ra := schema.RuntimeApp{
			// TODO(vc): leverage RuntimeApp.Name for disambiguating the apps
			Name: *appName,
			App:  am.App,
			Image: schema.RuntimeImage{
				Name:   &am.Name,
				ID:     img,
				Labels: am.Labels,
			},
			Annotations: am.Annotations,
			Mounts:      MergeMounts(cfg.Apps.Mounts, app.Mounts),
		}

		if execOverride := app.Exec; execOverride != "" {
			// Create a minimal App section if not present
			if am.App == nil {
				ra.App = &types.App{
					User:  strconv.Itoa(os.Getuid()),
					Group: strconv.Itoa(os.Getgid()),
				}
			}
			// The override replaces the whole exec line.
			ra.App.Exec = []string{execOverride}
		}

		// Extra arguments are appended after the (possibly overridden) exec line.
		if execAppends := app.Args; execAppends != nil {
			ra.App.Exec = append(ra.App.Exec, execAppends...)
		}

		if cfg.InheritEnv || len(cfg.ExplicitEnv) > 0 {
			MergeEnvs(&ra.App.Environment, cfg.InheritEnv, cfg.ExplicitEnv)
		}
		pm.Apps = append(pm.Apps, ra)
		return nil
	}); err != nil {
		return nil, err
	}

	// TODO(jonboulle): check that app mountpoint expectations are
	// satisfied here, rather than waiting for stage1
	pm.Volumes = cfg.Apps.Volumes
	pm.Ports = cfg.Ports

	pmb, err := json.Marshal(pm)
	if err != nil {
		return nil, fmt.Errorf("error marshalling pod manifest: %v", err)
	}
	return pmb, nil
}
   250  
   251  // validatePodManifest reads the user-specified pod manifest, prepares the app images
   252  // and validates the pod manifest. If the pod manifest passes validation, it returns
   253  // the manifest as []byte.
   254  // TODO(yifan): More validation in the future.
   255  func validatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) {
   256  	pmb, err := ioutil.ReadFile(cfg.PodManifest)
   257  	if err != nil {
   258  		return nil, fmt.Errorf("error reading pod manifest: %v", err)
   259  	}
   260  	var pm schema.PodManifest
   261  	if err := json.Unmarshal(pmb, &pm); err != nil {
   262  		return nil, fmt.Errorf("error unmarshaling pod manifest: %v", err)
   263  	}
   264  
   265  	appNames := make(map[types.ACName]struct{})
   266  	for _, ra := range pm.Apps {
   267  		img := ra.Image
   268  
   269  		if img.ID.Empty() {
   270  			return nil, fmt.Errorf("no image ID for app %q", ra.Name)
   271  		}
   272  		am, err := cfg.Store.GetImageManifest(img.ID.String())
   273  		if err != nil {
   274  			return nil, fmt.Errorf("error getting the image manifest from store: %v", err)
   275  		}
   276  		if err := prepareAppImage(cfg, ra.Name, img.ID, dir, cfg.UseOverlay); err != nil {
   277  			return nil, fmt.Errorf("error setting up image %s: %v", img, err)
   278  		}
   279  		if _, ok := appNames[ra.Name]; ok {
   280  			return nil, fmt.Errorf("multiple apps with same name %s", ra.Name)
   281  		}
   282  		appNames[ra.Name] = struct{}{}
   283  		if ra.App == nil && am.App == nil {
   284  			return nil, fmt.Errorf("no app section in the pod manifest or the image manifest")
   285  		}
   286  	}
   287  	return pmb, nil
   288  }
   289  
   290  // Prepare sets up a pod based on the given config.
   291  func Prepare(cfg PrepareConfig, dir string, uuid *types.UUID) error {
   292  	if err := os.MkdirAll(common.AppsInfoPath(dir), defaultRegularDirPerm); err != nil {
   293  		return fmt.Errorf("error creating apps info directory: %v", err)
   294  	}
   295  	debug("Preparing stage1")
   296  	if err := prepareStage1Image(cfg, cfg.Stage1Image, dir, cfg.UseOverlay); err != nil {
   297  		return fmt.Errorf("error preparing stage1: %v", err)
   298  	}
   299  
   300  	var pmb []byte
   301  	var err error
   302  	if len(cfg.PodManifest) > 0 {
   303  		pmb, err = validatePodManifest(cfg, dir)
   304  	} else {
   305  		pmb, err = generatePodManifest(cfg, dir)
   306  	}
   307  	if err != nil {
   308  		return err
   309  	}
   310  
   311  	debug("Writing pod manifest")
   312  	fn := common.PodManifestPath(dir)
   313  	if err := ioutil.WriteFile(fn, pmb, defaultRegularFilePerm); err != nil {
   314  		return fmt.Errorf("error writing pod manifest: %v", err)
   315  	}
   316  
   317  	if cfg.UseOverlay {
   318  		// mark the pod as prepared with overlay
   319  		f, err := os.Create(filepath.Join(dir, common.OverlayPreparedFilename))
   320  		if err != nil {
   321  			return fmt.Errorf("error writing overlay marker file: %v", err)
   322  		}
   323  		defer f.Close()
   324  	}
   325  
   326  	if cfg.PrivateUsers.Shift > 0 {
   327  		// mark the pod as prepared for user namespaces
   328  		uidrangeBytes := cfg.PrivateUsers.Serialize()
   329  
   330  		if err := ioutil.WriteFile(filepath.Join(dir, common.PrivateUsersPreparedFilename), uidrangeBytes, defaultRegularFilePerm); err != nil {
   331  			return fmt.Errorf("error writing userns marker file: %v", err)
   332  		}
   333  	}
   334  
   335  	return nil
   336  }
   337  
   338  func preparedWithOverlay(dir string) (bool, error) {
   339  	_, err := os.Stat(filepath.Join(dir, common.OverlayPreparedFilename))
   340  	if os.IsNotExist(err) {
   341  		return false, nil
   342  	}
   343  	if err != nil {
   344  		return false, err
   345  	}
   346  
   347  	if !common.SupportsOverlay() {
   348  		return false, fmt.Errorf("the pod was prepared with overlay but overlay is not supported")
   349  	}
   350  
   351  	return true, nil
   352  }
   353  
   354  func preparedWithPrivateUsers(dir string) (string, error) {
   355  	bytes, err := ioutil.ReadFile(filepath.Join(dir, common.PrivateUsersPreparedFilename))
   356  	if os.IsNotExist(err) {
   357  		return "", nil
   358  	}
   359  	if err != nil {
   360  		return "", err
   361  	}
   362  
   363  	return string(bytes), nil
   364  }
   365  
   366  // Run mounts the right overlay filesystems and actually runs the prepared
   367  // pod by exec()ing the stage1 init inside the pod filesystem.
   368  func Run(cfg RunConfig, dir string, dataDir string) {
   369  	useOverlay, err := preparedWithOverlay(dir)
   370  	if err != nil {
   371  		log.Fatalf("error: %v", err)
   372  	}
   373  
   374  	privateUsers, err := preparedWithPrivateUsers(dir)
   375  	if err != nil {
   376  		log.Fatalf("error: %v", err)
   377  	}
   378  
   379  	debug("Setting up stage1")
   380  	if err := setupStage1Image(cfg, dir, useOverlay); err != nil {
   381  		log.Fatalf("error setting up stage1: %v", err)
   382  	}
   383  	debug("Wrote filesystem to %s\n", dir)
   384  
   385  	for _, app := range cfg.Apps {
   386  		if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, useOverlay); err != nil {
   387  			log.Fatalf("error setting up app image: %v", err)
   388  		}
   389  	}
   390  
   391  	destRootfs := common.Stage1RootfsPath(dir)
   392  	flavor, err := os.Readlink(filepath.Join(destRootfs, "flavor"))
   393  	if err != nil {
   394  		log.Printf("error reading flavor: %v\n", err)
   395  	}
   396  	if flavor == "kvm" {
   397  		err := kvmCheckSSHSetup(destRootfs, dataDir)
   398  		if err != nil {
   399  			log.Fatalf("error setting up ssh keys: %v", err)
   400  		}
   401  	}
   402  
   403  	if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil {
   404  		log.Fatalf("setting lock fd environment: %v", err)
   405  	}
   406  
   407  	if err := os.Setenv(common.EnvSELinuxContext, fmt.Sprintf("%v", cfg.ProcessLabel)); err != nil {
   408  		log.Fatalf("setting SELinux context environment: %v", err)
   409  	}
   410  
   411  	debug("Pivoting to filesystem %s", dir)
   412  	if err := os.Chdir(dir); err != nil {
   413  		log.Fatalf("failed changing to dir: %v", err)
   414  	}
   415  
   416  	ep, err := getStage1Entrypoint(dir, runEntrypoint)
   417  	if err != nil {
   418  		log.Fatalf("error determining 'run' entrypoint: %v", err)
   419  	}
   420  	args := []string{filepath.Join(destRootfs, ep)}
   421  	debug("Execing %s", ep)
   422  
   423  	if cfg.Debug {
   424  		args = append(args, "--debug")
   425  	}
   426  
   427  	args = append(args, "--net="+cfg.Net.String())
   428  
   429  	if cfg.Interactive {
   430  		args = append(args, "--interactive")
   431  	}
   432  	if len(privateUsers) > 0 {
   433  		args = append(args, "--private-users="+privateUsers)
   434  	}
   435  	if cfg.MDSRegister {
   436  		mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps)
   437  		if err != nil {
   438  			log.Fatalf("failed to register the pod: %v", err)
   439  		}
   440  
   441  		args = append(args, "--mds-token="+mdsToken)
   442  	}
   443  
   444  	if cfg.LocalConfig != "" {
   445  		args = append(args, "--local-config="+cfg.LocalConfig)
   446  	}
   447  
   448  	args = append(args, cfg.UUID.String())
   449  
   450  	// make sure the lock fd stays open across exec
   451  	if err := sys.CloseOnExec(cfg.LockFd, false); err != nil {
   452  		log.Fatalf("error clearing FD_CLOEXEC on lock fd")
   453  	}
   454  
   455  	if err := syscall.Exec(args[0], args, os.Environ()); err != nil {
   456  		log.Fatalf("error execing init: %v", err)
   457  	}
   458  }
   459  
   460  // prepareAppImage renders and verifies the tree cache of the app image that
   461  // corresponds to the given app name.
   462  // When useOverlay is false, it attempts to render and expand the app image
   463  func prepareAppImage(cfg PrepareConfig, appName types.ACName, img types.Hash, cdir string, useOverlay bool) error {
   464  	debug("Loading image %s", img.String())
   465  
   466  	am, err := cfg.Store.GetImageManifest(img.String())
   467  	if err != nil {
   468  		return fmt.Errorf("error getting the manifest: %v", err)
   469  	}
   470  
   471  	if _, hasOS := am.Labels.Get("os"); !hasOS {
   472  		return fmt.Errorf("missing os label in the image manifest")
   473  	}
   474  	if _, hasArch := am.Labels.Get("arch"); !hasArch {
   475  		return fmt.Errorf("missing arch label in the image manifest")
   476  	}
   477  
   478  	if err := types.IsValidOSArch(am.Labels.ToMap(), ValidOSArch); err != nil {
   479  		return err
   480  	}
   481  
   482  	appInfoDir := common.AppInfoPath(cdir, appName)
   483  	if err := os.MkdirAll(appInfoDir, defaultRegularDirPerm); err != nil {
   484  		return fmt.Errorf("error creating apps info directory: %v", err)
   485  	}
   486  
   487  	if useOverlay {
   488  		if cfg.PrivateUsers.Shift > 0 {
   489  			return fmt.Errorf("cannot use both overlay and user namespace: not implemented yet. (Try --no-overlay)")
   490  		}
   491  		treeStoreID, err := cfg.Store.RenderTreeStore(img.String(), false)
   492  		if err != nil {
   493  			return fmt.Errorf("error rendering tree image: %v", err)
   494  		}
   495  		if err := cfg.Store.CheckTreeStore(treeStoreID); err != nil {
   496  			log.Printf("Warning: tree cache is in a bad state: %v. Rebuilding...", err)
   497  			var err error
   498  			if treeStoreID, err = cfg.Store.RenderTreeStore(img.String(), true); err != nil {
   499  				return fmt.Errorf("error rendering tree image: %v", err)
   500  			}
   501  		}
   502  
   503  		if err := ioutil.WriteFile(common.AppTreeStoreIDPath(cdir, appName), []byte(treeStoreID), defaultRegularFilePerm); err != nil {
   504  			return fmt.Errorf("error writing app treeStoreID: %v", err)
   505  		}
   506  	} else {
   507  		ad := common.AppPath(cdir, appName)
   508  		err := os.MkdirAll(ad, defaultRegularDirPerm)
   509  		if err != nil {
   510  			return fmt.Errorf("error creating image directory: %v", err)
   511  		}
   512  
   513  		shiftedUid, shiftedGid, err := cfg.PrivateUsers.ShiftRange(uint32(os.Getuid()), uint32(os.Getgid()))
   514  		if err != nil {
   515  			return fmt.Errorf("error getting uid, gid: %v", err)
   516  		}
   517  
   518  		if err := os.Chown(ad, int(shiftedUid), int(shiftedGid)); err != nil {
   519  			return fmt.Errorf("error shifting app %q's stage2 dir: %v", appName, err)
   520  		}
   521  
   522  		if err := aci.RenderACIWithImageID(img, ad, cfg.Store, cfg.PrivateUsers); err != nil {
   523  			return fmt.Errorf("error rendering ACI: %v", err)
   524  		}
   525  	}
   526  	if err := writeManifest(cfg.CommonConfig, img, appInfoDir); err != nil {
   527  		return err
   528  	}
   529  	return nil
   530  }
   531  
   532  // setupAppImage mounts the overlay filesystem for the app image that
   533  // corresponds to the given hash. Then, it creates the tmp directory.
   534  // When useOverlay is false it just creates the tmp directory for this app.
   535  func setupAppImage(cfg RunConfig, appName types.ACName, img types.Hash, cdir string, useOverlay bool) error {
   536  	ad := common.AppPath(cdir, appName)
   537  	if useOverlay {
   538  		err := os.MkdirAll(ad, defaultRegularDirPerm)
   539  		if err != nil {
   540  			return fmt.Errorf("error creating image directory: %v", err)
   541  		}
   542  		treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cdir, appName))
   543  		if err != nil {
   544  			return err
   545  		}
   546  		if err := copyAppManifest(cdir, appName, ad); err != nil {
   547  			return err
   548  		}
   549  		if err := overlayRender(cfg, string(treeStoreID), cdir, ad, appName.String()); err != nil {
   550  			return fmt.Errorf("error rendering overlay filesystem: %v", err)
   551  		}
   552  	}
   553  
   554  	return nil
   555  }
   556  
   557  // prepareStage1Image renders and verifies tree cache of the given hash
   558  // when using overlay.
   559  // When useOverlay is false, it attempts to render and expand the stage1.
   560  func prepareStage1Image(cfg PrepareConfig, img types.Hash, cdir string, useOverlay bool) error {
   561  	s1 := common.Stage1ImagePath(cdir)
   562  	if err := os.MkdirAll(s1, defaultRegularDirPerm); err != nil {
   563  		return fmt.Errorf("error creating stage1 directory: %v", err)
   564  	}
   565  
   566  	treeStoreID, err := cfg.Store.RenderTreeStore(img.String(), false)
   567  	if err != nil {
   568  		return fmt.Errorf("error rendering tree image: %v", err)
   569  	}
   570  	if err := cfg.Store.CheckTreeStore(treeStoreID); err != nil {
   571  		log.Printf("Warning: tree cache is in a bad state: %v. Rebuilding...", err)
   572  		var err error
   573  		if treeStoreID, err = cfg.Store.RenderTreeStore(img.String(), true); err != nil {
   574  			return fmt.Errorf("error rendering tree image: %v", err)
   575  		}
   576  	}
   577  
   578  	if err := writeManifest(cfg.CommonConfig, img, s1); err != nil {
   579  		return fmt.Errorf("error writing manifest: %v", err)
   580  	}
   581  
   582  	if !useOverlay {
   583  		destRootfs := filepath.Join(s1, "rootfs")
   584  		cachedTreePath := cfg.Store.GetTreeStoreRootFS(treeStoreID)
   585  		if err := fileutil.CopyTree(cachedTreePath, destRootfs, cfg.PrivateUsers); err != nil {
   586  			return fmt.Errorf("error rendering ACI: %v", err)
   587  		}
   588  	}
   589  
   590  	fn := path.Join(cdir, common.Stage1TreeStoreIDFilename)
   591  	if err := ioutil.WriteFile(fn, []byte(treeStoreID), defaultRegularFilePerm); err != nil {
   592  		return fmt.Errorf("error writing stage1 treeStoreID: %v", err)
   593  	}
   594  	return nil
   595  }
   596  
   597  // setupStage1Image mounts the overlay filesystem for stage1.
   598  // When useOverlay is false it is a noop
   599  func setupStage1Image(cfg RunConfig, cdir string, useOverlay bool) error {
   600  	s1 := common.Stage1ImagePath(cdir)
   601  	if useOverlay {
   602  		treeStoreID, err := ioutil.ReadFile(filepath.Join(cdir, common.Stage1TreeStoreIDFilename))
   603  		if err != nil {
   604  			return err
   605  		}
   606  
   607  		// pass an empty appName: make sure it remains consistent with
   608  		// overlayStatusDirTemplate
   609  		if err := overlayRender(cfg, string(treeStoreID), cdir, s1, ""); err != nil {
   610  			return fmt.Errorf("error rendering overlay filesystem: %v", err)
   611  		}
   612  
   613  		// we will later read the status from the upper layer of the overlay fs
   614  		// force the status directory to be there by touching it
   615  		statusPath := filepath.Join(s1, "rootfs", "rkt", "status")
   616  		if err := os.Chtimes(statusPath, time.Now(), time.Now()); err != nil {
   617  			return fmt.Errorf("error touching status dir: %v", err)
   618  		}
   619  	}
   620  
   621  	return nil
   622  }
   623  
   624  // writeManifest takes an img ID and writes the corresponding manifest in dest
   625  func writeManifest(cfg CommonConfig, img types.Hash, dest string) error {
   626  	mb, err := cfg.Store.GetImageManifestJSON(img.String())
   627  	if err != nil {
   628  		return err
   629  	}
   630  
   631  	debug("Writing image manifest")
   632  	if err := ioutil.WriteFile(filepath.Join(dest, "manifest"), mb, defaultRegularFilePerm); err != nil {
   633  		return fmt.Errorf("error writing image manifest: %v", err)
   634  	}
   635  
   636  	return nil
   637  }
   638  
   639  // copyAppManifest copies to saved image manifest for the given appName and
   640  // writes it in the dest directory.
   641  func copyAppManifest(cdir string, appName types.ACName, dest string) error {
   642  	appInfoDir := common.AppInfoPath(cdir, appName)
   643  	sourceFn := filepath.Join(appInfoDir, "manifest")
   644  	destFn := filepath.Join(dest, "manifest")
   645  	if err := fileutil.CopyRegularFile(sourceFn, destFn); err != nil {
   646  		return fmt.Errorf("error copying image manifest: %v", err)
   647  	}
   648  	return nil
   649  }
   650  
// overlayRender renders the image that corresponds to the given hash using the
// overlay filesystem.
// It mounts an overlay filesystem from the cached tree of the image as rootfs.
//
// The mount point is dest/rootfs. The lower layer is the cached tree of
// treeStoreID; the upper and work directories are created under
// cdir/overlay/treeStoreID/{upper,work}/appName. An empty appName places them
// directly in the upper and work directories.
func overlayRender(cfg RunConfig, treeStoreID string, cdir string, dest string, appName string) error {
	cachedTreePath := cfg.Store.GetTreeStoreRootFS(treeStoreID)
	fi, err := os.Stat(cachedTreePath)
	if err != nil {
		return err
	}
	// The mode of the cached tree root is reused below for the rootfs mount
	// point and for the upper directory.
	imgMode := fi.Mode()

	destRootfs := path.Join(dest, "rootfs")
	if err := os.MkdirAll(destRootfs, imgMode); err != nil {
		return err
	}

	overlayDir := path.Join(cdir, "overlay")
	if err := os.MkdirAll(overlayDir, defaultRegularDirPerm); err != nil {
		return err
	}

	// Since the parent directory (rkt/pods/$STATE/$POD_UUID) has the 'S_ISGID' bit, here
	// we need to explicitly turn the bit off when creating this overlay
	// directory so that it won't inherit the bit. Otherwise the files
	// created by users within the pod will inherit the 'S_ISGID' bit
	// as well.
	if err := os.Chmod(overlayDir, defaultRegularDirPerm); err != nil {
		return err
	}

	imgDir := path.Join(overlayDir, treeStoreID)
	if err := os.MkdirAll(imgDir, defaultRegularDirPerm); err != nil {
		return err
	}

	// Also make 'rkt/pods/$STATE/$POD_UUID/overlay/$IMAGE_ID' to be readable by 'rkt' group
	// As 'rkt' status will read the 'rkt/pods/$STATE/$POD_UUID/overlay/$IMAGE_ID/upper/rkt/status/$APP'
	// to get exit status.
	// The owner is left unchanged (-1); only the group is set to cfg.RktGid.
	if err := os.Chown(imgDir, -1, cfg.RktGid); err != nil {
		return err
	}

	upperDir := path.Join(imgDir, "upper", appName)
	if err := os.MkdirAll(upperDir, imgMode); err != nil {
		return err
	}
	// Both the upper and the work directory get the configured mount label.
	if err := label.SetFileLabel(upperDir, cfg.MountLabel); err != nil {
		return err
	}

	workDir := path.Join(imgDir, "work", appName)
	if err := os.MkdirAll(workDir, defaultRegularDirPerm); err != nil {
		return err
	}
	if err := label.SetFileLabel(workDir, cfg.MountLabel); err != nil {
		return err
	}

	// Assemble the overlay mount options, let the label package add the
	// mount label to them, then mount the overlay on destRootfs.
	opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", cachedTreePath, upperDir, workDir)
	opts = label.FormatMountLabel(opts, cfg.MountLabel)
	if err := syscall.Mount("overlay", destRootfs, "overlay", 0, opts); err != nil {
		return fmt.Errorf("error mounting: %v", err)
	}

	return nil
}