github.com/AbhinandanKurakure/podman/v3@v3.4.10/libpod/container_internal.go

     1  package libpod
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"os"
    11  	"path/filepath"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	metadata "github.com/checkpoint-restore/checkpointctl/lib"
    17  	"github.com/containers/buildah/copier"
    18  	butil "github.com/containers/buildah/util"
    19  	"github.com/containers/podman/v3/libpod/define"
    20  	"github.com/containers/podman/v3/libpod/events"
    21  	"github.com/containers/podman/v3/pkg/cgroups"
    22  	"github.com/containers/podman/v3/pkg/ctime"
    23  	"github.com/containers/podman/v3/pkg/hooks"
    24  	"github.com/containers/podman/v3/pkg/hooks/exec"
    25  	"github.com/containers/podman/v3/pkg/rootless"
    26  	"github.com/containers/podman/v3/pkg/selinux"
    27  	"github.com/containers/podman/v3/pkg/util"
    28  	"github.com/containers/storage"
    29  	"github.com/containers/storage/pkg/archive"
    30  	"github.com/containers/storage/pkg/idtools"
    31  	"github.com/containers/storage/pkg/mount"
    32  	"github.com/coreos/go-systemd/v22/daemon"
    33  	securejoin "github.com/cyphar/filepath-securejoin"
    34  	spec "github.com/opencontainers/runtime-spec/specs-go"
    35  	"github.com/opencontainers/runtime-tools/generate"
    36  	"github.com/opencontainers/selinux/go-selinux/label"
    37  	"github.com/pkg/errors"
    38  	"github.com/sirupsen/logrus"
    39  	"golang.org/x/sys/unix"
    40  )
    41  
    42  const (
    43  	// name of the directory holding the artifacts
    44  	artifactsDir      = "artifacts"
    45  	execDirPermission = 0755
    46  	preCheckpointDir  = "pre-checkpoint"
    47  )
    48  
    49  // rootFsSize gets the size of the container's root filesystem
    50  // A container FS is split into two parts.  The first is the top layer, a
    51  // mutable layer, and the rest is the RootFS: the set of immutable layers
    52  // that make up the image on which the container is based.
    53  func (c *Container) rootFsSize() (int64, error) {
    54  	if c.config.Rootfs != "" {
    55  		return 0, nil
    56  	}
    57  	if c.runtime.store == nil {
    58  		return 0, nil
    59  	}
    60  
    61  	container, err := c.runtime.store.Container(c.ID())
    62  	if err != nil {
    63  		return 0, err
    64  	}
    65  
    66  	// Ignore the size of the top layer.   The top layer is a mutable RW layer
    67  	// and is not considered a part of the rootfs
    68  	rwLayer, err := c.runtime.store.Layer(container.LayerID)
    69  	if err != nil {
    70  		return 0, err
    71  	}
    72  	layer, err := c.runtime.store.Layer(rwLayer.Parent)
    73  	if err != nil {
    74  		return 0, err
    75  	}
    76  
    77  	size := int64(0)
    78  	for layer.Parent != "" {
    79  		layerSize, err := c.runtime.store.DiffSize(layer.Parent, layer.ID)
    80  		if err != nil {
    81  			return size, errors.Wrapf(err, "getting diffsize of layer %q and its parent %q", layer.ID, layer.Parent)
    82  		}
    83  		size += layerSize
    84  		layer, err = c.runtime.store.Layer(layer.Parent)
    85  		if err != nil {
    86  			return 0, err
    87  		}
    88  	}
    89  	// Get the size of the last layer.  Has to be outside of the loop
    90  	// because the parent of the last layer is "", and looking up a
    91  	// layer with an empty ID would return an error.
    92  	layerSize, err := c.runtime.store.DiffSize(layer.Parent, layer.ID)
    93  	return size + layerSize, err
    94  }
    95  
    96  // rwSize gets the size of the mutable top layer of the container.
    97  func (c *Container) rwSize() (int64, error) {
    98  	if c.config.Rootfs != "" {
    99  		var size int64
   100  		err := filepath.Walk(c.config.Rootfs, func(path string, info os.FileInfo, err error) error {
   101  			if err != nil {
   102  				return err
   103  			}
   104  			size += info.Size()
   105  			return nil
   106  		})
   107  		return size, err
   108  	}
   109  
   110  	container, err := c.runtime.store.Container(c.ID())
   111  	if err != nil {
   112  		return 0, err
   113  	}
   114  
   115  	// The top layer of a container is
   116  	// the only readable/writeable layer, all others are immutable.
   117  	rwLayer, err := c.runtime.store.Layer(container.LayerID)
   118  	if err != nil {
   119  		return 0, err
   120  	}
   121  
   122  	// Get the size of the top layer by calculating the size of the diff
   123  	// between the layer and its parent.
   124  	return c.runtime.store.DiffSize(rwLayer.Parent, rwLayer.ID)
   125  }
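
        // Illustrative sketch (not part of the original file): combining the two
        // size helpers above. rootFsSize plus rwSize is roughly a container's
        // total disk usage; the helper name totalDiskUsage is ours.
        func (c *Container) totalDiskUsage() (int64, error) {
        	rootSize, err := c.rootFsSize()
        	if err != nil {
        		return 0, err
        	}
        	topSize, err := c.rwSize()
        	if err != nil {
        		return 0, err
        	}
        	// Immutable image layers plus the mutable top layer.
        	return rootSize + topSize, nil
        }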
   126  
   127  // bundlePath returns the path to the container's bundle directory, where
   128  // the OCI spec and other per-container artifacts are placed
   129  func (c *Container) bundlePath() string {
   130  	return c.config.StaticDir
   131  }
   132  
   133  // ControlSocketPath returns the path to the container's control socket, used
   134  // for things like tty resizing
   135  func (c *Container) ControlSocketPath() string {
   136  	return filepath.Join(c.bundlePath(), "ctl")
   137  }
   138  
   139  // CheckpointPath returns the path to the directory containing the checkpoint
   140  func (c *Container) CheckpointPath() string {
   141  	return filepath.Join(c.bundlePath(), metadata.CheckpointDirectory)
   142  }
   143  
   144  // PreCheckPointPath returns the path to the directory containing the pre-checkpoint-images
   145  func (c *Container) PreCheckPointPath() string {
   146  	return filepath.Join(c.bundlePath(), preCheckpointDir)
   147  }
   148  
   149  // AttachSocketPath retrieves the path of the container's attach socket
   150  func (c *Container) AttachSocketPath() (string, error) {
   151  	return c.ociRuntime.AttachSocketPath(c)
   152  }
   153  
   154  // exitFilePath gets the path to the container's exit file
   155  func (c *Container) exitFilePath() (string, error) {
   156  	return c.ociRuntime.ExitFilePath(c)
   157  }
   158  
   159  // Wait for the container's exit file to appear.
   160  // When it does, update our state based on it.
   161  func (c *Container) waitForExitFileAndSync() error {
   162  	exitFile, err := c.exitFilePath()
   163  	if err != nil {
   164  		return err
   165  	}
   166  
   167  	chWait := make(chan error)
   168  	defer close(chWait)
   169  
   170  	_, err = WaitForFile(exitFile, chWait, time.Second*5)
   171  	if err != nil {
   172  		// Exit file did not appear
   173  		// Reset our state
   174  		c.state.ExitCode = -1
   175  		c.state.FinishedTime = time.Now()
   176  		c.state.State = define.ContainerStateStopped
   177  
   178  		if err2 := c.save(); err2 != nil {
   179  			logrus.Errorf("Error saving container %s state: %v", c.ID(), err2)
   180  		}
   181  
   182  		return err
   183  	}
   184  
   185  	if err := c.checkExitFile(); err != nil {
   186  		return err
   187  	}
   188  
   189  	return c.save()
   190  }
   191  
   192  // Handle the container exit file.
   193  // The exit file is used to supply container exit time and exit code.
   194  // This assumes the exit file already exists.
   195  func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
   196  	c.state.FinishedTime = ctime.Created(fi)
   197  	statusCodeStr, err := ioutil.ReadFile(exitFile)
   198  	if err != nil {
   199  		return errors.Wrapf(err, "failed to read exit file for container %s", c.ID())
   200  	}
   201  	statusCode, err := strconv.Atoi(string(statusCodeStr))
   202  	if err != nil {
   203  		return errors.Wrapf(err, "error converting exit status code (%q) for container %s to int",
   204  			statusCodeStr, c.ID())
   205  	}
   206  	c.state.ExitCode = int32(statusCode)
   207  
   208  	oomFilePath := filepath.Join(c.bundlePath(), "oom")
   209  	if _, err = os.Stat(oomFilePath); err == nil {
   210  		c.state.OOMKilled = true
   211  	}
   212  
   213  	c.state.Exited = true
   214  
   215  	// Write an event for the container's death
   216  	c.newContainerExitedEvent(c.state.ExitCode)
   217  
   218  	return nil
   219  }
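
        // Illustrative sketch (not part of the original file): the exit-file
        // decoding done by handleExitFile above, isolated as a helper (the name
        // parseExitFile is ours). The file holds the status as bare decimal
        // text, so contents of "137" decode to exit code 137 (128+SIGKILL).
        func parseExitFile(path string) (int32, error) {
        	raw, err := ioutil.ReadFile(path)
        	if err != nil {
        		return -1, err
        	}
        	code, err := strconv.Atoi(string(raw))
        	if err != nil {
        		return -1, err
        	}
        	return int32(code), nil
        }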
   220  
   221  func (c *Container) shouldRestart() bool {
   222  	// If we did not get a restart policy match, return false
   223  	// Do the same if our restart policy never restarts.
   224  	if !c.state.RestartPolicyMatch ||
   225  		c.config.RestartPolicy == define.RestartPolicyNo ||
   226  		c.config.RestartPolicy == define.RestartPolicyNone {
   227  		return false
   228  	}
   229  
   230  	// If we're RestartPolicyOnFailure, we need to check retries and exit
   231  	// code.
   232  	if c.config.RestartPolicy == define.RestartPolicyOnFailure {
   233  		if c.state.ExitCode == 0 {
   234  			return false
   235  		}
   236  
   237  		// If a max number of retries is set, check that we have not exhausted it
   238  		if c.config.RestartRetries > 0 {
   239  			if c.state.RestartCount >= c.config.RestartRetries {
   240  				return false
   241  			}
   242  		}
   243  	}
   244  	return true
   245  }
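
        // Illustrative sketch (not part of the original file): the decision
        // shouldRestart encodes for the on-failure policy, as a pure function
        // with hypothetical inputs. With "on-failure:3", a container that exited
        // with code 1 restarts while its restart count is below 3.
        func exampleOnFailureDecision(exitCode int32, restartCount, maxRetries uint) bool {
        	if exitCode == 0 {
        		return false // on-failure ignores successful exits
        	}
        	if maxRetries > 0 && restartCount >= maxRetries {
        		return false // retry budget exhausted
        	}
        	return true
        }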
   246  
   247  // Handle container restart policy.
   248  // This is called when a container has exited, and was not explicitly stopped by
   249  // an API call to stop the container or pod it is in.
   250  func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr error) {
   251  	if !c.shouldRestart() {
   252  		return false, nil
   253  	}
   254  	logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
   255  
   256  	// Need to check if dependencies are alive.
   257  	if err := c.checkDependenciesAndHandleError(); err != nil {
   258  		return false, err
   259  	}
   260  
   261  	// Is the container running again?
   262  	// If so, we don't have to do anything
   263  	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
   264  		return false, nil
   265  	} else if c.state.State == define.ContainerStateUnknown {
   266  		return false, errors.Wrap(define.ErrInternal, "invalid container state encountered in restart attempt")
   267  	}
   268  
   269  	c.newContainerEvent(events.Restart)
   270  
   271  	// Increment restart count
   272  	c.state.RestartCount++
   273  	logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount)
   274  	if err := c.save(); err != nil {
   275  		return false, err
   276  	}
   277  
   278  	defer func() {
   279  		if retErr != nil {
   280  			if err := c.cleanup(ctx); err != nil {
   281  				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
   282  			}
   283  		}
   284  	}()
   285  	if err := c.prepare(); err != nil {
   286  		return false, err
   287  	}
   288  
   289  	// Set up slirp4netns again because slirp4netns dies when conmon exits
   290  	if c.config.NetMode.IsSlirp4netns() {
   291  		err := c.runtime.setupSlirp4netns(c)
   292  		if err != nil {
   293  			return false, err
   294  		}
   295  	}
   296  
   297  	// Set up the rootlesskit port forwarder again since it dies when conmon exits.
   298  	// We use the rootlesskit port forwarder only when rootless and a bridge network is in use.
   299  	if rootless.IsRootless() && c.config.NetMode.IsBridge() && len(c.config.PortMappings) > 0 {
   300  		err := c.runtime.setupRootlessPortMappingViaRLK(c, c.state.NetNS.Path())
   301  		if err != nil {
   302  			return false, err
   303  		}
   304  	}
   305  
   306  	if c.state.State == define.ContainerStateStopped {
   307  		// Reinitialize the container if we need to
   308  		if err := c.reinit(ctx, true); err != nil {
   309  			return false, err
   310  		}
   311  	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
   312  		// Initialize the container
   313  		if err := c.init(ctx, true); err != nil {
   314  			return false, err
   315  		}
   316  	}
   317  	if err := c.start(); err != nil {
   318  		return false, err
   319  	}
   320  	return true, nil
   321  }
   322  
   323  // Ensure that the container is in one of a given set of states.
   324  // Returns true if the container is in one of the given states,
   325  // or false otherwise.
   326  func (c *Container) ensureState(states ...define.ContainerStatus) bool {
   327  	for _, state := range states {
   328  		if state == c.state.State {
   329  			return true
   330  		}
   331  	}
   332  	return false
   333  }
   334  
   335  // Sync this container with on-disk state and runtime status
   336  // Should only be called with container lock held
   337  // This function should suffice to ensure a container's state is accurate and
   338  // it is valid for use.
   339  func (c *Container) syncContainer() error {
   340  	if err := c.runtime.state.UpdateContainer(c); err != nil {
   341  		return err
   342  	}
   343  	// If the runtime knows about the container, update its status from the
   344  	// runtime and then save back to disk
   345  	if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStatePaused) {
   346  		oldState := c.state.State
   347  
   348  		if err := c.checkExitFile(); err != nil {
   349  			return err
   350  		}
   351  
   352  		// Only save back to DB if state changed
   353  		if c.state.State != oldState {
   354  			// Check for a restart policy match
   355  			if c.config.RestartPolicy != define.RestartPolicyNone && c.config.RestartPolicy != define.RestartPolicyNo &&
   356  				(oldState == define.ContainerStateRunning || oldState == define.ContainerStatePaused) &&
   357  				(c.state.State == define.ContainerStateStopped || c.state.State == define.ContainerStateExited) &&
   358  				!c.state.StoppedByUser {
   359  				c.state.RestartPolicyMatch = true
   360  			}
   361  
   362  			if err := c.save(); err != nil {
   363  				return err
   364  			}
   365  		}
   366  	}
   367  
   368  	if !c.valid {
   369  		return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid", c.ID())
   370  	}
   371  
   372  	return nil
   373  }
   374  
   375  func (c *Container) setupStorageMapping(dest, from *storage.IDMappingOptions) {
   376  	if c.config.Rootfs != "" {
   377  		return
   378  	}
   379  	*dest = *from
   380  	// If we are creating a container inside a pod, we always want to inherit the
   381  	// userns settings from the infra container. So clear the auto userns settings
   382  	// so that we don't request storage for a new uid/gid map.
   383  	if c.PodID() != "" && !c.IsInfra() {
   384  		dest.AutoUserNs = false
   385  	}
   386  	if dest.AutoUserNs {
   387  		overrides := c.getUserOverrides()
   388  		dest.AutoUserNsOpts.PasswdFile = overrides.ContainerEtcPasswdPath
   389  		dest.AutoUserNsOpts.GroupFile = overrides.ContainerEtcGroupPath
   390  		if c.config.User != "" {
   391  			initialSize := uint32(0)
   392  			parts := strings.Split(c.config.User, ":")
   393  			for _, p := range parts {
   394  				s, err := strconv.ParseUint(p, 10, 32)
   395  				if err == nil && uint32(s) > initialSize {
   396  					initialSize = uint32(s)
   397  				}
   398  			}
   399  			dest.AutoUserNsOpts.InitialSize = initialSize + 1
   400  		}
   401  	} else if c.config.Spec.Linux != nil {
   402  		dest.UIDMap = nil
   403  		for _, r := range c.config.Spec.Linux.UIDMappings {
   404  			u := idtools.IDMap{
   405  				ContainerID: int(r.ContainerID),
   406  				HostID:      int(r.HostID),
   407  				Size:        int(r.Size),
   408  			}
   409  			dest.UIDMap = append(dest.UIDMap, u)
   410  		}
   411  		dest.GIDMap = nil
   412  		for _, r := range c.config.Spec.Linux.GIDMappings {
   413  			g := idtools.IDMap{
   414  				ContainerID: int(r.ContainerID),
   415  				HostID:      int(r.HostID),
   416  				Size:        int(r.Size),
   417  			}
   418  			dest.GIDMap = append(dest.GIDMap, g)
   419  		}
   420  		dest.HostUIDMapping = false
   421  		dest.HostGIDMapping = false
   422  	}
   423  }
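
        // Illustrative sketch (not part of the original file): the InitialSize
        // computation above, isolated as a helper (the name is ours). For a
        // --user of "1000:1000" the largest numeric ID is 1000, so the auto
        // user namespace must map at least 1001 IDs for that UID/GID to exist.
        func exampleAutoUserNsSize(user string) uint32 {
        	initialSize := uint32(0)
        	for _, p := range strings.Split(user, ":") {
        		if s, err := strconv.ParseUint(p, 10, 32); err == nil && uint32(s) > initialSize {
        			initialSize = uint32(s)
        		}
        	}
        	return initialSize + 1 // exampleAutoUserNsSize("1000:1000") == 1001
        }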
   424  
   425  // Create container root filesystem for use
   426  func (c *Container) setupStorage(ctx context.Context) error {
   427  	if !c.valid {
   428  		return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid", c.ID())
   429  	}
   430  
   431  	if c.state.State != define.ContainerStateConfigured {
   432  		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s must be in Configured state to have storage set up", c.ID())
   433  	}
   434  
   435  	// Unless a user-supplied rootfs is in use, we need both an image ID and an image name
   436  	if c.config.Rootfs == "" && (c.config.RootfsImageID == "" || c.config.RootfsImageName == "") {
   437  		return errors.Wrapf(define.ErrInvalidArg, "must provide image ID and image name to use an image")
   438  	}
   439  	options := storage.ContainerOptions{
   440  		IDMappingOptions: storage.IDMappingOptions{
   441  			HostUIDMapping: true,
   442  			HostGIDMapping: true,
   443  		},
   444  		LabelOpts: c.config.LabelOpts,
   445  	}
   446  	if c.restoreFromCheckpoint && !c.config.Privileged {
   447  		// If restoring from a checkpoint, the root file-system
   448  		// needs to be mounted with the same SELinux labels as
   449  		// it was mounted previously.
   450  		if options.Flags == nil {
   451  			options.Flags = make(map[string]interface{})
   452  		}
   453  		options.Flags["ProcessLabel"] = c.config.ProcessLabel
   454  		options.Flags["MountLabel"] = c.config.MountLabel
   455  	}
   456  	if c.config.Privileged {
   457  		privOpt := func(opt string) bool {
   458  			for _, privopt := range []string{"nodev", "nosuid", "noexec"} {
   459  				if opt == privopt {
   460  					return true
   461  				}
   462  			}
   463  			return false
   464  		}
   465  
   466  		defOptions, err := storage.GetMountOptions(c.runtime.store.GraphDriverName(), c.runtime.store.GraphOptions())
   467  		if err != nil {
   468  			return errors.Wrapf(err, "error getting default mount options")
   469  		}
   470  		var newOptions []string
   471  		for _, opt := range defOptions {
   472  			if !privOpt(opt) {
   473  				newOptions = append(newOptions, opt)
   474  			}
   475  		}
   476  		options.MountOpts = newOptions
   477  	}
   478  
   479  	options.Volatile = c.config.Volatile
   480  
   481  	c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings)
   482  
   483  	// Unless the user has specified a name, use a randomly generated one.
   484  	// Note that name conflicts may occur (see #11735), so we need to loop.
   485  	generateName := c.config.Name == ""
   486  	var containerInfo ContainerInfo
   487  	var containerInfoErr error
   488  	for {
   489  		if generateName {
   490  			name, err := c.runtime.generateName()
   491  			if err != nil {
   492  				return err
   493  			}
   494  			c.config.Name = name
   495  		}
   496  		containerInfo, containerInfoErr = c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
   497  
   498  		if !generateName || errors.Cause(containerInfoErr) != storage.ErrDuplicateName {
   499  			break
   500  		}
   501  	}
   502  	if containerInfoErr != nil {
   503  		return errors.Wrapf(containerInfoErr, "error creating container storage")
   504  	}
   505  
   506  	c.config.IDMappings.UIDMap = containerInfo.UIDMap
   507  	c.config.IDMappings.GIDMap = containerInfo.GIDMap
   508  
   509  	processLabel, err := c.processLabel(containerInfo.ProcessLabel)
   510  	if err != nil {
   511  		return err
   512  	}
   513  	c.config.ProcessLabel = processLabel
   514  	c.config.MountLabel = containerInfo.MountLabel
   515  	c.config.StaticDir = containerInfo.Dir
   516  	c.state.RunDir = containerInfo.RunDir
   517  
   518  	if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
   519  		if err := os.Chown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
   520  			return err
   521  		}
   522  
   523  		if err := os.Chown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
   524  			return err
   525  		}
   526  	}
   527  
   528  	// Set the default Entrypoint and Command
   529  	if containerInfo.Config != nil {
   530  		// Set CMD in the container to the default configuration only if ENTRYPOINT is not set by the user.
   531  		if c.config.Entrypoint == nil && c.config.Command == nil {
   532  			c.config.Command = containerInfo.Config.Config.Cmd
   533  		}
   534  		if c.config.Entrypoint == nil {
   535  			c.config.Entrypoint = containerInfo.Config.Config.Entrypoint
   536  		}
   537  	}
   538  
   539  	artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
   540  	if err := os.MkdirAll(artifacts, 0755); err != nil {
   541  		return errors.Wrap(err, "error creating artifacts directory")
   542  	}
   543  
   544  	return nil
   545  }
   546  
   547  func (c *Container) processLabel(processLabel string) (string, error) {
   548  	if !c.config.Systemd && !c.ociRuntime.SupportsKVM() {
   549  		return processLabel, nil
   550  	}
   551  	ctrSpec, err := c.specFromState()
   552  	if err != nil {
   553  		return "", err
   554  	}
   555  	label, ok := ctrSpec.Annotations[define.InspectAnnotationLabel]
   556  	if !ok || !strings.Contains(label, "type:") {
   557  		switch {
   558  		case c.ociRuntime.SupportsKVM():
   559  			return selinux.KVMLabel(processLabel)
   560  		case c.config.Systemd:
   561  			return selinux.InitLabel(processLabel)
   562  		}
   563  	}
   564  	return processLabel, nil
   565  }
   566  
   567  // Tear down a container's storage prior to removal
   568  func (c *Container) teardownStorage() error {
   569  	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
   570  		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove storage for container %s as it is running or paused", c.ID())
   571  	}
   572  
   573  	artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
   574  	if err := os.RemoveAll(artifacts); err != nil {
   575  		return errors.Wrapf(err, "error removing container %s artifacts %q", c.ID(), artifacts)
   576  	}
   577  
   578  	if err := c.cleanupStorage(); err != nil {
   579  		return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID())
   580  	}
   581  
   582  	if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil {
   583  		// If the container has already been removed, warn but do not
   584  		// error - we wanted it gone, it is already gone.
   585  		// Potentially another tool using containers/storage already
   586  		// removed it?
   587  		if errors.Cause(err) == storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown {
   588  			logrus.Infof("Storage for container %s already removed", c.ID())
   589  			return nil
   590  		}
   591  
   592  		return errors.Wrapf(err, "error removing container %s root filesystem", c.ID())
   593  	}
   594  
   595  	return nil
   596  }
   597  
   598  // Reset resets state fields to default values.
   599  // It is performed before a refresh and clears the state after a reboot.
   600  // It does not save the results - assumes the database will do that for us.
   601  func resetState(state *ContainerState) {
   602  	state.PID = 0
   603  	state.ConmonPID = 0
   604  	state.Mountpoint = ""
   605  	state.Mounted = false
   606  	if state.State != define.ContainerStateExited {
   607  		state.State = define.ContainerStateConfigured
   608  	}
   609  	state.ExecSessions = make(map[string]*ExecSession)
   610  	state.LegacyExecSessions = nil
   611  	state.BindMounts = make(map[string]string)
   612  	state.StoppedByUser = false
   613  	state.RestartPolicyMatch = false
   614  	state.RestartCount = 0
   615  	state.Checkpointed = false
   616  }
   617  
   618  // Refresh refreshes the container's state after a restart.
   619  // Refresh cannot perform any operations that would lock another container.
   620  // We cannot guarantee any other container has a valid lock at the time it is
   621  // running.
   622  func (c *Container) refresh() error {
   623  	// Don't need a full sync, but we do need to update from the database to
   624  	// pick up potentially-missing container state
   625  	if err := c.runtime.state.UpdateContainer(c); err != nil {
   626  		return err
   627  	}
   628  
   629  	if !c.valid {
   630  		return errors.Wrapf(define.ErrCtrRemoved, "container %s is not valid - may have been removed", c.ID())
   631  	}
   632  
   633  	// We need to get the container's temporary directory from c/storage
   634  	// It was lost in the reboot and must be recreated
   635  	dir, err := c.runtime.storageService.GetRunDir(c.ID())
   636  	if err != nil {
   637  		return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID())
   638  	}
   639  	c.state.RunDir = dir
   640  
   641  	if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
   642  		info, err := os.Stat(c.runtime.config.Engine.TmpDir)
   643  		if err != nil {
   644  			return err
   645  		}
   646  		if err := os.Chmod(c.runtime.config.Engine.TmpDir, info.Mode()|0111); err != nil {
   647  			return err
   648  		}
   649  		root := filepath.Join(c.runtime.config.Engine.TmpDir, "containers-root", c.ID())
   650  		if err := os.MkdirAll(root, 0755); err != nil {
   651  			return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID())
   652  		}
   653  		if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil {
   654  			return err
   655  		}
   656  	}
   657  
   658  	// We need to pick up a new lock
   659  	lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID)
   660  	if err != nil {
   661  		return errors.Wrapf(err, "error acquiring lock %d for container %s", c.config.LockID, c.ID())
   662  	}
   663  	c.lock = lock
   664  
   665  	// Try to delete any lingering IP allocations.
   666  	// If this fails, just log and ignore.
   667  	// I'm a little concerned that this is so far down in refresh() and we
   668  	// could fail before getting to it - but the worst that would happen is
   669  	// that Inspect() would return info on IPs we no longer own.
   670  	if len(c.state.NetworkStatus) > 0 {
   671  		if err := c.removeIPv4Allocations(); err != nil {
   672  			logrus.Errorf("Error removing IP allocations for container %s: %v", c.ID(), err)
   673  		}
   674  	}
   675  	c.state.NetworkStatus = nil
   676  
   677  	if err := c.save(); err != nil {
   678  		return errors.Wrapf(err, "error refreshing state for container %s", c.ID())
   679  	}
   680  
   681  	// Remove ctl and attach files, which may persist across reboot
   682  	if err := c.removeConmonFiles(); err != nil {
   683  		return err
   684  	}
   685  
   686  	return nil
   687  }
   688  
   689  // Try to remove IP address allocations. Presently IPv4 only.
   690  // Should be safe as rootless because NetworkStatus should only be populated if
   691  // CNI is running.
   692  func (c *Container) removeIPv4Allocations() error {
   693  	cniNetworksDir, err := getCNINetworksDir()
   694  	if err != nil {
   695  		return err
   696  	}
   697  
   698  	if len(c.state.NetworkStatus) == 0 {
   699  		return nil
   700  	}
   701  
   702  	cniDefaultNetwork := ""
   703  	if c.runtime.netPlugin != nil {
   704  		cniDefaultNetwork = c.runtime.netPlugin.GetDefaultNetworkName()
   705  	}
   706  
   707  	networks, _, err := c.networks()
   708  	if err != nil {
   709  		return err
   710  	}
   711  
   712  	if len(networks) != len(c.state.NetworkStatus) {
   713  		return errors.Wrapf(define.ErrInternal, "network mismatch: asked to join %d CNI networks but got %d CNI results", len(networks), len(c.state.NetworkStatus))
   714  	}
   715  
   716  	for index, result := range c.state.NetworkStatus {
   717  		for _, ctrIP := range result.IPs {
   718  			if ctrIP.Version != "4" {
   719  				continue
   720  			}
   721  			candidate := ""
   722  			if len(networks) > 0 {
   723  				// CNI returns networks in the order we passed them.
   724  				// So our index into results should be our index
   725  				// into networks.
   726  				candidate = filepath.Join(cniNetworksDir, networks[index], ctrIP.Address.IP.String())
   727  			} else {
   728  				candidate = filepath.Join(cniNetworksDir, cniDefaultNetwork, ctrIP.Address.IP.String())
   729  			}
   730  			logrus.Debugf("Going to try removing IP address reservation file %q for container %s", candidate, c.ID())
   731  			if err := os.Remove(candidate); err != nil && !os.IsNotExist(err) {
   732  				return errors.Wrapf(err, "error removing CNI IP reservation file %q for container %s", candidate, c.ID())
   733  			}
   734  		}
   735  	}
   736  
   737  	return nil
   738  }
   739  
   740  // Remove conmon attach socket and terminal resize FIFO
   741  // This is necessary for restarting containers
   742  func (c *Container) removeConmonFiles() error {
   743  	// Files are allowed to not exist, so ignore ENOENT
   744  	attachFile, err := c.AttachSocketPath()
   745  	if err != nil {
   746  		return errors.Wrapf(err, "failed to get attach socket path for container %s", c.ID())
   747  	}
   748  
   749  	if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) {
   750  		return errors.Wrapf(err, "error removing container %s attach file", c.ID())
   751  	}
   752  
   753  	ctlFile := filepath.Join(c.bundlePath(), "ctl")
   754  	if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) {
   755  		return errors.Wrapf(err, "error removing container %s ctl file", c.ID())
   756  	}
   757  
   758  	winszFile := filepath.Join(c.bundlePath(), "winsz")
   759  	if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) {
   760  		return errors.Wrapf(err, "error removing container %s winsz file", c.ID())
   761  	}
   762  
   763  	oomFile := filepath.Join(c.bundlePath(), "oom")
   764  	if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
   765  		return errors.Wrapf(err, "error removing container %s OOM file", c.ID())
   766  	}
   767  
   768  	// Remove the exit file so we don't leak memory in tmpfs
   769  	exitFile, err := c.exitFilePath()
   770  	if err != nil {
   771  		return err
   772  	}
   773  	if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) {
   774  		return errors.Wrapf(err, "error removing container %s exit file", c.ID())
   775  	}
   776  
   777  	return nil
   778  }
   779  
   780  func (c *Container) export(path string) error {
   781  	mountPoint := c.state.Mountpoint
   782  	if !c.state.Mounted {
   783  		containerMount, err := c.runtime.store.Mount(c.ID(), c.config.MountLabel)
   784  		if err != nil {
   785  			return errors.Wrapf(err, "error mounting container %q", c.ID())
   786  		}
   787  		mountPoint = containerMount
   788  		defer func() {
   789  			if _, err := c.runtime.store.Unmount(c.ID(), false); err != nil {
   790  				logrus.Errorf("error unmounting container %q: %v", c.ID(), err)
   791  			}
   792  		}()
   793  	}
   794  
   795  	input, err := archive.Tar(mountPoint, archive.Uncompressed)
   796  	if err != nil {
   797  		return errors.Wrapf(err, "error reading container directory %q", c.ID())
   798  	}
   799  
   800  	outFile, err := os.Create(path)
   801  	if err != nil {
   802  		return errors.Wrapf(err, "error creating file %q", path)
   803  	}
   804  	defer outFile.Close()
   805  
   806  	_, err = io.Copy(outFile, input)
   807  	return err
   808  }
   809  
   810  // Get path of artifact with a given name for this container
   811  func (c *Container) getArtifactPath(name string) string {
   812  	return filepath.Join(c.config.StaticDir, artifactsDir, name)
   813  }
   814  
   815  // Used with Wait() to determine if a container has exited
   816  func (c *Container) isStopped() (bool, int32, error) {
   817  	if !c.batched {
   818  		c.lock.Lock()
   819  		defer c.lock.Unlock()
   820  	}
   821  	err := c.syncContainer()
   822  	if err != nil {
   823  		return true, -1, err
   824  	}
   825  
   826  	return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), c.state.ExitCode, nil
   827  }
   828  
   829  // save container state to the database
   830  func (c *Container) save() error {
   831  	if err := c.runtime.state.SaveContainer(c); err != nil {
   832  		return errors.Wrapf(err, "error saving container %s state", c.ID())
   833  	}
   834  	return nil
   835  }
   836  
   837  // Checks that the container is in the right state, then initializes it in preparation for starting.
   838  // If recursive is true, each of the container's dependencies will be started.
   839  // Otherwise, this function will return an error if any dependencies of this container are not running.
   840  func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
   841  	// Container must be created or stopped to be started
   842  	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
   843  		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s must be in Created or Stopped state to be started", c.ID())
   844  	}
   845  
   846  	if !recursive {
   847  		if err := c.checkDependenciesAndHandleError(); err != nil {
   848  			return err
   849  		}
   850  	} else {
   851  		if err := c.startDependencies(ctx); err != nil {
   852  			return err
   853  		}
   854  	}
   855  
   856  	defer func() {
   857  		if retErr != nil {
   858  			if err := c.cleanup(ctx); err != nil {
   859  				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
   860  			}
   861  		}
   862  	}()
   863  
   864  	if err := c.prepare(); err != nil {
   865  		return err
   866  	}
   867  
   868  	if c.state.State == define.ContainerStateStopped {
   869  		// Reinitialize the container if we need to
   870  		if err := c.reinit(ctx, false); err != nil {
   871  			return err
   872  		}
   873  	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
   874  		// Or initialize it if necessary
   875  		if err := c.init(ctx, false); err != nil {
   876  			return err
   877  		}
   878  	}
   879  	return nil
   880  }
   881  
   882  // Checks that dependencies are running and returns a helpful error if they are not
   883  func (c *Container) checkDependenciesAndHandleError() error {
   884  	notRunning, err := c.checkDependenciesRunning()
   885  	if err != nil {
   886  		return errors.Wrapf(err, "error checking dependencies for container %s", c.ID())
   887  	}
   888  	if len(notRunning) > 0 {
   889  		depString := strings.Join(notRunning, ",")
   890  		return errors.Wrapf(define.ErrCtrStateInvalid, "some dependencies of container %s are not started: %s", c.ID(), depString)
   891  	}
   892  
   893  	return nil
   894  }
   895  
   896  // Recursively start all dependencies of a container so the container can be started.
   897  func (c *Container) startDependencies(ctx context.Context) error {
   898  	depCtrIDs := c.Dependencies()
   899  	if len(depCtrIDs) == 0 {
   900  		return nil
   901  	}
   902  
   903  	depVisitedCtrs := make(map[string]*Container)
   904  	if err := c.getAllDependencies(depVisitedCtrs); err != nil {
   905  		return errors.Wrapf(err, "error starting dependency for container %s", c.ID())
   906  	}
   907  
   908  	// Because of how Go passes slices to functions, a slice cannot grow between function calls
   909  	// without clunky syntax. Circumvent this by translating the map to a slice for buildContainerGraph
   910  	depCtrs := make([]*Container, 0)
   911  	for _, ctr := range depVisitedCtrs {
   912  		depCtrs = append(depCtrs, ctr)
   913  	}
   914  
   915  	// Build a dependency graph of containers
   916  	graph, err := BuildContainerGraph(depCtrs)
   917  	if err != nil {
   918  		return errors.Wrapf(err, "error generating dependency graph for container %s", c.ID())
   919  	}
   920  
   921  	// If there are no containers without dependencies, we can't start
   922  	// Error out
   923  	if len(graph.noDepNodes) == 0 {
   924  		// we have no dependencies that need starting, go ahead and return
   925  		if len(graph.nodes) == 0 {
   926  			return nil
   927  		}
   928  		return errors.Wrapf(define.ErrNoSuchCtr, "all dependencies of container %s themselves have dependencies and cannot be started", c.ID())
   929  	}
   930  
   931  	ctrErrors := make(map[string]error)
   932  	ctrsVisited := make(map[string]bool)
   933  
   934  	// Traverse the graph beginning at nodes with no dependencies
   935  	for _, node := range graph.noDepNodes {
   936  		startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
   937  	}
   938  
   939  	if len(ctrErrors) > 0 {
   940  		logrus.Errorf("error starting some container dependencies")
   941  		for _, e := range ctrErrors {
   942  			logrus.Errorf("%q", e)
   943  		}
   944  		return errors.Wrapf(define.ErrInternal, "error starting some containers")
   945  	}
   946  	return nil
   947  }
   948  
   949  // getAllDependencies is a precursor to starting dependencies.
   950  // To start a container with all of its dependencies, we need to recursively find all dependencies
   951  // a container has, as well as each of those containers' dependencies, and so on
   952  // To do so, keep track of containers already visited (so there aren't redundant state lookups),
   953  // and recursively search until we have reached the leaves of every dependency node.
   954  // Since we need to start all dependencies for our original container to successfully start, we propagate any errors
   955  // in looking up dependencies.
   956  // Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when
   957  // a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start
   958  // must be tested first.
   959  func (c *Container) getAllDependencies(visited map[string]*Container) error {
   960  	depIDs := c.Dependencies()
   961  	if len(depIDs) == 0 {
   962  		return nil
   963  	}
   964  	for _, depID := range depIDs {
   965  		if _, ok := visited[depID]; !ok {
   966  			dep, err := c.runtime.state.Container(depID)
   967  			if err != nil {
   968  				return err
   969  			}
   970  			status, err := dep.State()
   971  			if err != nil {
   972  				return err
   973  			}
   974  			// if the dependency is already running, we can assume its dependencies are also running
   975  			// so no need to add them to those we need to start
   976  			if status != define.ContainerStateRunning {
   977  				visited[depID] = dep
   978  				if err := dep.getAllDependencies(visited); err != nil {
   979  					return err
   980  				}
   981  			}
   982  		}
   983  	}
   984  	return nil
   985  }
   986  
   987  // Check if a container's dependencies are running
   988  // Returns a []string containing the IDs of dependencies that are not running
   989  func (c *Container) checkDependenciesRunning() ([]string, error) {
   990  	deps := c.Dependencies()
   991  	notRunning := []string{}
   992  
   993  	// We were not passed a set of dependency containers
   994  	// Build it ourselves
   995  	depCtrs := make(map[string]*Container, len(deps))
   996  	for _, dep := range deps {
   997  		// Get the dependency container
   998  		depCtr, err := c.runtime.state.Container(dep)
   999  		if err != nil {
  1000  			return nil, errors.Wrapf(err, "error retrieving dependency %s of container %s from state", dep, c.ID())
  1001  		}
  1002  
  1003  		// Check the status
  1004  		state, err := depCtr.State()
  1005  		if err != nil {
  1006  			return nil, errors.Wrapf(err, "error retrieving state of dependency %s of container %s", dep, c.ID())
  1007  		}
  1008  		if state != define.ContainerStateRunning && !depCtr.config.IsInfra {
  1009  			notRunning = append(notRunning, dep)
  1010  		}
  1011  		depCtrs[dep] = depCtr
  1012  	}
  1013  
  1014  	return notRunning, nil
  1015  }
  1016  
  1017  func (c *Container) completeNetworkSetup() error {
  1018  	var outResolvConf []string
  1019  	netDisabled, err := c.NetworkDisabled()
  1020  	if err != nil {
  1021  		return err
  1022  	}
  1023  	if !c.config.PostConfigureNetNS || netDisabled {
  1024  		return nil
  1025  	}
  1026  	if err := c.syncContainer(); err != nil {
  1027  		return err
  1028  	}
  1029  	if c.config.NetMode.IsSlirp4netns() {
  1030  		return c.runtime.setupSlirp4netns(c)
  1031  	}
  1032  	if err := c.runtime.setupNetNS(c); err != nil {
  1033  		return err
  1034  	}
  1035  	state := c.state
  1036  	// collect any dns servers that cni tells us to use (dnsname)
  1037  	for _, cni := range state.NetworkStatus {
  1038  		if cni.DNS.Nameservers != nil {
  1039  			for _, server := range cni.DNS.Nameservers {
  1040  				outResolvConf = append(outResolvConf, fmt.Sprintf("nameserver %s", server))
  1041  			}
  1042  		}
  1043  	}
  1044  	// check if we have a bindmount for /etc/hosts
  1045  	if hostsBindMount, ok := state.BindMounts["/etc/hosts"]; ok && len(c.cniHosts()) > 0 {
  1046  		ctrHostPath := filepath.Join(c.state.RunDir, "hosts")
  1047  		if hostsBindMount == ctrHostPath {
  1048  			// read the existing hosts
  1049  			b, err := ioutil.ReadFile(hostsBindMount)
  1050  			if err != nil {
  1051  				return err
  1052  			}
  1053  			if err := ioutil.WriteFile(hostsBindMount, append(b, []byte(c.cniHosts())...), 0644); err != nil {
  1054  				return err
  1055  			}
  1056  		}
  1057  	}
  1058  
  1059  	// check if we have a bindmount for resolv.conf
  1060  	resolvBindMount := state.BindMounts["/etc/resolv.conf"]
  1061  	if len(outResolvConf) < 1 || resolvBindMount == "" || len(c.config.NetNsCtr) > 0 {
  1062  		return nil
  1063  	}
  1064  	// read the existing resolv.conf
  1065  	b, err := ioutil.ReadFile(resolvBindMount)
  1066  	if err != nil {
  1067  		return err
  1068  	}
  1069  	for _, line := range strings.Split(string(b), "\n") {
  1070  		// only keep things that don't start with nameserver from the old
  1071  		// resolv.conf file
  1072  		if !strings.HasPrefix(line, "nameserver") {
  1073  			outResolvConf = append([]string{line}, outResolvConf...)
  1074  		}
  1075  	}
  1076  	// write and return
  1077  	return ioutil.WriteFile(resolvBindMount, []byte(strings.Join(outResolvConf, "\n")), 0644)
  1078  }
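
        // Illustrative sketch (not part of the original file): the resolv.conf
        // merge performed above, as a standalone helper (the name mergeResolvConf
        // is ours). Old nameserver lines are dropped, other lines are kept ahead
        // of the new entries, and the CNI-provided servers become the only
        // nameservers.
        func mergeResolvConf(existing string, cniServers []string) string {
        	out := make([]string, 0, len(cniServers))
        	for _, server := range cniServers {
        		out = append(out, "nameserver "+server)
        	}
        	for _, line := range strings.Split(existing, "\n") {
        		// Keep everything except old nameserver entries.
        		if !strings.HasPrefix(line, "nameserver") {
        			out = append([]string{line}, out...)
        		}
        	}
        	return strings.Join(out, "\n")
        }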
  1079  
  1080  func (c *Container) cniHosts() string {
  1081  	var hosts string
  1082  	if len(c.state.NetworkStatus) > 0 && len(c.state.NetworkStatus[0].IPs) > 0 {
  1083  		ipAddress := strings.Split(c.state.NetworkStatus[0].IPs[0].Address.String(), "/")[0]
  1084  		hosts += fmt.Sprintf("%s\t%s %s\n", ipAddress, c.Hostname(), c.config.Name)
  1085  	}
  1086  	return hosts
  1087  }
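
        // Illustrative example (not part of the original file): for a container
        // named "web" with hostname "web" and a hypothetical first CNI address of
        // 10.88.0.5/16, cniHosts returns the single /etc/hosts line
        //
        //	10.88.0.5	web web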
  1088  
  1089  // Initialize a container, creating it in the runtime
  1090  func (c *Container) init(ctx context.Context, retainRetries bool) error {
  1091  	// Unconditionally remove conmon temporary files.
  1092  	// We've been running into far too many issues where they block startup.
  1093  	if err := c.removeConmonFiles(); err != nil {
  1094  		return err
  1095  	}
  1096  
  1097  	// Generate the OCI spec
  1098  	newSpec, err := c.generateSpec(ctx)
  1099  	if err != nil {
  1100  		return err
  1101  	}
  1102  
  1103  	// Make sure the workdir exists while initializing the container
  1104  	if err := c.resolveWorkDir(); err != nil {
  1105  		return err
  1106  	}
  1107  
  1108  	// Save the OCI spec to disk
  1109  	if err := c.saveSpec(newSpec); err != nil {
  1110  		return err
  1111  	}
  1112  
  1113  	for _, v := range c.config.NamedVolumes {
  1114  		if err := c.fixVolumePermissions(v); err != nil {
  1115  			return err
  1116  		}
  1117  	}
  1118  
  1119  	// With the spec complete, do an OCI create
  1120  	if err := c.ociRuntime.CreateContainer(c, nil); err != nil {
  1121  		// Fedora 31 is carrying a patch to display improved error
  1122  		// messages to better handle the V2 transition. This is NOT
  1123  		// upstream in any OCI runtime.
  1124  		// TODO: Remove once runc supports cgroupsv2
  1125  		if strings.Contains(err.Error(), "this version of runc doesn't work on cgroups v2") {
  1126  			logrus.Errorf("oci runtime %q does not support CGroups V2: use system migrate to mitigate", c.ociRuntime.Name())
  1127  		}
  1128  		return err
  1129  	}
  1130  
  1131  	logrus.Debugf("Created container %s in OCI runtime", c.ID())
  1132  
  1133  	// Remove any exec sessions leftover from a potential prior run.
  1134  	if len(c.state.ExecSessions) > 0 {
  1135  		if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil {
  1136  			logrus.Errorf("Error removing container %s exec sessions from DB: %v", c.ID(), err)
  1137  		}
  1138  		c.state.ExecSessions = make(map[string]*ExecSession)
  1139  	}
  1140  
  1141  	c.state.Checkpointed = false
  1142  	c.state.ExitCode = 0
  1143  	c.state.Exited = false
  1144  	c.state.State = define.ContainerStateCreated
  1145  	c.state.StoppedByUser = false
  1146  	c.state.RestartPolicyMatch = false
  1147  
  1148  	if !retainRetries {
  1149  		c.state.RestartCount = 0
  1150  	}
  1151  
  1152  	if err := c.save(); err != nil {
  1153  		return err
  1154  	}
  1155  	if c.config.HealthCheckConfig != nil {
  1156  		if err := c.createTimer(); err != nil {
  1157  			logrus.Error(err)
  1158  		}
  1159  	}
  1160  
  1161  	defer c.newContainerEvent(events.Init)
  1162  	return c.completeNetworkSetup()
  1163  }
  1164  
  1165  // Clean up a container in the OCI runtime.
  1166  // Deletes the container in the runtime, and resets its state to Exited.
  1167  // The container can be restarted cleanly after this.
  1168  func (c *Container) cleanupRuntime(ctx context.Context) error {
  1169  	// If the container is not ContainerStateStopped or
  1170  	// ContainerStateCreated, do nothing.
  1171  	if !c.ensureState(define.ContainerStateStopped, define.ContainerStateCreated) {
  1172  		return nil
  1173  	}
  1174  
  1175  	// If necessary, delete attach and ctl files
  1176  	if err := c.removeConmonFiles(); err != nil {
  1177  		return err
  1178  	}
  1179  
  1180  	if err := c.delete(ctx); err != nil {
  1181  		return err
  1182  	}
  1183  
  1184  	// If we were Stopped, we are now Exited, as we've removed ourselves
  1185  	// from the runtime.
  1186  	// If we were Created, we are now Configured.
  1187  	if c.state.State == define.ContainerStateStopped {
  1188  		c.state.State = define.ContainerStateExited
  1189  	} else if c.state.State == define.ContainerStateCreated {
  1190  		c.state.State = define.ContainerStateConfigured
  1191  	}
  1192  
  1193  	if c.valid {
  1194  		if err := c.save(); err != nil {
  1195  			return err
  1196  		}
  1197  	}
  1198  
  1199  	logrus.Debugf("Successfully cleaned up container %s", c.ID())
  1200  
  1201  	return nil
  1202  }
  1203  
  1204  // Reinitialize a container.
  1205  // Deletes and recreates a container in the runtime.
  1206  // Should only be done on ContainerStateStopped containers.
  1207  // Not necessary for ContainerStateExited - the container has already been
  1208  // removed from the runtime, so init() can proceed freely.
  1209  func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
  1210  	logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
  1211  
  1212  	if err := c.cleanupRuntime(ctx); err != nil {
  1213  		return err
  1214  	}
  1215  
  1216  	// Initialize the container again
  1217  	return c.init(ctx, retainRetries)
  1218  }
  1219  
  1220  // Initialize (if necessary) and start a container
  1221  // Performs all necessary steps to start a container that is not running
  1222  // Does not lock or check validity
  1223  func (c *Container) initAndStart(ctx context.Context) (retErr error) {
  1224  	// If we are ContainerStateUnknown, throw an error
  1225  	if c.state.State == define.ContainerStateUnknown {
  1226  		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in an unknown state", c.ID())
  1227  	} else if c.state.State == define.ContainerStateRemoving {
  1228  		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot start container %s as it is being removed", c.ID())
  1229  	}
  1230  
  1231  	// If we are running, do nothing
  1232  	if c.state.State == define.ContainerStateRunning {
  1233  		return nil
  1234  	}
  1235  	// If we are paused, throw an error
  1236  	if c.state.State == define.ContainerStatePaused {
  1237  		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot start paused container %s", c.ID())
  1238  	}
  1239  
  1240  	defer func() {
  1241  		if retErr != nil {
  1242  			if err := c.cleanup(ctx); err != nil {
  1243  				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
  1244  			}
  1245  		}
  1246  	}()
  1247  
  1248  	if err := c.prepare(); err != nil {
  1249  		return err
  1250  	}
  1251  
  1252  	// If we are ContainerStateStopped we need to remove from runtime
  1253  	// And reset to ContainerStateConfigured
  1254  	if c.state.State == define.ContainerStateStopped {
  1255  		logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
  1256  
  1257  		if err := c.reinit(ctx, false); err != nil {
  1258  			return err
  1259  		}
  1260  	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
  1261  		if err := c.init(ctx, false); err != nil {
  1262  			return err
  1263  		}
  1264  	}
  1265  
  1266  	// Now start the container
  1267  	return c.start()
  1268  }
  1269  
  1270  // Internal, non-locking function to start a container
  1271  func (c *Container) start() error {
  1272  	if c.config.Spec.Process != nil {
  1273  		logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
  1274  	}
  1275  
  1276  	if err := c.ociRuntime.StartContainer(c); err != nil {
  1277  		return err
  1278  	}
  1279  	logrus.Debugf("Started container %s", c.ID())
  1280  
  1281  	c.state.State = define.ContainerStateRunning
  1282  
  1283  	if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
  1284  		payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
  1285  		if c.config.SdNotifyMode == define.SdNotifyModeConmon {
  1286  			payload += "\n"
  1287  			payload += daemon.SdNotifyReady
  1288  		}
  1289  		if sent, err := daemon.SdNotify(false, payload); err != nil {
  1290  			logrus.Errorf("Error notifying systemd of Conmon PID: %s", err.Error())
  1291  		} else if sent {
  1292  			logrus.Debugf("Notify sent successfully")
  1293  		}
  1294  	}
  1295  
  1296  	if c.config.HealthCheckConfig != nil {
  1297  		if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
  1298  			logrus.Error(err)
  1299  		}
  1300  		if err := c.startTimer(); err != nil {
  1301  			logrus.Error(err)
  1302  		}
  1303  	}
  1304  
  1305  	defer c.newContainerEvent(events.Start)
  1306  
  1307  	return c.save()
  1308  }
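
        // Illustrative example (not part of the original file): with SdNotifyMode
        // "conmon" and a hypothetical conmon PID of 4242, the payload assembled
        // above and sent to systemd is
        //
        //	MAINPID=4242
        //	READY=1
        //
        // (daemon.SdNotifyReady is go-systemd's "READY=1" constant).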
  1309  
  1310  // Internal, non-locking function to stop container
  1311  func (c *Container) stop(timeout uint) error {
  1312  	logrus.Debugf("Stopping ctr %s (timeout %d)", c.ID(), timeout)
  1313  
  1314  	// If the container is running in a PID namespace, then killing the
  1315  	// primary PID is enough to kill the container.  If it is not running in
  1316  	// a PID namespace then the OCI runtime needs to kill ALL processes in
  1317  	// the container's cgroup in order to make sure the container is stopped.
  1318  	all := !c.hasNamespace(spec.PIDNamespace)
  1319  	// We can't use --all if CGroups aren't present.
  1320  	// Rootless containers with CGroups v1 and NoCgroups are both cases
  1321  	// where this can happen.
  1322  	if all {
  1323  		if c.config.NoCgroups {
  1324  			all = false
  1325  		} else if rootless.IsRootless() {
  1326  			// Only do this check if we need to
  1327  			unified, err := cgroups.IsCgroup2UnifiedMode()
  1328  			if err != nil {
  1329  				return err
  1330  			}
  1331  			if !unified {
  1332  				all = false
  1333  			}
  1334  		}
  1335  	}
  1336  
  1337  	// Check if conmon is still alive.
  1338  	// If it is not, we won't be getting an exit file.
  1339  	conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
  1340  	if err != nil {
  1341  		return err
  1342  	}
  1343  
  1344  	// Set the container state to "stopping" and unlock the container
  1345  	// before handing it over to conmon to unblock other commands.  #8501
  1346  	// demonstrates nicely that a high stop timeout will block even simple
  1347  	// commands such as `podman ps` from progressing if the container lock
  1348  	// is held when busy-waiting for the container to be stopped.
  1349  	c.state.State = define.ContainerStateStopping
  1350  	if err := c.save(); err != nil {
  1351  		return errors.Wrapf(err, "error saving container %s state before stopping", c.ID())
  1352  	}
  1353  	if !c.batched {
  1354  		c.lock.Unlock()
  1355  	}
  1356  
  1357  	stopErr := c.ociRuntime.StopContainer(c, timeout, all)
  1358  
  1359  	if !c.batched {
  1360  		c.lock.Lock()
  1361  		if err := c.syncContainer(); err != nil {
  1362  			switch errors.Cause(err) {
  1363  			// If the container has already been removed (e.g., via
  1364  			// the cleanup process), there's nothing left to do.
  1365  			case define.ErrNoSuchCtr, define.ErrCtrRemoved:
  1366  				return stopErr
  1367  			default:
  1368  				if stopErr != nil {
  1369  					logrus.Errorf("Error syncing container %s status: %v", c.ID(), err)
  1370  					return stopErr
  1371  				}
  1372  				return err
  1373  			}
  1374  		}
  1375  	}
  1376  
  1377  	// We have to check stopErr *after* we lock again - otherwise, we have a
  1378  	// chance of panicking on a double-unlock. Ref: GH Issue 9615
  1379  	if stopErr != nil {
  1380  		return stopErr
  1381  	}
  1382  
  1383  	// Since we're now subject to a race condition with other processes who
  1384  	// may have altered the state (and other data), let's check if the
  1385  	// state has changed.  If so, we should return immediately and log a
  1386  	// warning.
  1387  	if c.state.State != define.ContainerStateStopping {
  1388  		logrus.Warnf(
  1389  			"Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered",
  1390  			c.ID(), define.ContainerStateStopping, c.state.State,
  1391  		)
  1392  		return nil
  1393  	}
  1394  
  1395  	c.newContainerEvent(events.Stop)
  1396  
  1397  	c.state.PID = 0
  1398  	c.state.ConmonPID = 0
  1399  	c.state.StoppedByUser = true
  1400  
  1401  	if !conmonAlive {
  1402  		// Conmon is dead, so we can't expect an exit code.
  1403  		c.state.ExitCode = -1
  1404  		c.state.FinishedTime = time.Now()
  1405  		c.state.State = define.ContainerStateStopped
  1406  		if err := c.save(); err != nil {
  1407  			logrus.Errorf("Error saving container %s status: %v", c.ID(), err)
  1408  		}
  1409  
  1410  		return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID())
  1411  	}
  1412  
  1413  	if err := c.save(); err != nil {
  1414  		return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
  1415  	}
  1416  
  1417  	// Wait until we have an exit file, and sync once we do
  1418  	if err := c.waitForExitFileAndSync(); err != nil {
  1419  		return err
  1420  	}
  1421  
  1422  	return nil
  1423  }
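
        // Illustrative sketch (not part of the original file): the conditions
        // under which stop() above asks the OCI runtime to kill all processes in
        // the cgroup rather than just the primary PID, written out as a pure
        // function (the name and parameters are ours).
        func exampleKillAll(hasPrivatePIDNamespace, noCgroups, isRootless, cgroupsV2 bool) bool {
        	if hasPrivatePIDNamespace {
        		return false // killing the primary PID kills the container
        	}
        	if noCgroups {
        		return false // no cgroup from which to enumerate processes
        	}
        	if isRootless && !cgroupsV2 {
        		return false // rootless cgroups v1 cannot use --all
        	}
        	return true
        }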
  1424  
  1425  // Internal, non-locking function to pause a container
  1426  func (c *Container) pause() error {
  1427  	if c.config.NoCgroups {
  1428  		return errors.Wrapf(define.ErrNoCgroups, "cannot pause without using CGroups")
  1429  	}
  1430  
  1431  	if rootless.IsRootless() {
  1432  		cgroupv2, err := cgroups.IsCgroup2UnifiedMode()
  1433  		if err != nil {
  1434  			return errors.Wrap(err, "failed to determine cgroup version")
  1435  		}
  1436  		if !cgroupv2 {
  1437  			return errors.Wrap(define.ErrNoCgroups, "cannot pause rootless containers on cgroups V1")
  1438  		}
  1439  	}
  1440  
  1441  	if err := c.ociRuntime.PauseContainer(c); err != nil {
  1442  		// TODO when using docker-py there is some sort of race/incompatibility here
  1443  		return err
  1444  	}
  1445  
  1446  	logrus.Debugf("Paused container %s", c.ID())
  1447  
  1448  	c.state.State = define.ContainerStatePaused
  1449  
  1450  	return c.save()
  1451  }
  1452  
  1453  // Internal, non-locking function to unpause a container
  1454  func (c *Container) unpause() error {
  1455  	if c.config.NoCgroups {
  1456  		return errors.Wrapf(define.ErrNoCgroups, "cannot unpause without using CGroups")
  1457  	}
  1458  
  1459  	if err := c.ociRuntime.UnpauseContainer(c); err != nil {
  1460  		// TODO when using docker-py there is some sort of race/incompatibility here
  1461  		return err
  1462  	}
  1463  
  1464  	logrus.Debugf("Unpaused container %s", c.ID())
  1465  
  1466  	c.state.State = define.ContainerStateRunning
  1467  
  1468  	return c.save()
  1469  }
  1470  
  1471  // Internal, non-locking function to restart a container
  1472  func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
  1473  	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
  1474  		return errors.Wrapf(define.ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
  1475  	}
  1476  
  1477  	c.newContainerEvent(events.Restart)
  1478  
  1479  	if c.state.State == define.ContainerStateRunning {
  1480  		conmonPID := c.state.ConmonPID
  1481  		if err := c.stop(timeout); err != nil {
  1482  			return err
  1483  		}
  1484  		// Old versions of conmon have a bug where they create the exit file before
  1485  		// closing open file descriptors causing a race condition when restarting
  1486  		// containers with open ports since we cannot bind the ports as they're not
  1487  		// yet closed by conmon.
  1488  		//
  1489  		// Killing the old conmon PID is ~okay since it forces the FDs of old conmons
  1490  		// to be closed, while it's a NOP for newer versions which should have
  1491  		// exited already.
  1492  		if conmonPID != 0 {
  1493  			// Ignore errors from FindProcess() as conmon could already have exited.
  1494  			p, err := os.FindProcess(conmonPID)
  1495  			if p != nil && err == nil {
  1496  				if err = p.Kill(); err != nil {
  1497  					logrus.Debugf("error killing conmon process: %v", err)
  1498  				}
  1499  			}
  1500  		}
  1501  		// Ensure we tear down the container network so it will be
  1502  		// recreated - otherwise, behavior of restart differs from stop
  1503  		// and start
  1504  		if err := c.cleanupNetwork(); err != nil {
  1505  			return err
  1506  		}
  1507  	}
  1508  	defer func() {
  1509  		if retErr != nil {
  1510  			if err := c.cleanup(ctx); err != nil {
  1511  				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err)
  1512  			}
  1513  		}
  1514  	}()
  1515  	if err := c.prepare(); err != nil {
  1516  		return err
  1517  	}
  1518  
  1519  	if c.state.State == define.ContainerStateStopped {
  1520  		// Reinitialize the container if we need to
  1521  		if err := c.reinit(ctx, false); err != nil {
  1522  			return err
  1523  		}
  1524  	} else if c.state.State == define.ContainerStateConfigured ||
  1525  		c.state.State == define.ContainerStateExited {
  1526  		// Initialize the container
  1527  		if err := c.init(ctx, false); err != nil {
  1528  			return err
  1529  		}
  1530  	}
  1531  	return c.start()
  1532  }
  1533  
  1534  // mountStorage sets up the container's root filesystem
  1535  // It mounts the image and any other requested mounts
  1536  // TODO: Add ability to override mount label so we can use this for Mount() too
  1537  // TODO: Can we use this for export? Copying SHM into the export might not be
  1538  // good
  1539  func (c *Container) mountStorage() (_ string, deferredErr error) {
  1540  	var err error
  1541  	// Container already mounted, nothing to do
  1542  	if c.state.Mounted {
  1543  		return c.state.Mountpoint, nil
  1544  	}
  1545  
  1546  	mounted, err := mount.Mounted(c.config.ShmDir)
  1547  	if err != nil {
  1548  		return "", errors.Wrapf(err, "unable to determine if %q is mounted", c.config.ShmDir)
  1549  	}
  1550  
  1551  	if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") {
  1552  		shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize)
  1553  		if err := c.mountSHM(shmOptions); err != nil {
  1554  			return "", err
  1555  		}
  1556  		if err := os.Chown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil {
  1557  			return "", errors.Wrapf(err, "failed to chown %s", c.config.ShmDir)
  1558  		}
  1559  		defer func() {
  1560  			if deferredErr != nil {
  1561  				if err := c.unmountSHM(c.config.ShmDir); err != nil {
  1562  					logrus.Errorf("Error unmounting SHM for container %s after mount error: %v", c.ID(), err)
  1563  				}
  1564  			}
  1565  		}()
  1566  	}
  1567  
  1568  	// We need to mount the container before volumes, to ensure the copy-up
  1569  	// works properly.
  1570  	mountPoint := c.config.Rootfs
  1571  	if mountPoint == "" {
  1572  		mountPoint, err = c.mount()
  1573  		if err != nil {
  1574  			return "", err
  1575  		}
  1576  		defer func() {
  1577  			if deferredErr != nil {
  1578  				if err := c.unmount(false); err != nil {
  1579  					logrus.Errorf("Error unmounting container %s after mount error: %v", c.ID(), err)
  1580  				}
  1581  			}
  1582  		}()
  1583  	}
  1584  
  1585  	rootUID, rootGID := c.RootUID(), c.RootGID()
  1586  
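	// The /etc fixups below resolve paths via an O_PATH descriptor of the
	// mount point (and SecureJoin) so that symlinks inside the rootfs cannot
	// redirect the writes outside the container.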
  1587  	dirfd, err := unix.Open(mountPoint, unix.O_RDONLY|unix.O_PATH, 0)
  1588  	if err != nil {
  1589  		return "", errors.Wrap(err, "open mount point")
  1590  	}
  1591  	defer unix.Close(dirfd)
  1592  
  1593  	err = unix.Mkdirat(dirfd, "etc", 0755)
  1594  	if err != nil && !os.IsExist(err) {
  1595  		return "", errors.Wrap(err, "create /etc")
  1596  	}
  1597  	// If the etc directory was created, chown it to root in the container
  1598  	if err == nil && (rootUID != 0 || rootGID != 0) {
  1599  		err = unix.Fchownat(dirfd, "etc", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
  1600  		if err != nil {
  1601  			return "", errors.Wrap(err, "chown /etc")
  1602  		}
  1603  	}
  1604  
  1605  	etcInTheContainerPath, err := securejoin.SecureJoin(mountPoint, "etc")
  1606  	if err != nil {
  1607  		return "", errors.Wrap(err, "resolve /etc in the container")
  1608  	}
  1609  
  1610  	etcInTheContainerFd, err := unix.Open(etcInTheContainerPath, unix.O_RDONLY|unix.O_PATH, 0)
  1611  	if err != nil {
  1612  		return "", errors.Wrap(err, "open /etc in the container")
  1613  	}
  1614  	defer unix.Close(etcInTheContainerFd)
  1615  
  1616  	// If /etc/mtab does not exist in the container image, create it so that
  1617  	// the mount command within the container will work.
  1618  	err = unix.Symlinkat("/proc/mounts", etcInTheContainerFd, "mtab")
  1619  	if err != nil && !os.IsExist(err) {
  1620  		return "", errors.Wrap(err, "creating /etc/mtab symlink")
  1621  	}
  1622  	// If the symlink was created, then also chown it to root in the container
  1623  	if err == nil && (rootUID != 0 || rootGID != 0) {
  1624  		err = unix.Fchownat(etcInTheContainerFd, "mtab", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
  1625  		if err != nil {
  1626  			return "", errors.Wrap(err, "chown /etc/mtab")
  1627  		}
  1628  	}
  1629  
  1630  	// Request a mount of all named volumes
  1631  	for _, v := range c.config.NamedVolumes {
  1632  		vol, err := c.mountNamedVolume(v, mountPoint)
  1633  		if err != nil {
  1634  			return "", err
  1635  		}
  1636  		defer func() {
  1637  			if deferredErr == nil {
  1638  				return
  1639  			}
  1640  			vol.lock.Lock()
  1641  			if err := vol.unmount(false); err != nil {
  1642  				logrus.Errorf("Error unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err)
  1643  			}
  1644  			vol.lock.Unlock()
  1645  		}()
  1646  	}
  1647  
  1648  	return mountPoint, nil
  1649  }
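
// For illustration (not from the upstream file): securejoin.SecureJoin
// resolves a path relative to a root without letting it escape that root,
// e.g.:
//
//	p, _ := securejoin.SecureJoin(mountPoint, "etc/../../etc")
//	// p stays under mountPoint ("<mountPoint>/etc"); ".." cannot climb out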
  1650  
  1651  // Mount a single named volume into the container.
  1652  // If necessary, copy up image contents into the volume.
  1653  // Does not verify that the named volume given is actually present in the
  1654  // container's config.
  1655  // Returns the volume that was mounted.
  1656  func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) {
  1657  	logrus.Debugf("Going to mount named volume %s", v.Name)
  1658  	vol, err := c.runtime.state.Volume(v.Name)
  1659  	if err != nil {
  1660  		return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
  1661  	}
  1662  
  1663  	if vol.config.LockID == c.config.LockID {
  1664  		return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID)
  1665  	}
  1666  	vol.lock.Lock()
  1667  	defer vol.lock.Unlock()
  1668  	if vol.needsMount() {
  1669  		if err := vol.mount(); err != nil {
  1670  			return nil, errors.Wrapf(err, "error mounting volume %s for container %s", vol.Name(), c.ID())
  1671  		}
  1672  	}
  1673  	// The volume may need a copy-up. Check the state.
  1674  	if err := vol.update(); err != nil {
  1675  		return nil, err
  1676  	}
  1677  	if vol.state.NeedsCopyUp {
  1678  		logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name())
  1679  
  1680  		// Set NeedsCopyUp to false immediately, so we don't attempt this
  1681  		// again once files have already been copied.
  1682  		vol.state.NeedsCopyUp = false
  1683  		if err := vol.save(); err != nil {
  1684  			return nil, err
  1685  		}
  1686  
  1687  		// If the volume is not empty, we should not copy up.
  1688  		volMount := vol.mountPoint()
  1689  		contents, err := ioutil.ReadDir(volMount)
  1690  		if err != nil {
  1691  			return nil, errors.Wrapf(err, "error listing contents of volume %s mountpoint when copying up from container %s", vol.Name(), c.ID())
  1692  		}
  1693  		if len(contents) > 0 {
  1694  			// The volume is not empty. It was likely modified
  1695  			// outside of Podman. For safety, let's not copy up into
  1696  			// it. Fixes CVE-2020-1726.
  1697  			return vol, nil
  1698  		}
  1699  
  1700  		srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest)
  1701  		if err != nil {
  1702  			return nil, errors.Wrapf(err, "error calculating destination path to copy up container %s volume %s", c.ID(), vol.Name())
  1703  		}
  1704  		// Do a manual stat on the source directory to verify existence.
  1705  		// Skip the rest if it does not exist.
  1706  		// TODO: Should this be stat or lstat? I'm using lstat because I
  1707  		// think copy-up doesn't happen when the source is a link.
  1708  		srcStat, err := os.Lstat(srcDir)
  1709  		if err != nil {
  1710  			if os.IsNotExist(err) {
  1711  				// Source does not exist, don't bother copying
  1712  				// up.
  1713  				return vol, nil
  1714  			}
  1715  			return nil, errors.Wrapf(err, "error identifying source directory for copy up into volume %s", vol.Name())
  1716  		}
  1717  		// If it's not a directory, we're mounting over it: skip the copy-up.
  1718  		if !srcStat.IsDir() {
  1719  			return vol, nil
  1720  		}
  1721  		// Read the source's contents; do not bother continuing if it's
  1722  		// empty. Works around a bizarre issue where copier.Get will
  1723  		// sometimes ENOENT on empty directories and sometimes will not.
  1724  		// RHBZ#1928643
  1725  		srcContents, err := ioutil.ReadDir(srcDir)
  1726  		if err != nil {
  1727  			return nil, errors.Wrapf(err, "error reading contents of source directory for copy up into volume %s", vol.Name())
  1728  		}
  1729  		if len(srcContents) == 0 {
  1730  			return vol, nil
  1731  		}
  1732  
  1733  		// Buildah Copier accepts a reader, so we'll need a pipe.
  1734  		reader, writer := io.Pipe()
  1735  		defer reader.Close()
  1736  
  1737  		errChan := make(chan error, 1)
  1738  
  1739  		logrus.Infof("About to copy up into volume %s", vol.Name())
  1740  
  1741  		// Copy, container side: get a tar archive of what needs to be
  1742  		// streamed into the volume.
  1743  		go func() {
  1744  			defer writer.Close()
  1745  			getOptions := copier.GetOptions{
  1746  				KeepDirectoryNames: false,
  1747  			}
  1748  			errChan <- copier.Get(srcDir, "", getOptions, []string{"/."}, writer)
  1749  		}()
  1750  
  1751  		// Copy, volume side: stream what we've written to the pipe, into
  1752  		// the volume.
  1753  		copyOpts := copier.PutOptions{}
  1754  		if err := copier.Put(volMount, "", copyOpts, reader); err != nil {
  1755  			err2 := <-errChan
  1756  			if err2 != nil {
  1757  				logrus.Errorf("Error streaming contents of container %s directory for volume copy-up: %v", c.ID(), err2)
  1758  			}
  1759  			return nil, errors.Wrapf(err, "error copying up to volume %s", vol.Name())
  1760  		}
  1761  
  1762  		if err := <-errChan; err != nil {
  1763  			return nil, errors.Wrapf(err, "error streaming container content for copy up into volume %s", vol.Name())
  1764  		}
  1765  	}
  1766  	return vol, nil
  1767  }
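
// For illustration (not from the upstream file), the Get/Put pipe pattern
// used above, shown in isolation; src and dst are hypothetical directories:
//
//	reader, writer := io.Pipe()
//	defer reader.Close()
//	errChan := make(chan error, 1)
//	go func() {
//		defer writer.Close()
//		errChan <- copier.Get(src, "", copier.GetOptions{}, []string{"/."}, writer)
//	}()
//	if err := copier.Put(dst, "", copier.PutOptions{}, reader); err != nil {
//		return err
//	}
//	return <-errChan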
  1768  
  1769  // cleanupStorage unmounts and cleans up the container's root filesystem
  1770  func (c *Container) cleanupStorage() error {
  1771  	if !c.state.Mounted {
  1772  		// Already unmounted, do nothing
  1773  		logrus.Debugf("Container %s storage is already unmounted, skipping...", c.ID())
  1774  		return nil
  1775  	}
  1776  
  1777  	var cleanupErr error
  1778  
  1779  	for _, containerMount := range c.config.Mounts {
  1780  		if err := c.unmountSHM(containerMount); err != nil {
  1781  			if cleanupErr != nil {
  1782  				logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
  1783  			}
  1784  			cleanupErr = err
  1785  		}
  1786  	}
  1787  
  1788  	if err := c.cleanupOverlayMounts(); err != nil {
  1789  		// If the container can't remove content, report the error
  1790  		logrus.Errorf("Failed to cleanup overlay mounts for %s: %v", c.ID(), err)
  1791  		cleanupErr = err
  1792  	}
  1793  
  1794  	if c.config.Rootfs != "" {
  1795  		return cleanupErr
  1796  	}
  1797  
  1798  	if err := c.unmount(false); err != nil {
  1799  		// If the container has already been removed, warn but don't
  1800  		// error
  1801  		// We still want to be able to kick the container out of the
  1802  		// state
  1803  		if errors.Cause(err) == storage.ErrNotAContainer || errors.Cause(err) == storage.ErrContainerUnknown || errors.Cause(err) == storage.ErrLayerNotMounted {
  1804  			logrus.Errorf("Storage for container %s has been removed", c.ID())
  1805  		} else {
  1806  			if cleanupErr != nil {
  1807  				logrus.Errorf("Error cleaning up container %s storage: %v", c.ID(), cleanupErr)
  1808  			}
  1809  			cleanupErr = err
  1810  		}
  1811  	}
  1812  
  1813  	// Request an unmount of all named volumes
  1814  	for _, v := range c.config.NamedVolumes {
  1815  		vol, err := c.runtime.state.Volume(v.Name)
  1816  		if err != nil {
  1817  			if cleanupErr != nil {
  1818  				logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
  1819  			}
  1820  			cleanupErr = errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
  1821  
  1822  			// We need to try and unmount every volume, so continue
  1823  			// if they fail.
  1824  			continue
  1825  		}
  1826  
  1827  		if vol.needsMount() {
  1828  			vol.lock.Lock()
  1829  			if err := vol.unmount(false); err != nil {
  1830  				if cleanupErr != nil {
  1831  					logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
  1832  				}
  1833  				cleanupErr = errors.Wrapf(err, "error unmounting volume %s for container %s", vol.Name(), c.ID())
  1834  			}
  1835  			vol.lock.Unlock()
  1836  		}
  1837  	}
  1838  
  1839  	c.state.Mountpoint = ""
  1840  	c.state.Mounted = false
  1841  
  1842  	if c.valid {
  1843  		if err := c.save(); err != nil {
  1844  			if cleanupErr != nil {
  1845  				logrus.Errorf("Error unmounting container %s: %v", c.ID(), cleanupErr)
  1846  			}
  1847  			cleanupErr = err
  1848  		}
  1849  	}
  1850  	return cleanupErr
  1851  }
  1852  
  1853  // Unmount a container and free its resources
  1854  func (c *Container) cleanup(ctx context.Context) error {
  1855  	var lastError error
  1856  
  1857  	logrus.Debugf("Cleaning up container %s", c.ID())
  1858  
  1859  	// Remove the healthcheck unit/timer file if it exists
  1860  	if c.config.HealthCheckConfig != nil {
  1861  		if err := c.removeTimer(); err != nil {
  1862  			logrus.Errorf("Error removing timer for container %s healthcheck: %v", c.ID(), err)
  1863  		}
  1864  	}
  1865  
  1866  	// Clean up network namespace, if present
  1867  	if err := c.cleanupNetwork(); err != nil {
  1868  		lastError = errors.Wrapf(err, "error removing container %s network", c.ID())
  1869  	}
  1870  
  1871  	// Remove the container from the runtime, if necessary.
  1872  	// Do this *before* unmounting storage - some runtimes (e.g. Kata)
  1873  	// apparently object to having storage removed while the container still
  1874  	// exists.
  1875  	if err := c.cleanupRuntime(ctx); err != nil {
  1876  		if lastError != nil {
  1877  			logrus.Errorf("Error removing container %s from OCI runtime: %v", c.ID(), err)
  1878  		} else {
  1879  			lastError = err
  1880  		}
  1881  	}
  1882  
  1883  	// Unmount storage
  1884  	if err := c.cleanupStorage(); err != nil {
  1885  		if lastError != nil {
  1886  			logrus.Errorf("Error unmounting container %s storage: %v", c.ID(), err)
  1887  		} else {
  1888  			lastError = errors.Wrapf(err, "error unmounting container %s storage", c.ID())
  1889  		}
  1890  	}
  1891  
  1892  	// Unmount image volumes
  1893  	for _, v := range c.config.ImageVolumes {
  1894  		img, _, err := c.runtime.LibimageRuntime().LookupImage(v.Source, nil)
  1895  		if err != nil {
  1896  			if lastError == nil {
  1897  				lastError = err
  1898  			} else {
  1899  				logrus.Errorf("error unmounting image volume %q:%q: %v", v.Source, v.Dest, err)
  1900  			}
  1901  			continue // img is nil when the lookup fails; unmounting would panic
  1902  		}
  1903  		err = img.Unmount(false)
  1904  		if err != nil && lastError == nil {
  1905  			lastError = err
  1906  		} else if err != nil {
  1907  			logrus.Errorf("error unmounting image volume %q:%q: %v", v.Source, v.Dest, err)
  1908  		}
  1909  	}
  1910  
  1911  	return lastError
  1912  }
  1913  
  1914  // delete deletes the container and runs any configured poststop
  1915  // hooks.
  1916  func (c *Container) delete(ctx context.Context) error {
  1917  	if err := c.ociRuntime.DeleteContainer(c); err != nil {
  1918  		return errors.Wrapf(err, "error removing container %s from runtime", c.ID())
  1919  	}
  1920  
  1921  	if err := c.postDeleteHooks(ctx); err != nil {
  1922  		return errors.Wrapf(err, "container %s poststop hooks", c.ID())
  1923  	}
  1924  
  1925  	return nil
  1926  }
  1927  
  1928  // postDeleteHooks runs the poststop hooks (if any) as specified by
  1929  // the OCI Runtime Specification (which requires them to run
  1930  // post-delete, despite the stage name).
  1931  func (c *Container) postDeleteHooks(ctx context.Context) error {
  1932  	if c.state.ExtensionStageHooks != nil {
  1933  		extensionHooks, ok := c.state.ExtensionStageHooks["poststop"]
  1934  		if ok {
  1935  			state, err := json.Marshal(spec.State{
  1936  				Version:     spec.Version,
  1937  				ID:          c.ID(),
  1938  				Status:      "stopped",
  1939  				Bundle:      c.bundlePath(),
  1940  				Annotations: c.config.Spec.Annotations,
  1941  			})
  1942  			if err != nil {
  1943  				return err
  1944  			}
  1945  			for i, hook := range extensionHooks {
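				// Copy the loop variable: exec.Run takes &hook, which must
				// not alias the loop iterator across iterations.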
  1946  				hook := hook
  1947  				logrus.Debugf("container %s: invoke poststop hook %d, path %s", c.ID(), i, hook.Path)
  1948  				var stderr, stdout bytes.Buffer
  1949  				hookErr, err := exec.Run(ctx, &hook, state, &stdout, &stderr, exec.DefaultPostKillTimeout)
  1950  				if err != nil {
  1951  					logrus.Warnf("container %s: poststop hook %d: %v", c.ID(), i, err)
  1952  					if hookErr != err {
  1953  						logrus.Debugf("container %s: poststop hook %d (hook error): %v", c.ID(), i, hookErr)
  1954  					}
  1955  					stdoutString := stdout.String()
  1956  					if stdoutString != "" {
  1957  						logrus.Debugf("container %s: poststop hook %d: stdout:\n%s", c.ID(), i, stdoutString)
  1958  					}
  1959  					stderrString := stderr.String()
  1960  					if stderrString != "" {
  1961  						logrus.Debugf("container %s: poststop hook %d: stderr:\n%s", c.ID(), i, stderrString)
  1962  					}
  1963  				}
  1964  			}
  1965  		}
  1966  	}
  1967  
  1968  	return nil
  1969  }
  1970  
  1971  // writeStringToRundir writes the given string to a file with the given name in
  1972  // the container's temporary files directory. The file will be chown'd to the
  1973  // container's root user and have an appropriate SELinux label set.
  1974  // If a file with the same name already exists, it will be deleted and recreated
  1975  // with the new contents.
  1976  // Returns the full path to the new file.
  1977  func (c *Container) writeStringToRundir(destFile, contents string) (string, error) {
  1978  	destFileName := filepath.Join(c.state.RunDir, destFile)
  1979  
  1980  	if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) {
  1981  		return "", errors.Wrapf(err, "error removing %s for container %s", destFile, c.ID())
  1982  	}
  1983  
  1984  	if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
  1985  		return "", err
  1986  	}
  1987  
  1988  	return destFileName, nil
  1989  }
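
// For illustration (not from the upstream file), a hypothetical caller; the
// file name and contents are made up:
//
//	path, err := c.writeStringToRundir("resolv.conf", "nameserver 10.0.2.3\n")
//	if err != nil {
//		return err
//	}
//	logrus.Debugf("wrote %s", path)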
  1990  
  1991  // writeStringToStaticDir writes the given string to a file with the given name
  1992  // in the container's permanent files directory. The file will be chown'd to the
  1993  // container's root user and have an appropriate SELinux label set.
  1994  // Unlike writeStringToRundir, will *not* delete and re-create if the file
  1995  // already exists (will instead error).
  1996  // Returns the full path to the new file.
  1997  func (c *Container) writeStringToStaticDir(filename, contents string) (string, error) {
  1998  	destFileName := filepath.Join(c.config.StaticDir, filename)
  1999  
  2000  	if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
  2001  		return "", err
  2002  	}
  2003  
  2004  	return destFileName, nil
  2005  }
  2006  
  2007  // appendStringToRunDir appends the provided string to the named file in the container's run directory
  2008  func (c *Container) appendStringToRunDir(destFile, output string) (string, error) {
  2009  	destFileName := filepath.Join(c.state.RunDir, destFile)
  2010  
  2011  	f, err := os.OpenFile(destFileName, os.O_APPEND|os.O_RDWR, 0600)
  2012  	if err != nil {
  2013  		return "", err
  2014  	}
  2015  	defer f.Close()
  2016  
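	// Scan the existing file first: if an identical line is already present,
	// return without appending, keeping repeated calls idempotent.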
  2017  	compareStr := strings.TrimRight(output, "\n")
  2018  	scanner := bufio.NewScanner(f)
  2019  	scanner.Split(bufio.ScanLines)
  2020  
  2021  	for scanner.Scan() {
  2022  		if scanner.Text() == compareStr {
  2023  			return filepath.Join(c.state.RunDir, destFile), nil
  2024  		}
  2025  	}
  2026  
  2027  	if _, err := f.WriteString(output); err != nil {
  2028  		return "", errors.Wrapf(err, "unable to write %s", destFileName)
  2029  	}
  2030  
  2031  	return filepath.Join(c.state.RunDir, destFile), nil
  2032  }
  2033  
  2034  // saveSpec saves the OCI spec to disk, replacing any existing specs for the container
  2035  func (c *Container) saveSpec(spec *spec.Spec) error {
  2036  	// If the OCI spec already exists, we need to replace it: we cannot
  2037  	// guarantee that some things (e.g. network namespace paths) stay the
  2038  	// same across runs.
  2039  	jsonPath := filepath.Join(c.bundlePath(), "config.json")
  2040  	if _, err := os.Stat(jsonPath); err != nil {
  2041  		if !os.IsNotExist(err) {
  2042  			return errors.Wrapf(err, "error doing stat on container %s spec", c.ID())
  2043  		}
  2044  		// The spec does not exist, we're fine
  2045  	} else {
  2046  		// The spec exists, need to remove it
  2047  		if err := os.Remove(jsonPath); err != nil {
  2048  			return errors.Wrapf(err, "error replacing runtime spec for container %s", c.ID())
  2049  		}
  2050  	}
  2051  
  2052  	fileJSON, err := json.Marshal(spec)
  2053  	if err != nil {
  2054  		return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID())
  2055  	}
  2056  	if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil {
  2057  		return errors.Wrapf(err, "error writing runtime spec JSON for container %s to disk", c.ID())
  2058  	}
  2059  
  2060  	logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath)
  2061  
  2062  	c.state.ConfigPath = jsonPath
  2063  
  2064  	return nil
  2065  }
  2066  
  2067  // Warning: precreate hooks may alter 'config' in place.
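// Directory precedence: an explicitly configured Engine.HooksDir list always
// wins; otherwise the default and override hook directories are consulted,
// and rootless runs skip the implicit directories entirely.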
  2068  func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[string][]spec.Hook, error) {
  2069  	allHooks := make(map[string][]spec.Hook)
  2070  	if c.runtime.config.Engine.HooksDir == nil {
  2071  		if rootless.IsRootless() {
  2072  			return nil, nil
  2073  		}
  2074  		for _, hDir := range []string{hooks.DefaultDir, hooks.OverrideDir} {
  2075  			manager, err := hooks.New(ctx, []string{hDir}, []string{"precreate", "poststop"})
  2076  			if err != nil {
  2077  				if os.IsNotExist(err) {
  2078  					continue
  2079  				}
  2080  				return nil, err
  2081  			}
  2082  			ociHooks, err := manager.Hooks(config, c.Spec().Annotations, len(c.config.UserVolumes) > 0)
  2083  			if err != nil {
  2084  				return nil, err
  2085  			}
  2086  			if len(ociHooks) > 0 || config.Hooks != nil {
  2087  				logrus.Warnf("implicit hook directories are deprecated; set --hooks-dir=%q explicitly to continue to load hooks from this directory", hDir)
  2088  			}
  2089  			for i, hook := range ociHooks {
  2090  				allHooks[i] = hook
  2091  			}
  2092  		}
  2093  	} else {
  2094  		manager, err := hooks.New(ctx, c.runtime.config.Engine.HooksDir, []string{"precreate", "poststop"})
  2095  		if err != nil {
  2096  			return nil, err
  2097  		}
  2098  
  2099  		allHooks, err = manager.Hooks(config, c.Spec().Annotations, len(c.config.UserVolumes) > 0)
  2100  		if err != nil {
  2101  			return nil, err
  2102  		}
  2103  	}
  2104  
  2105  	hookErr, err := exec.RuntimeConfigFilter(ctx, allHooks["precreate"], config, exec.DefaultPostKillTimeout)
  2106  	if err != nil {
  2107  		logrus.Warnf("container %s: precreate hook: %v", c.ID(), err)
  2108  		if hookErr != nil && hookErr != err {
  2109  			logrus.Debugf("container %s: precreate hook (hook error): %v", c.ID(), hookErr)
  2110  		}
  2111  		return nil, err
  2112  	}
  2113  
  2114  	return allHooks, nil
  2115  }
  2116  
  2117  // mount mounts the container's root filesystem
  2118  func (c *Container) mount() (string, error) {
  2119  	if c.state.State == define.ContainerStateRemoving {
  2120  		return "", errors.Wrapf(define.ErrCtrStateInvalid, "cannot mount container %s as it is being removed", c.ID())
  2121  	}
  2122  
  2123  	mountPoint, err := c.runtime.storageService.MountContainerImage(c.ID())
  2124  	if err != nil {
  2125  		return "", errors.Wrapf(err, "error mounting storage for container %s", c.ID())
  2126  	}
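	// The returned path may contain symlinks; canonicalize it so callers
	// always see the resolved mountpoint.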
  2127  	mountPoint, err = filepath.EvalSymlinks(mountPoint)
  2128  	if err != nil {
  2129  		return "", errors.Wrapf(err, "error resolving storage path for container %s", c.ID())
  2130  	}
  2131  	if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil {
  2132  		return "", errors.Wrapf(err, "cannot chown %s to %d:%d", mountPoint, c.RootUID(), c.RootGID())
  2133  	}
  2134  	return mountPoint, nil
  2135  }
  2136  
  2137  // unmount unmounts the container's root filesystem
  2138  func (c *Container) unmount(force bool) error {
  2139  	// Also unmount storage
  2140  	if _, err := c.runtime.storageService.UnmountContainerImage(c.ID(), force); err != nil {
  2141  		return errors.Wrapf(err, "error unmounting container %s root filesystem", c.ID())
  2142  	}
  2143  
  2144  	return nil
  2145  }
  2146  
  2147  // checkReadyForRemoval checks whether the given container is ready to be
  2148  // removed.
  2149  // These checks are only used if force-remove is not specified.
  2150  // If it is, we'll remove the container anyway.
  2151  // Returns nil if safe to remove, or an error describing why it's unsafe if not.
  2152  func (c *Container) checkReadyForRemoval() error {
  2153  	if c.state.State == define.ContainerStateUnknown {
  2154  		return errors.Wrapf(define.ErrCtrStateInvalid, "container %s is in invalid state", c.ID())
  2155  	}
  2156  
  2157  	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) && !c.IsInfra() {
  2158  		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String())
  2159  	}
  2160  
  2161  	// Check exec sessions
  2162  	sessions, err := c.getActiveExecSessions()
  2163  	if err != nil {
  2164  		return err
  2165  	}
  2166  	if len(sessions) != 0 {
  2167  		return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
  2168  	}
  2169  
  2170  	return nil
  2171  }
  2172  
  2173  // canWithPrevious stats the pre-checkpoint dir; a nil return means it exists
  2174  func (c *Container) canWithPrevious() error {
  2175  	_, err := os.Stat(c.PreCheckPointPath())
  2176  	return err
  2177  }
  2178  
  2179  // prepareCheckpointExport writes the config and spec to
  2180  // JSON files for later export
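// Both files land in the bundle directory, under the names given by
// metadata.ConfigDumpFile and metadata.SpecDumpFile, where the checkpoint
// export picks them up.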
  2181  func (c *Container) prepareCheckpointExport() error {
  2182  	// save live config
  2183  	if _, err := metadata.WriteJSONFile(c.config, c.bundlePath(), metadata.ConfigDumpFile); err != nil {
  2184  		return err
  2185  	}
  2186  
  2187  	// save spec
  2188  	jsonPath := filepath.Join(c.bundlePath(), "config.json")
  2189  	g, err := generate.NewFromFile(jsonPath)
  2190  	if err != nil {
  2191  		logrus.Debugf("generating spec for container %q failed with %v", c.ID(), err)
  2192  		return err
  2193  	}
  2194  	if _, err := metadata.WriteJSONFile(g.Config, c.bundlePath(), metadata.SpecDumpFile); err != nil {
  2195  		return err
  2196  	}
  2197  
  2198  	return nil
  2199  }
  2200  
  2201  // sortUserVolumes splits the volumes specified for a container into named
  2202  // volumes and normal mounts
  2203  func (c *Container) sortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume, []spec.Mount) {
  2204  	namedUserVolumes := []*ContainerNamedVolume{}
  2205  	userMounts := []spec.Mount{}
  2206  
  2207  	// We need to parse all named volumes and mounts into maps, so we don't
  2208  	// end up with repeated lookups for each user volume.
  2209  	// Map destination to struct, as destination is what is stored in
  2210  	// UserVolumes.
  2211  	namedVolumes := make(map[string]*ContainerNamedVolume)
  2212  	mounts := make(map[string]spec.Mount)
  2213  	for _, namedVol := range c.config.NamedVolumes {
  2214  		namedVolumes[namedVol.Dest] = namedVol
  2215  	}
  2216  	for _, mount := range ctrSpec.Mounts {
  2217  		mounts[mount.Destination] = mount
  2218  	}
  2219  
  2220  	for _, vol := range c.config.UserVolumes {
  2221  		if volume, ok := namedVolumes[vol]; ok {
  2222  			namedUserVolumes = append(namedUserVolumes, volume)
  2223  		} else if mount, ok := mounts[vol]; ok {
  2224  			userMounts = append(userMounts, mount)
  2225  		} else {
  2226  			logrus.Warnf("Could not find mount at destination %q when parsing user volumes for container %s", vol, c.ID())
  2227  		}
  2228  	}
  2229  	return namedUserVolumes, userMounts
  2230  }
  2231  
  2232  // Check for an exit file, and handle one if present
  2233  func (c *Container) checkExitFile() error {
  2234  	// If the container's not running, nothing to do.
  2235  	if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) {
  2236  		return nil
  2237  	}
  2238  
  2239  	exitFile, err := c.exitFilePath()
  2240  	if err != nil {
  2241  		return err
  2242  	}
  2243  
  2244  	// Check for the exit file
  2245  	info, err := os.Stat(exitFile)
  2246  	if err != nil {
  2247  		if os.IsNotExist(err) {
  2248  			// Container is still running, no error
  2249  			return nil
  2250  		}
  2251  
  2252  		return errors.Wrapf(err, "error running stat on container %s exit file", c.ID())
  2253  	}
  2254  
  2255  	// Alright, it exists. Transition to Stopped state.
  2256  	c.state.State = define.ContainerStateStopped
  2257  	c.state.PID = 0
  2258  	c.state.ConmonPID = 0
  2259  
  2260  	// Read the exit file to get our stopped time and exit code.
  2261  	return c.handleExitFile(exitFile, info)
  2262  }
  2263  
  2264  func (c *Container) hasNamespace(namespace spec.LinuxNamespaceType) bool {
  2265  	if c.config.Spec == nil || c.config.Spec.Linux == nil {
  2266  		return false
  2267  	}
  2268  	for _, n := range c.config.Spec.Linux.Namespaces {
  2269  		if n.Type == namespace {
  2270  			return true
  2271  		}
  2272  	}
  2273  	return false
  2274  }
  2275  
  2276  // extractSecretToCtrStorage copies a secret's data from the secrets manager into the container's secrets directory
  2277  func (c *Container) extractSecretToCtrStorage(secr *ContainerSecret) error {
  2278  	manager, err := c.runtime.SecretsManager()
  2279  	if err != nil {
  2280  		return err
  2281  	}
  2282  	_, data, err := manager.LookupSecretData(secr.Name)
  2283  	if err != nil {
  2284  		return err
  2285  	}
  2286  	secretFile := filepath.Join(c.config.SecretsPath, secr.Name)
  2287  
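	// Map the secret's UID/GID through the container's ID mappings so the
	// file ends up owned by the right IDs from the host's point of view.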
  2288  	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), secr.UID, secr.GID)
  2289  	if err != nil {
  2290  		return errors.Wrap(err, "unable to extract secret")
  2291  	}
  2292  	err = ioutil.WriteFile(secretFile, data, 0644)
  2293  	if err != nil {
  2294  		return errors.Wrapf(err, "unable to create %s", secretFile)
  2295  	}
  2296  	if err := os.Lchown(secretFile, int(hostUID), int(hostGID)); err != nil {
  2297  		return err
  2298  	}
  2299  	if err := os.Chmod(secretFile, os.FileMode(secr.Mode)); err != nil {
  2300  		return err
  2301  	}
  2302  	if err := label.Relabel(secretFile, c.config.MountLabel, false); err != nil {
  2303  		return err
  2304  	}
  2305  	return nil
  2306  }