github.com/containers/podman/v2@v2.2.2-0.20210501105131-c1e07d070c4c/libpod/runtime_pod_linux.go (about)

     1  // +build linux
     2  
     3  package libpod
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"path"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/containers/common/pkg/config"
    13  	"github.com/containers/podman/v2/libpod/define"
    14  	"github.com/containers/podman/v2/libpod/events"
    15  	"github.com/containers/podman/v2/pkg/cgroups"
    16  	"github.com/containers/podman/v2/pkg/rootless"
    17  	spec "github.com/opencontainers/runtime-spec/specs-go"
    18  	"github.com/pkg/errors"
    19  	"github.com/sirupsen/logrus"
    20  )
    21  
    22  // NewPod makes a new, empty pod
    23  func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (_ *Pod, deferredErr error) {
    24  	r.lock.Lock()
    25  	defer r.lock.Unlock()
    26  
    27  	if !r.valid {
    28  		return nil, define.ErrRuntimeStopped
    29  	}
    30  
    31  	pod := newPod(r)
    32  
    33  	// Set default namespace to runtime's namespace
    34  	// Do so before options run so they can override it
    35  	if r.config.Engine.Namespace != "" {
    36  		pod.config.Namespace = r.config.Engine.Namespace
    37  	}
    38  
    39  	for _, option := range options {
    40  		if err := option(pod); err != nil {
    41  			return nil, errors.Wrapf(err, "error running pod create option")
    42  		}
    43  	}
    44  
    45  	if pod.config.Name == "" {
    46  		name, err := r.generateName()
    47  		if err != nil {
    48  			return nil, err
    49  		}
    50  		pod.config.Name = name
    51  	}
    52  
    53  	if pod.config.Hostname == "" {
    54  		pod.config.Hostname = pod.config.Name
    55  	}
    56  
    57  	// Allocate a lock for the pod
    58  	lock, err := r.lockManager.AllocateLock()
    59  	if err != nil {
    60  		return nil, errors.Wrapf(err, "error allocating lock for new pod")
    61  	}
    62  	pod.lock = lock
    63  	pod.config.LockID = pod.lock.ID()
    64  
    65  	defer func() {
    66  		if deferredErr != nil {
    67  			if err := pod.lock.Free(); err != nil {
    68  				logrus.Errorf("Error freeing pod lock after failed creation: %v", err)
    69  			}
    70  		}
    71  	}()
    72  
    73  	pod.valid = true
    74  
    75  	// Check CGroup parent sanity, and set it if it was not set
    76  	switch r.config.Engine.CgroupManager {
    77  	case config.CgroupfsCgroupsManager:
    78  		if pod.config.CgroupParent == "" {
    79  			pod.config.CgroupParent = CgroupfsDefaultCgroupParent
    80  		} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
    81  			return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
    82  		}
    83  		// If we are set to use pod cgroups, set the cgroup parent that
    84  		// all containers in the pod will share
    85  		// No need to create it with cgroupfs - the first container to
    86  		// launch should do it for us
    87  		if pod.config.UsePodCgroup {
    88  			pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
    89  		}
    90  	case config.SystemdCgroupsManager:
    91  		if pod.config.CgroupParent == "" {
    92  			if rootless.IsRootless() {
    93  				pod.config.CgroupParent = SystemdDefaultRootlessCgroupParent
    94  			} else {
    95  				pod.config.CgroupParent = SystemdDefaultCgroupParent
    96  			}
    97  		} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
    98  			return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
    99  		}
   100  		// If we are set to use pod cgroups, set the cgroup parent that
   101  		// all containers in the pod will share
   102  		if pod.config.UsePodCgroup {
   103  			cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()))
   104  			if err != nil {
   105  				return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID())
   106  			}
   107  			pod.state.CgroupPath = cgroupPath
   108  		}
   109  	default:
   110  		return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.Engine.CgroupManager)
   111  	}
   112  
   113  	if pod.config.UsePodCgroup {
   114  		logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath)
   115  	}
   116  	if !pod.HasInfraContainer() && pod.SharesNamespaces() {
   117  		return nil, errors.Errorf("Pods must have an infra container to share namespaces")
   118  	}
   119  	if pod.HasInfraContainer() && !pod.SharesNamespaces() {
   120  		logrus.Warnf("Pod has an infra container, but shares no namespaces")
   121  	}
   122  
   123  	if err := r.state.AddPod(pod); err != nil {
   124  		return nil, errors.Wrapf(err, "error adding pod to state")
   125  	}
   126  	defer func() {
   127  		if deferredErr != nil {
   128  			if err := r.removePod(ctx, pod, true, true); err != nil {
   129  				logrus.Errorf("Error removing pod after pause container creation failure: %v", err)
   130  			}
   131  		}
   132  	}()
   133  
   134  	if pod.HasInfraContainer() {
   135  		ctr, err := r.createInfraContainer(ctx, pod)
   136  		if err != nil {
   137  			return nil, errors.Wrapf(err, "error adding Infra Container")
   138  		}
   139  		pod.state.InfraContainerID = ctr.ID()
   140  		if err := pod.save(); err != nil {
   141  			return nil, err
   142  		}
   143  	}
   144  	pod.newPodEvent(events.Create)
   145  	return pod, nil
   146  }
   147  
   148  func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool) error {
   149  	if err := p.updatePod(); err != nil {
   150  		return err
   151  	}
   152  
   153  	ctrs, err := r.state.PodContainers(p)
   154  	if err != nil {
   155  		return err
   156  	}
   157  
   158  	numCtrs := len(ctrs)
   159  
   160  	// If the only container in the pod is the pause container, remove the pod and container unconditionally.
   161  	pauseCtrID := p.state.InfraContainerID
   162  	if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
   163  		removeCtrs = true
   164  		force = true
   165  	}
   166  	if !removeCtrs && numCtrs > 0 {
   167  		return errors.Wrapf(define.ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID())
   168  	}
   169  
   170  	// Go through and lock all containers so we can operate on them all at
   171  	// once.
   172  	// First loop also checks that we are ready to go ahead and remove.
   173  	for _, ctr := range ctrs {
   174  		ctrLock := ctr.lock
   175  		ctrLock.Lock()
   176  		defer ctrLock.Unlock()
   177  
   178  		// If we're force-removing, no need to check status.
   179  		if force {
   180  			continue
   181  		}
   182  
   183  		// Sync all containers
   184  		if err := ctr.syncContainer(); err != nil {
   185  			return err
   186  		}
   187  
   188  		// Ensure state appropriate for removal
   189  		if err := ctr.checkReadyForRemoval(); err != nil {
   190  			return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID())
   191  		}
   192  	}
   193  
   194  	// We're going to be removing containers.
   195  	// If we are CGroupfs cgroup driver, to avoid races, we need to hit
   196  	// the pod and conmon CGroups with a PID limit to prevent them from
   197  	// spawning any further processes (particularly cleanup processes) which
   198  	// would prevent removing the CGroups.
   199  	if p.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager {
   200  		// Get the conmon CGroup
   201  		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
   202  		conmonCgroup, err := cgroups.Load(conmonCgroupPath)
   203  		if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
   204  			logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
   205  		}
   206  
   207  		// New resource limits
   208  		resLimits := new(spec.LinuxResources)
   209  		resLimits.Pids = new(spec.LinuxPids)
   210  		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit
   211  
   212  		// Don't try if we failed to retrieve the cgroup
   213  		if err == nil {
   214  			if err := conmonCgroup.Update(resLimits); err != nil {
   215  				logrus.Warnf("Error updating pod %s conmon cgroup %s PID limit: %v", p.ID(), conmonCgroupPath, err)
   216  			}
   217  		}
   218  	}
   219  
   220  	var removalErr error
   221  
   222  	ctrNamedVolumes := make(map[string]*ContainerNamedVolume)
   223  
   224  	// Second loop - all containers are good, so we should be clear to
   225  	// remove.
   226  	for _, ctr := range ctrs {
   227  		// Remove the container.
   228  		// Do NOT remove named volumes. Instead, we're going to build a
   229  		// list of them to be removed at the end, once the containers
   230  		// have been removed by RemovePodContainers.
   231  		for _, vol := range ctr.config.NamedVolumes {
   232  			ctrNamedVolumes[vol.Name] = vol
   233  		}
   234  
   235  		if err := r.removeContainer(ctx, ctr, force, false, true); err != nil {
   236  			if removalErr == nil {
   237  				removalErr = err
   238  			} else {
   239  				logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err)
   240  			}
   241  		}
   242  	}
   243  
   244  	// Remove all containers in the pod from the state.
   245  	if err := r.state.RemovePodContainers(p); err != nil {
   246  		// If this fails, there isn't much more we can do.
   247  		// The containers in the pod are unusable, but they still exist,
   248  		// so pod removal will fail.
   249  		return err
   250  	}
   251  
   252  	for volName := range ctrNamedVolumes {
   253  		volume, err := r.state.Volume(volName)
   254  		if err != nil && errors.Cause(err) != define.ErrNoSuchVolume {
   255  			logrus.Errorf("Error retrieving volume %s: %v", volName, err)
   256  			continue
   257  		}
   258  		if !volume.Anonymous() {
   259  			continue
   260  		}
   261  		if err := r.removeVolume(ctx, volume, false); err != nil {
   262  			if errors.Cause(err) == define.ErrNoSuchVolume || errors.Cause(err) == define.ErrVolumeRemoved {
   263  				continue
   264  			}
   265  			logrus.Errorf("Error removing volume %s: %v", volName, err)
   266  		}
   267  	}
   268  
   269  	// Remove pod cgroup, if present
   270  	if p.state.CgroupPath != "" {
   271  		logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
   272  
   273  		switch p.runtime.config.Engine.CgroupManager {
   274  		case config.SystemdCgroupsManager:
   275  			if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil {
   276  				if removalErr == nil {
   277  					removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
   278  				} else {
   279  					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
   280  				}
   281  			}
   282  		case config.CgroupfsCgroupsManager:
   283  			// Delete the cgroupfs cgroup
   284  			// Make sure the conmon cgroup is deleted first
   285  			// Since the pod is almost gone, don't bother failing
   286  			// hard - instead, just log errors.
   287  			conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
   288  			conmonCgroup, err := cgroups.Load(conmonCgroupPath)
   289  			if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
   290  				if removalErr == nil {
   291  					removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID())
   292  				} else {
   293  					logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
   294  				}
   295  			}
   296  			if err == nil {
   297  				if err := conmonCgroup.Delete(); err != nil {
   298  					if removalErr == nil {
   299  						removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID())
   300  					} else {
   301  						logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
   302  					}
   303  				}
   304  			}
   305  			cgroup, err := cgroups.Load(p.state.CgroupPath)
   306  			if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
   307  				if removalErr == nil {
   308  					removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID())
   309  				} else {
   310  					logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
   311  				}
   312  			}
   313  			if err == nil {
   314  				if err := cgroup.Delete(); err != nil {
   315  					if removalErr == nil {
   316  						removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
   317  					} else {
   318  						logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
   319  					}
   320  				}
   321  			}
   322  		default:
   323  			// This should be caught much earlier, but let's still
   324  			// keep going so we make sure to evict the pod before
   325  			// ending up with an inconsistent state.
   326  			if removalErr == nil {
   327  				removalErr = errors.Wrapf(define.ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.Engine.CgroupManager, p.ID())
   328  			} else {
   329  				logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID())
   330  			}
   331  		}
   332  	}
   333  
   334  	// Remove pod from state
   335  	if err := r.state.RemovePod(p); err != nil {
   336  		if removalErr != nil {
   337  			logrus.Errorf("%v", removalErr)
   338  		}
   339  		return err
   340  	}
   341  
   342  	// Mark pod invalid
   343  	p.valid = false
   344  	p.newPodEvent(events.Remove)
   345  
   346  	// Deallocate the pod lock
   347  	if err := p.lock.Free(); err != nil {
   348  		if removalErr == nil {
   349  			removalErr = errors.Wrapf(err, "error freeing pod %s lock", p.ID())
   350  		} else {
   351  			logrus.Errorf("Error freeing pod %s lock: %v", p.ID(), err)
   352  		}
   353  	}
   354  
   355  	return removalErr
   356  }