github.com/AbhinandanKurakure/podman/v3@v3.4.10/libpod/runtime_pod_linux.go

// +build linux

package libpod

import (
	"context"
	"fmt"
	"path"
	"path/filepath"
	"strings"

	"github.com/containers/common/pkg/config"
	"github.com/containers/podman/v3/libpod/define"
	"github.com/containers/podman/v3/libpod/events"
	"github.com/containers/podman/v3/pkg/cgroups"
	"github.com/containers/podman/v3/pkg/rootless"
	"github.com/containers/podman/v3/pkg/specgen"
	spec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

// NewPod makes a new, empty pod
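// Pod create options are applied in order, so they can override the defaults
// (such as the runtime's namespace) that are set before they run.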
func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, options ...PodCreateOption) (_ *Pod, deferredErr error) {
	r.lock.Lock()
	defer r.lock.Unlock()

	if !r.valid {
		return nil, define.ErrRuntimeStopped
	}

	pod := newPod(r)

	// Set default namespace to runtime's namespace
	// Do so before options run so they can override it
	if r.config.Engine.Namespace != "" {
		pod.config.Namespace = r.config.Engine.Namespace
	}

	for _, option := range options {
		if err := option(pod); err != nil {
			return nil, errors.Wrapf(err, "error running pod create option")
		}
	}

	// Allocate a lock for the pod
	lock, err := r.lockManager.AllocateLock()
	if err != nil {
		return nil, errors.Wrapf(err, "error allocating lock for new pod")
	}
	pod.lock = lock
	pod.config.LockID = pod.lock.ID()

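	// If anything after this point fails, free the allocated lock so a
	// failed pod creation does not leak it.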
	defer func() {
		if deferredErr != nil {
			if err := pod.lock.Free(); err != nil {
				logrus.Errorf("Error freeing pod lock after failed creation: %v", err)
			}
		}
	}()

	pod.valid = true

	// Check CGroup parent sanity, and set it if it was not set
	switch r.config.Engine.CgroupManager {
	case config.CgroupfsCgroupsManager:
		canUseCgroup := !rootless.IsRootless() || isRootlessCgroupSet(pod.config.CgroupParent)
		if canUseCgroup {
			if pod.config.CgroupParent == "" {
				pod.config.CgroupParent = CgroupfsDefaultCgroupParent
			} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
				return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
			}
			// If we are set to use pod cgroups, set the cgroup parent that
			// all containers in the pod will share
			// No need to create it with cgroupfs - the first container to
			// launch should do it for us
			if pod.config.UsePodCgroup {
				pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
				if p.InfraContainerSpec != nil {
					p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
				}
			}
		}
	case config.SystemdCgroupsManager:
		if pod.config.CgroupParent == "" {
			if rootless.IsRootless() {
				pod.config.CgroupParent = SystemdDefaultRootlessCgroupParent
			} else {
				pod.config.CgroupParent = SystemdDefaultCgroupParent
			}
		} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
			return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
		}
		// If we are set to use pod cgroups, set the cgroup parent that
		// all containers in the pod will share
		if pod.config.UsePodCgroup {
			cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()))
			if err != nil {
				return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID())
			}
			pod.state.CgroupPath = cgroupPath
			if p.InfraContainerSpec != nil {
				p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
			}
		}
	default:
		return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.Engine.CgroupManager)
	}

	if pod.config.UsePodCgroup {
		logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath)
	}

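	// Namespace sharing is done by joining the infra container's namespaces,
	// so a pod that shares namespaces must have an infra container.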
	if !pod.HasInfraContainer() && pod.SharesNamespaces() {
		return nil, errors.Errorf("Pods must have an infra container to share namespaces")
	}
	if pod.HasInfraContainer() && !pod.SharesNamespaces() {
		logrus.Infof("Pod has an infra container, but shares no namespaces")
	}

	// Unless the user has specified a name, use a randomly generated one.
	// Note that name conflicts may occur (see #11735), so we need to loop.
	generateName := pod.config.Name == ""
	var addPodErr error
	for {
		if generateName {
			name, err := r.generateName()
			if err != nil {
				return nil, err
			}
			pod.config.Name = name
		}

		if p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == "" {
			p.InfraContainerSpec.Hostname = pod.config.Name
		}
		if addPodErr = r.state.AddPod(pod); addPodErr == nil {
			return pod, nil
		}
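		// Retry only if we generated the name ourselves and the failure
		// was a name collision; any other error is fatal.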
		if !generateName || (errors.Cause(addPodErr) != define.ErrPodExists && errors.Cause(addPodErr) != define.ErrCtrExists) {
			break
		}
	}
	if addPodErr != nil {
		return nil, errors.Wrapf(addPodErr, "error adding pod to state")
	}

	return pod, nil
}

// AddInfra adds the created infra container to the pod state
func (r *Runtime) AddInfra(ctx context.Context, pod *Pod, infraCtr *Container) (*Pod, error) {
	r.lock.Lock()
	defer r.lock.Unlock()

	if !r.valid {
		return nil, define.ErrRuntimeStopped
	}
	pod.state.InfraContainerID = infraCtr.ID()
	if err := pod.save(); err != nil {
		return nil, err
	}
	pod.newPodEvent(events.Create)
	return pod, nil
}

// SavePod is a helper function to save the pod state from outside of libpod
func (r *Runtime) SavePod(pod *Pod) error {
	r.lock.Lock()
	defer r.lock.Unlock()

	if !r.valid {
		return define.ErrRuntimeStopped
	}
	if err := pod.save(); err != nil {
		return err
	}
	pod.newPodEvent(events.Create)
	return nil
}

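// removePod removes a pod from the runtime. If removeCtrs is set (or the only
// remaining container is the infra container), the pod's containers are
// removed as well; force is passed through to container removal.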
func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool) error {
	if err := p.updatePod(); err != nil {
		return err
	}

	ctrs, err := r.state.PodContainers(p)
	if err != nil {
		return err
	}
	numCtrs := len(ctrs)

	// If the only container left in the pod is the infra (pause) container, remove the pod and that container unconditionally.
	pauseCtrID := p.state.InfraContainerID
	if numCtrs == 1 && ctrs[0].ID() == pauseCtrID {
		removeCtrs = true
		force = true
	}
	if !removeCtrs && numCtrs > 0 {
		return errors.Wrapf(define.ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID())
	}

	// Go through and lock all containers so we can operate on them all at
	// once.
	// First loop also checks that we are ready to go ahead and remove.
	for _, ctr := range ctrs {
		ctrLock := ctr.lock
		ctrLock.Lock()
		defer ctrLock.Unlock()

		// If we're force-removing, no need to check status.
		if force {
			continue
		}

		// Sync all containers
		if err := ctr.syncContainer(); err != nil {
			return err
		}

		// Ensure state appropriate for removal
		if err := ctr.checkReadyForRemoval(); err != nil {
			return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID())
		}
	}

	// We're going to be removing containers.
	// If we are using the cgroupfs cgroup driver, to avoid races, we need
	// to hit the pod and conmon CGroups with a PID limit to prevent them
	// from spawning any further processes (particularly cleanup processes)
	// that would prevent removing the CGroups.
	if p.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager {
		// Get the conmon CGroup
		conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
		conmonCgroup, err := cgroups.Load(conmonCgroupPath)
		if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
			logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
		}

		// New resource limits
		resLimits := new(spec.LinuxResources)
		resLimits.Pids = new(spec.LinuxPids)
		resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit

		// Don't try if we failed to retrieve the cgroup
		if err == nil {
			if err := conmonCgroup.Update(resLimits); err != nil {
				logrus.Warnf("Error updating pod %s conmon cgroup PID limit: %v", p.ID(), err)
			}
		}
	}

	var removalErr error

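	// Track the named volumes used by the pod's containers so that anonymous
	// ones can be cleaned up once the containers themselves are gone.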
	ctrNamedVolumes := make(map[string]*ContainerNamedVolume)

	// Second loop - all containers are good, so we should be clear to
	// remove.
	for _, ctr := range ctrs {
		// Remove the container.
		// Do NOT remove named volumes. Instead, we're going to build a
		// list of them to be removed at the end, once the containers
		// have been removed by RemovePodContainers.
		for _, vol := range ctr.config.NamedVolumes {
			ctrNamedVolumes[vol.Name] = vol
		}

		if err := r.removeContainer(ctx, ctr, force, false, true); err != nil {
			if removalErr == nil {
				removalErr = err
			} else {
				logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err)
			}
		}
	}

	// Clear infra container ID before we remove the infra container.
	// There is a potential issue if we don't do that, and removal is
	// interrupted between RemovePodContainers() below and the pod's removal
	// later - we end up with a reference to a nonexistent infra container.
	p.state.InfraContainerID = ""
	if err := p.save(); err != nil {
		return err
	}

	// Remove all containers in the pod from the state.
	if err := r.state.RemovePodContainers(p); err != nil {
		// If this fails, there isn't much more we can do.
		// The containers in the pod are unusable, but they still exist,
		// so pod removal will fail.
		return err
	}

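	// With the containers gone, remove any anonymous volumes they used.
	// Regular, user-created named volumes are left in place.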
	for volName := range ctrNamedVolumes {
		volume, err := r.state.Volume(volName)
		if err != nil {
			if errors.Cause(err) != define.ErrNoSuchVolume {
				logrus.Errorf("Error retrieving volume %s: %v", volName, err)
			}
			continue
		}
		if !volume.Anonymous() {
			continue
		}
		if err := r.removeVolume(ctx, volume, false); err != nil {
			if errors.Cause(err) == define.ErrNoSuchVolume || errors.Cause(err) == define.ErrVolumeRemoved {
				continue
			}
			logrus.Errorf("Error removing volume %s: %v", volName, err)
		}
	}

	// Remove pod cgroup, if present
	if p.state.CgroupPath != "" {
		logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)

		switch p.runtime.config.Engine.CgroupManager {
		case config.SystemdCgroupsManager:
			if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil {
				if removalErr == nil {
					removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
				} else {
					logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
				}
			}
		case config.CgroupfsCgroupsManager:
			// Delete the cgroupfs cgroup
			// Make sure the conmon cgroup is deleted first
			// Since the pod is almost gone, don't bother failing
			// hard - instead, just log errors.
			conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon")
			conmonCgroup, err := cgroups.Load(conmonCgroupPath)
			if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
				if removalErr == nil {
					removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID())
				} else {
					logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
				}
			}
			if err == nil {
				if err := conmonCgroup.Delete(); err != nil {
					if removalErr == nil {
						removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID())
					} else {
						logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err)
					}
				}
			}
			cgroup, err := cgroups.Load(p.state.CgroupPath)
			if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless {
				if removalErr == nil {
					removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID())
				} else {
					logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
				}
			}
			if err == nil {
				if err := cgroup.Delete(); err != nil {
					if removalErr == nil {
						removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID())
					} else {
						logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err)
					}
				}
			}
		default:
			// This should be caught much earlier, but let's still
			// keep going so we make sure to evict the pod before
			// ending up with an inconsistent state.
			if removalErr == nil {
				removalErr = errors.Wrapf(define.ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.Engine.CgroupManager, p.ID())
			} else {
				logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID())
			}
		}
	}

	// Remove pod from state
	if err := r.state.RemovePod(p); err != nil {
		if removalErr != nil {
			logrus.Errorf("%v", removalErr)
		}
		return err
	}

	// Mark pod invalid
	p.valid = false
	p.newPodEvent(events.Remove)

	// Deallocate the pod lock
	if err := p.lock.Free(); err != nil {
		if removalErr == nil {
			removalErr = errors.Wrapf(err, "error freeing pod %s lock", p.ID())
		} else {
			logrus.Errorf("Error freeing pod %s lock: %v", p.ID(), err)
		}
	}

	return removalErr
}