github.com/containers/podman/v2@v2.2.2-0.20210501105131-c1e07d070c4c/libpod/runtime_pod_linux.go (about) 1 // +build linux 2 3 package libpod 4 5 import ( 6 "context" 7 "fmt" 8 "path" 9 "path/filepath" 10 "strings" 11 12 "github.com/containers/common/pkg/config" 13 "github.com/containers/podman/v2/libpod/define" 14 "github.com/containers/podman/v2/libpod/events" 15 "github.com/containers/podman/v2/pkg/cgroups" 16 "github.com/containers/podman/v2/pkg/rootless" 17 spec "github.com/opencontainers/runtime-spec/specs-go" 18 "github.com/pkg/errors" 19 "github.com/sirupsen/logrus" 20 ) 21 22 // NewPod makes a new, empty pod 23 func (r *Runtime) NewPod(ctx context.Context, options ...PodCreateOption) (_ *Pod, deferredErr error) { 24 r.lock.Lock() 25 defer r.lock.Unlock() 26 27 if !r.valid { 28 return nil, define.ErrRuntimeStopped 29 } 30 31 pod := newPod(r) 32 33 // Set default namespace to runtime's namespace 34 // Do so before options run so they can override it 35 if r.config.Engine.Namespace != "" { 36 pod.config.Namespace = r.config.Engine.Namespace 37 } 38 39 for _, option := range options { 40 if err := option(pod); err != nil { 41 return nil, errors.Wrapf(err, "error running pod create option") 42 } 43 } 44 45 if pod.config.Name == "" { 46 name, err := r.generateName() 47 if err != nil { 48 return nil, err 49 } 50 pod.config.Name = name 51 } 52 53 if pod.config.Hostname == "" { 54 pod.config.Hostname = pod.config.Name 55 } 56 57 // Allocate a lock for the pod 58 lock, err := r.lockManager.AllocateLock() 59 if err != nil { 60 return nil, errors.Wrapf(err, "error allocating lock for new pod") 61 } 62 pod.lock = lock 63 pod.config.LockID = pod.lock.ID() 64 65 defer func() { 66 if deferredErr != nil { 67 if err := pod.lock.Free(); err != nil { 68 logrus.Errorf("Error freeing pod lock after failed creation: %v", err) 69 } 70 } 71 }() 72 73 pod.valid = true 74 75 // Check CGroup parent sanity, and set it if it was not set 76 switch r.config.Engine.CgroupManager { 77 case config.CgroupfsCgroupsManager: 78 if pod.config.CgroupParent == "" { 79 pod.config.CgroupParent = CgroupfsDefaultCgroupParent 80 } else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { 81 return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") 82 } 83 // If we are set to use pod cgroups, set the cgroup parent that 84 // all containers in the pod will share 85 // No need to create it with cgroupfs - the first container to 86 // launch should do it for us 87 if pod.config.UsePodCgroup { 88 pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID()) 89 } 90 case config.SystemdCgroupsManager: 91 if pod.config.CgroupParent == "" { 92 if rootless.IsRootless() { 93 pod.config.CgroupParent = SystemdDefaultRootlessCgroupParent 94 } else { 95 pod.config.CgroupParent = SystemdDefaultCgroupParent 96 } 97 } else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { 98 return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") 99 } 100 // If we are set to use pod cgroups, set the cgroup parent that 101 // all containers in the pod will share 102 if pod.config.UsePodCgroup { 103 cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID())) 104 if err != nil { 105 return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID()) 106 } 107 pod.state.CgroupPath = cgroupPath 108 } 109 default: 110 return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.Engine.CgroupManager) 111 } 112 113 if pod.config.UsePodCgroup { 114 logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath) 115 } 116 if !pod.HasInfraContainer() && pod.SharesNamespaces() { 117 return nil, errors.Errorf("Pods must have an infra container to share namespaces") 118 } 119 if pod.HasInfraContainer() && !pod.SharesNamespaces() { 120 logrus.Warnf("Pod has an infra container, but shares no namespaces") 121 } 122 123 if err := r.state.AddPod(pod); err != nil { 124 return nil, errors.Wrapf(err, "error adding pod to state") 125 } 126 defer func() { 127 if deferredErr != nil { 128 if err := r.removePod(ctx, pod, true, true); err != nil { 129 logrus.Errorf("Error removing pod after pause container creation failure: %v", err) 130 } 131 } 132 }() 133 134 if pod.HasInfraContainer() { 135 ctr, err := r.createInfraContainer(ctx, pod) 136 if err != nil { 137 return nil, errors.Wrapf(err, "error adding Infra Container") 138 } 139 pod.state.InfraContainerID = ctr.ID() 140 if err := pod.save(); err != nil { 141 return nil, err 142 } 143 } 144 pod.newPodEvent(events.Create) 145 return pod, nil 146 } 147 148 func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool) error { 149 if err := p.updatePod(); err != nil { 150 return err 151 } 152 153 ctrs, err := r.state.PodContainers(p) 154 if err != nil { 155 return err 156 } 157 158 numCtrs := len(ctrs) 159 160 // If the only container in the pod is the pause container, remove the pod and container unconditionally. 161 pauseCtrID := p.state.InfraContainerID 162 if numCtrs == 1 && ctrs[0].ID() == pauseCtrID { 163 removeCtrs = true 164 force = true 165 } 166 if !removeCtrs && numCtrs > 0 { 167 return errors.Wrapf(define.ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID()) 168 } 169 170 // Go through and lock all containers so we can operate on them all at 171 // once. 172 // First loop also checks that we are ready to go ahead and remove. 173 for _, ctr := range ctrs { 174 ctrLock := ctr.lock 175 ctrLock.Lock() 176 defer ctrLock.Unlock() 177 178 // If we're force-removing, no need to check status. 179 if force { 180 continue 181 } 182 183 // Sync all containers 184 if err := ctr.syncContainer(); err != nil { 185 return err 186 } 187 188 // Ensure state appropriate for removal 189 if err := ctr.checkReadyForRemoval(); err != nil { 190 return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID()) 191 } 192 } 193 194 // We're going to be removing containers. 195 // If we are CGroupfs cgroup driver, to avoid races, we need to hit 196 // the pod and conmon CGroups with a PID limit to prevent them from 197 // spawning any further processes (particularly cleanup processes) which 198 // would prevent removing the CGroups. 199 if p.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager { 200 // Get the conmon CGroup 201 conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon") 202 conmonCgroup, err := cgroups.Load(conmonCgroupPath) 203 if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless { 204 logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err) 205 } 206 207 // New resource limits 208 resLimits := new(spec.LinuxResources) 209 resLimits.Pids = new(spec.LinuxPids) 210 resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit 211 212 // Don't try if we failed to retrieve the cgroup 213 if err == nil { 214 if err := conmonCgroup.Update(resLimits); err != nil { 215 logrus.Warnf("Error updating pod %s conmon cgroup %s PID limit: %v", p.ID(), conmonCgroupPath, err) 216 } 217 } 218 } 219 220 var removalErr error 221 222 ctrNamedVolumes := make(map[string]*ContainerNamedVolume) 223 224 // Second loop - all containers are good, so we should be clear to 225 // remove. 226 for _, ctr := range ctrs { 227 // Remove the container. 228 // Do NOT remove named volumes. Instead, we're going to build a 229 // list of them to be removed at the end, once the containers 230 // have been removed by RemovePodContainers. 231 for _, vol := range ctr.config.NamedVolumes { 232 ctrNamedVolumes[vol.Name] = vol 233 } 234 235 if err := r.removeContainer(ctx, ctr, force, false, true); err != nil { 236 if removalErr == nil { 237 removalErr = err 238 } else { 239 logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err) 240 } 241 } 242 } 243 244 // Remove all containers in the pod from the state. 245 if err := r.state.RemovePodContainers(p); err != nil { 246 // If this fails, there isn't much more we can do. 247 // The containers in the pod are unusable, but they still exist, 248 // so pod removal will fail. 249 return err 250 } 251 252 for volName := range ctrNamedVolumes { 253 volume, err := r.state.Volume(volName) 254 if err != nil && errors.Cause(err) != define.ErrNoSuchVolume { 255 logrus.Errorf("Error retrieving volume %s: %v", volName, err) 256 continue 257 } 258 if !volume.Anonymous() { 259 continue 260 } 261 if err := r.removeVolume(ctx, volume, false); err != nil { 262 if errors.Cause(err) == define.ErrNoSuchVolume || errors.Cause(err) == define.ErrVolumeRemoved { 263 continue 264 } 265 logrus.Errorf("Error removing volume %s: %v", volName, err) 266 } 267 } 268 269 // Remove pod cgroup, if present 270 if p.state.CgroupPath != "" { 271 logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath) 272 273 switch p.runtime.config.Engine.CgroupManager { 274 case config.SystemdCgroupsManager: 275 if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil { 276 if removalErr == nil { 277 removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID()) 278 } else { 279 logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) 280 } 281 } 282 case config.CgroupfsCgroupsManager: 283 // Delete the cgroupfs cgroup 284 // Make sure the conmon cgroup is deleted first 285 // Since the pod is almost gone, don't bother failing 286 // hard - instead, just log errors. 287 conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon") 288 conmonCgroup, err := cgroups.Load(conmonCgroupPath) 289 if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless { 290 if removalErr == nil { 291 removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID()) 292 } else { 293 logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err) 294 } 295 } 296 if err == nil { 297 if err := conmonCgroup.Delete(); err != nil { 298 if removalErr == nil { 299 removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID()) 300 } else { 301 logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err) 302 } 303 } 304 } 305 cgroup, err := cgroups.Load(p.state.CgroupPath) 306 if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless { 307 if removalErr == nil { 308 removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID()) 309 } else { 310 logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) 311 } 312 } 313 if err == nil { 314 if err := cgroup.Delete(); err != nil { 315 if removalErr == nil { 316 removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID()) 317 } else { 318 logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) 319 } 320 } 321 } 322 default: 323 // This should be caught much earlier, but let's still 324 // keep going so we make sure to evict the pod before 325 // ending up with an inconsistent state. 326 if removalErr == nil { 327 removalErr = errors.Wrapf(define.ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.Engine.CgroupManager, p.ID()) 328 } else { 329 logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID()) 330 } 331 } 332 } 333 334 // Remove pod from state 335 if err := r.state.RemovePod(p); err != nil { 336 if removalErr != nil { 337 logrus.Errorf("%v", removalErr) 338 } 339 return err 340 } 341 342 // Mark pod invalid 343 p.valid = false 344 p.newPodEvent(events.Remove) 345 346 // Deallocate the pod lock 347 if err := p.lock.Free(); err != nil { 348 if removalErr == nil { 349 removalErr = errors.Wrapf(err, "error freeing pod %s lock", p.ID()) 350 } else { 351 logrus.Errorf("Error freeing pod %s lock: %v", p.ID(), err) 352 } 353 } 354 355 return removalErr 356 }