github.com/AbhinandanKurakure/podman/v3@v3.4.10/libpod/runtime_pod_linux.go (about) 1 // +build linux 2 3 package libpod 4 5 import ( 6 "context" 7 "fmt" 8 "path" 9 "path/filepath" 10 "strings" 11 12 "github.com/containers/common/pkg/config" 13 "github.com/containers/podman/v3/libpod/define" 14 "github.com/containers/podman/v3/libpod/events" 15 "github.com/containers/podman/v3/pkg/cgroups" 16 "github.com/containers/podman/v3/pkg/rootless" 17 "github.com/containers/podman/v3/pkg/specgen" 18 spec "github.com/opencontainers/runtime-spec/specs-go" 19 "github.com/pkg/errors" 20 "github.com/sirupsen/logrus" 21 ) 22 23 // NewPod makes a new, empty pod 24 func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, options ...PodCreateOption) (_ *Pod, deferredErr error) { 25 r.lock.Lock() 26 defer r.lock.Unlock() 27 28 if !r.valid { 29 return nil, define.ErrRuntimeStopped 30 } 31 32 pod := newPod(r) 33 34 // Set default namespace to runtime's namespace 35 // Do so before options run so they can override it 36 if r.config.Engine.Namespace != "" { 37 pod.config.Namespace = r.config.Engine.Namespace 38 } 39 40 for _, option := range options { 41 if err := option(pod); err != nil { 42 return nil, errors.Wrapf(err, "error running pod create option") 43 } 44 } 45 46 // Allocate a lock for the pod 47 lock, err := r.lockManager.AllocateLock() 48 if err != nil { 49 return nil, errors.Wrapf(err, "error allocating lock for new pod") 50 } 51 pod.lock = lock 52 pod.config.LockID = pod.lock.ID() 53 54 defer func() { 55 if deferredErr != nil { 56 if err := pod.lock.Free(); err != nil { 57 logrus.Errorf("Error freeing pod lock after failed creation: %v", err) 58 } 59 } 60 }() 61 62 pod.valid = true 63 64 // Check CGroup parent sanity, and set it if it was not set 65 switch r.config.Engine.CgroupManager { 66 case config.CgroupfsCgroupsManager: 67 canUseCgroup := !rootless.IsRootless() || isRootlessCgroupSet(pod.config.CgroupParent) 68 if canUseCgroup { 69 if pod.config.CgroupParent == "" { 70 pod.config.CgroupParent = CgroupfsDefaultCgroupParent 71 } else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { 72 return nil, errors.Wrapf(define.ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") 73 } 74 // If we are set to use pod cgroups, set the cgroup parent that 75 // all containers in the pod will share 76 // No need to create it with cgroupfs - the first container to 77 // launch should do it for us 78 if pod.config.UsePodCgroup { 79 pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID()) 80 if p.InfraContainerSpec != nil { 81 p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath 82 } 83 } 84 } 85 case config.SystemdCgroupsManager: 86 if pod.config.CgroupParent == "" { 87 if rootless.IsRootless() { 88 pod.config.CgroupParent = SystemdDefaultRootlessCgroupParent 89 } else { 90 pod.config.CgroupParent = SystemdDefaultCgroupParent 91 } 92 } else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { 93 return nil, errors.Wrapf(define.ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") 94 } 95 // If we are set to use pod cgroups, set the cgroup parent that 96 // all containers in the pod will share 97 if pod.config.UsePodCgroup { 98 cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID())) 99 if err != nil { 100 return nil, errors.Wrapf(err, "unable to create pod cgroup for pod %s", pod.ID()) 101 } 102 pod.state.CgroupPath = cgroupPath 103 if p.InfraContainerSpec != nil { 104 p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath 105 } 106 } 107 default: 108 return nil, errors.Wrapf(define.ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.Engine.CgroupManager) 109 } 110 111 if pod.config.UsePodCgroup { 112 logrus.Debugf("Got pod cgroup as %s", pod.state.CgroupPath) 113 } 114 115 if !pod.HasInfraContainer() && pod.SharesNamespaces() { 116 return nil, errors.Errorf("Pods must have an infra container to share namespaces") 117 } 118 if pod.HasInfraContainer() && !pod.SharesNamespaces() { 119 logrus.Infof("Pod has an infra container, but shares no namespaces") 120 } 121 122 // Unless the user has specified a name, use a randomly generated one. 123 // Note that name conflicts may occur (see #11735), so we need to loop. 124 generateName := pod.config.Name == "" 125 var addPodErr error 126 for { 127 if generateName { 128 name, err := r.generateName() 129 if err != nil { 130 return nil, err 131 } 132 pod.config.Name = name 133 } 134 135 if p.InfraContainerSpec != nil && p.InfraContainerSpec.Hostname == "" { 136 p.InfraContainerSpec.Hostname = pod.config.Name 137 } 138 if addPodErr = r.state.AddPod(pod); addPodErr == nil { 139 return pod, nil 140 } 141 if !generateName || (errors.Cause(addPodErr) != define.ErrPodExists && errors.Cause(addPodErr) != define.ErrCtrExists) { 142 break 143 } 144 } 145 if addPodErr != nil { 146 return nil, errors.Wrapf(addPodErr, "error adding pod to state") 147 } 148 149 return pod, nil 150 } 151 152 // AddInfra adds the created infra container to the pod state 153 func (r *Runtime) AddInfra(ctx context.Context, pod *Pod, infraCtr *Container) (*Pod, error) { 154 r.lock.Lock() 155 defer r.lock.Unlock() 156 157 if !r.valid { 158 return nil, define.ErrRuntimeStopped 159 } 160 pod.state.InfraContainerID = infraCtr.ID() 161 if err := pod.save(); err != nil { 162 return nil, err 163 } 164 pod.newPodEvent(events.Create) 165 return pod, nil 166 } 167 168 // SavePod is a helper function to save the pod state from outside of libpod 169 func (r *Runtime) SavePod(pod *Pod) error { 170 r.lock.Lock() 171 defer r.lock.Unlock() 172 173 if !r.valid { 174 return define.ErrRuntimeStopped 175 } 176 if err := pod.save(); err != nil { 177 return err 178 } 179 pod.newPodEvent(events.Create) 180 return nil 181 } 182 183 func (r *Runtime) removePod(ctx context.Context, p *Pod, removeCtrs, force bool) error { 184 if err := p.updatePod(); err != nil { 185 return err 186 } 187 188 ctrs, err := r.state.PodContainers(p) 189 if err != nil { 190 return err 191 } 192 numCtrs := len(ctrs) 193 194 // If the only running container in the pod is the pause container, remove the pod and container unconditionally. 195 pauseCtrID := p.state.InfraContainerID 196 if numCtrs == 1 && ctrs[0].ID() == pauseCtrID { 197 removeCtrs = true 198 force = true 199 } 200 if !removeCtrs && numCtrs > 0 { 201 return errors.Wrapf(define.ErrCtrExists, "pod %s contains containers and cannot be removed", p.ID()) 202 } 203 204 // Go through and lock all containers so we can operate on them all at 205 // once. 206 // First loop also checks that we are ready to go ahead and remove. 207 for _, ctr := range ctrs { 208 ctrLock := ctr.lock 209 ctrLock.Lock() 210 defer ctrLock.Unlock() 211 212 // If we're force-removing, no need to check status. 213 if force { 214 continue 215 } 216 217 // Sync all containers 218 if err := ctr.syncContainer(); err != nil { 219 return err 220 } 221 222 // Ensure state appropriate for removal 223 if err := ctr.checkReadyForRemoval(); err != nil { 224 return errors.Wrapf(err, "pod %s has containers that are not ready to be removed", p.ID()) 225 } 226 } 227 228 // We're going to be removing containers. 229 // If we are CGroupfs cgroup driver, to avoid races, we need to hit 230 // the pod and conmon CGroups with a PID limit to prevent them from 231 // spawning any further processes (particularly cleanup processes) which 232 // would prevent removing the CGroups. 233 if p.runtime.config.Engine.CgroupManager == config.CgroupfsCgroupsManager { 234 // Get the conmon CGroup 235 conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon") 236 conmonCgroup, err := cgroups.Load(conmonCgroupPath) 237 if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless { 238 logrus.Errorf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err) 239 } 240 241 // New resource limits 242 resLimits := new(spec.LinuxResources) 243 resLimits.Pids = new(spec.LinuxPids) 244 resLimits.Pids.Limit = 1 // Inhibit forks with very low pids limit 245 246 // Don't try if we failed to retrieve the cgroup 247 if err == nil { 248 if err := conmonCgroup.Update(resLimits); err != nil { 249 logrus.Warnf("Error updating pod %s conmon cgroup PID limit: %v", p.ID(), err) 250 } 251 } 252 } 253 254 var removalErr error 255 256 ctrNamedVolumes := make(map[string]*ContainerNamedVolume) 257 258 // Second loop - all containers are good, so we should be clear to 259 // remove. 260 for _, ctr := range ctrs { 261 // Remove the container. 262 // Do NOT remove named volumes. Instead, we're going to build a 263 // list of them to be removed at the end, once the containers 264 // have been removed by RemovePodContainers. 265 for _, vol := range ctr.config.NamedVolumes { 266 ctrNamedVolumes[vol.Name] = vol 267 } 268 269 if err := r.removeContainer(ctx, ctr, force, false, true); err != nil { 270 if removalErr == nil { 271 removalErr = err 272 } else { 273 logrus.Errorf("Error removing container %s from pod %s: %v", ctr.ID(), p.ID(), err) 274 } 275 } 276 } 277 278 // Clear infra container ID before we remove the infra container. 279 // There is a potential issue if we don't do that, and removal is 280 // interrupted between RemoveAllContainers() below and the pod's removal 281 // later - we end up with a reference to a nonexistent infra container. 282 p.state.InfraContainerID = "" 283 if err := p.save(); err != nil { 284 return err 285 } 286 287 // Remove all containers in the pod from the state. 288 if err := r.state.RemovePodContainers(p); err != nil { 289 // If this fails, there isn't much more we can do. 290 // The containers in the pod are unusable, but they still exist, 291 // so pod removal will fail. 292 return err 293 } 294 295 for volName := range ctrNamedVolumes { 296 volume, err := r.state.Volume(volName) 297 if err != nil && errors.Cause(err) != define.ErrNoSuchVolume { 298 logrus.Errorf("Error retrieving volume %s: %v", volName, err) 299 continue 300 } 301 if !volume.Anonymous() { 302 continue 303 } 304 if err := r.removeVolume(ctx, volume, false); err != nil { 305 if errors.Cause(err) == define.ErrNoSuchVolume || errors.Cause(err) == define.ErrVolumeRemoved { 306 continue 307 } 308 logrus.Errorf("Error removing volume %s: %v", volName, err) 309 } 310 } 311 312 // Remove pod cgroup, if present 313 if p.state.CgroupPath != "" { 314 logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath) 315 316 switch p.runtime.config.Engine.CgroupManager { 317 case config.SystemdCgroupsManager: 318 if err := deleteSystemdCgroup(p.state.CgroupPath); err != nil { 319 if removalErr == nil { 320 removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID()) 321 } else { 322 logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) 323 } 324 } 325 case config.CgroupfsCgroupsManager: 326 // Delete the cgroupfs cgroup 327 // Make sure the conmon cgroup is deleted first 328 // Since the pod is almost gone, don't bother failing 329 // hard - instead, just log errors. 330 conmonCgroupPath := filepath.Join(p.state.CgroupPath, "conmon") 331 conmonCgroup, err := cgroups.Load(conmonCgroupPath) 332 if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless { 333 if removalErr == nil { 334 removalErr = errors.Wrapf(err, "error retrieving pod %s conmon cgroup", p.ID()) 335 } else { 336 logrus.Debugf("Error retrieving pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err) 337 } 338 } 339 if err == nil { 340 if err := conmonCgroup.Delete(); err != nil { 341 if removalErr == nil { 342 removalErr = errors.Wrapf(err, "error removing pod %s conmon cgroup", p.ID()) 343 } else { 344 logrus.Errorf("Error deleting pod %s conmon cgroup %s: %v", p.ID(), conmonCgroupPath, err) 345 } 346 } 347 } 348 cgroup, err := cgroups.Load(p.state.CgroupPath) 349 if err != nil && err != cgroups.ErrCgroupDeleted && err != cgroups.ErrCgroupV1Rootless { 350 if removalErr == nil { 351 removalErr = errors.Wrapf(err, "error retrieving pod %s cgroup", p.ID()) 352 } else { 353 logrus.Errorf("Error retrieving pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) 354 } 355 } 356 if err == nil { 357 if err := cgroup.Delete(); err != nil { 358 if removalErr == nil { 359 removalErr = errors.Wrapf(err, "error removing pod %s cgroup", p.ID()) 360 } else { 361 logrus.Errorf("Error deleting pod %s cgroup %s: %v", p.ID(), p.state.CgroupPath, err) 362 } 363 } 364 } 365 default: 366 // This should be caught much earlier, but let's still 367 // keep going so we make sure to evict the pod before 368 // ending up with an inconsistent state. 369 if removalErr == nil { 370 removalErr = errors.Wrapf(define.ErrInternal, "unrecognized cgroup manager %s when removing pod %s cgroups", p.runtime.config.Engine.CgroupManager, p.ID()) 371 } else { 372 logrus.Errorf("Unknown cgroups manager %s specified - cannot remove pod %s cgroup", p.runtime.config.Engine.CgroupManager, p.ID()) 373 } 374 } 375 } 376 377 // Remove pod from state 378 if err := r.state.RemovePod(p); err != nil { 379 if removalErr != nil { 380 logrus.Errorf("%v", removalErr) 381 } 382 return err 383 } 384 385 // Mark pod invalid 386 p.valid = false 387 p.newPodEvent(events.Remove) 388 389 // Deallocate the pod lock 390 if err := p.lock.Free(); err != nil { 391 if removalErr == nil { 392 removalErr = errors.Wrapf(err, "error freeing pod %s lock", p.ID()) 393 } else { 394 logrus.Errorf("Error freeing pod %s lock: %v", p.ID(), err) 395 } 396 } 397 398 return removalErr 399 }