github.com/containers/podman/v4@v4.9.4/libpod/container_graph.go (about) 1 //go:build !remote 2 // +build !remote 3 4 package libpod 5 6 import ( 7 "context" 8 "fmt" 9 "strings" 10 11 "github.com/containers/podman/v4/libpod/define" 12 "github.com/sirupsen/logrus" 13 ) 14 15 type containerNode struct { 16 id string 17 container *Container 18 dependsOn []*containerNode 19 dependedOn []*containerNode 20 } 21 22 // ContainerGraph is a dependency graph based on a set of containers. 23 type ContainerGraph struct { 24 nodes map[string]*containerNode 25 noDepNodes []*containerNode 26 notDependedOnNodes map[string]*containerNode 27 } 28 29 // DependencyMap returns the dependency graph as map with the key being a 30 // container and the value being the containers the key depends on. 31 func (cg *ContainerGraph) DependencyMap() (dependencies map[*Container][]*Container) { 32 dependencies = make(map[*Container][]*Container) 33 for _, node := range cg.nodes { 34 dependsOn := make([]*Container, len(node.dependsOn)) 35 for i, d := range node.dependsOn { 36 dependsOn[i] = d.container 37 } 38 dependencies[node.container] = dependsOn 39 } 40 return dependencies 41 } 42 43 // BuildContainerGraph builds a dependency graph based on the container slice. 44 func BuildContainerGraph(ctrs []*Container) (*ContainerGraph, error) { 45 graph := new(ContainerGraph) 46 graph.nodes = make(map[string]*containerNode) 47 graph.notDependedOnNodes = make(map[string]*containerNode) 48 49 // Start by building all nodes, with no edges 50 for _, ctr := range ctrs { 51 ctrNode := new(containerNode) 52 ctrNode.id = ctr.ID() 53 ctrNode.container = ctr 54 55 graph.nodes[ctr.ID()] = ctrNode 56 graph.notDependedOnNodes[ctr.ID()] = ctrNode 57 } 58 59 // Now add edges based on dependencies 60 for _, node := range graph.nodes { 61 deps := node.container.Dependencies() 62 for _, dep := range deps { 63 // Get the dep's node 64 depNode, ok := graph.nodes[dep] 65 if !ok { 66 return nil, fmt.Errorf("container %s depends on container %s not found in input list: %w", node.id, dep, define.ErrNoSuchCtr) 67 } 68 69 // Add the dependent node to the node's dependencies 70 // And add the node to the dependent node's dependedOn 71 node.dependsOn = append(node.dependsOn, depNode) 72 depNode.dependedOn = append(depNode.dependedOn, node) 73 74 // The dependency now has something depending on it 75 delete(graph.notDependedOnNodes, dep) 76 } 77 78 // Maintain a list of nodes with no dependencies 79 // (no edges coming from them) 80 if len(deps) == 0 { 81 graph.noDepNodes = append(graph.noDepNodes, node) 82 } 83 } 84 85 // Need to do cycle detection 86 // We cannot start or stop if there are cyclic dependencies 87 cycle, err := detectCycles(graph) 88 if err != nil { 89 return nil, err 90 } else if cycle { 91 return nil, fmt.Errorf("cycle found in container dependency graph: %w", define.ErrInternal) 92 } 93 94 return graph, nil 95 } 96 97 // Detect cycles in a container graph using Tarjan's strongly connected 98 // components algorithm 99 // Return true if a cycle is found, false otherwise 100 func detectCycles(graph *ContainerGraph) (bool, error) { 101 type nodeInfo struct { 102 index int 103 lowLink int 104 onStack bool 105 } 106 107 index := 0 108 109 nodes := make(map[string]*nodeInfo) 110 stack := make([]*containerNode, 0, len(graph.nodes)) 111 112 var strongConnect func(*containerNode) (bool, error) 113 strongConnect = func(node *containerNode) (bool, error) { 114 logrus.Debugf("Strongconnecting node %s", node.id) 115 116 info := new(nodeInfo) 117 info.index = index 118 info.lowLink = index 119 index++ 120 121 nodes[node.id] = info 122 123 stack = append(stack, node) 124 125 info.onStack = true 126 127 logrus.Debugf("Pushed %s onto stack", node.id) 128 129 // Work through all nodes we point to 130 for _, successor := range node.dependsOn { 131 if _, ok := nodes[successor.id]; !ok { 132 logrus.Debugf("Recursing to successor node %s", successor.id) 133 134 cycle, err := strongConnect(successor) 135 if err != nil { 136 return false, err 137 } else if cycle { 138 return true, nil 139 } 140 141 successorInfo := nodes[successor.id] 142 if successorInfo.lowLink < info.lowLink { 143 info.lowLink = successorInfo.lowLink 144 } 145 } else { 146 successorInfo := nodes[successor.id] 147 if successorInfo.index < info.lowLink && successorInfo.onStack { 148 info.lowLink = successorInfo.index 149 } 150 } 151 } 152 153 if info.lowLink == info.index { 154 l := len(stack) 155 if l == 0 { 156 return false, fmt.Errorf("empty stack in detectCycles: %w", define.ErrInternal) 157 } 158 159 // Pop off the stack 160 topOfStack := stack[l-1] 161 stack = stack[:l-1] 162 163 // Popped item is no longer on the stack, mark as such 164 topInfo, ok := nodes[topOfStack.id] 165 if !ok { 166 return false, fmt.Errorf("finding node info for %s: %w", topOfStack.id, define.ErrInternal) 167 } 168 topInfo.onStack = false 169 170 logrus.Debugf("Finishing node %s. Popped %s off stack", node.id, topOfStack.id) 171 172 // If the top of the stack is not us, we have found a 173 // cycle 174 if topOfStack.id != node.id { 175 return true, nil 176 } 177 } 178 179 return false, nil 180 } 181 182 for id, node := range graph.nodes { 183 if _, ok := nodes[id]; !ok { 184 cycle, err := strongConnect(node) 185 if err != nil { 186 return false, err 187 } else if cycle { 188 return true, nil 189 } 190 } 191 } 192 193 return false, nil 194 } 195 196 // Visit a node on a container graph and start the container, or set an error if 197 // a dependency failed to start. if restart is true, startNode will restart the node instead of starting it. 198 func startNode(ctx context.Context, node *containerNode, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, restart bool) { 199 // First, check if we have already visited the node 200 if ctrsVisited[node.id] { 201 return 202 } 203 204 // If setError is true, a dependency of us failed 205 // Mark us as failed and recurse 206 if setError { 207 // Mark us as visited, and set an error 208 ctrsVisited[node.id] = true 209 ctrErrors[node.id] = fmt.Errorf("a dependency of container %s failed to start: %w", node.id, define.ErrCtrStateInvalid) 210 211 // Hit anyone who depends on us, and set errors on them too 212 for _, successor := range node.dependedOn { 213 startNode(ctx, successor, true, ctrErrors, ctrsVisited, restart) 214 } 215 216 return 217 } 218 219 // Have all our dependencies started? 220 // If not, don't visit the node yet 221 depsVisited := true 222 for _, dep := range node.dependsOn { 223 depsVisited = depsVisited && ctrsVisited[dep.id] 224 } 225 if !depsVisited { 226 // Don't visit us yet, all dependencies are not up 227 // We'll hit the dependencies eventually, and when we do it will 228 // recurse here 229 return 230 } 231 232 // Going to try to start the container, mark us as visited 233 ctrsVisited[node.id] = true 234 235 ctrErrored := false 236 237 // Check if dependencies are running 238 // Graph traversal means we should have started them 239 // But they could have died before we got here 240 // Does not require that the container be locked, we only need to lock 241 // the dependencies 242 depsStopped, err := node.container.checkDependenciesRunning() 243 if err != nil { 244 ctrErrors[node.id] = err 245 ctrErrored = true 246 } else if len(depsStopped) > 0 { 247 // Our dependencies are not running 248 depsList := strings.Join(depsStopped, ",") 249 ctrErrors[node.id] = fmt.Errorf("the following dependencies of container %s are not running: %s: %w", node.id, depsList, define.ErrCtrStateInvalid) 250 ctrErrored = true 251 } 252 253 // Lock before we start 254 node.container.lock.Lock() 255 256 // Sync the container to pick up current state 257 if !ctrErrored { 258 if err := node.container.syncContainer(); err != nil { 259 ctrErrored = true 260 ctrErrors[node.id] = err 261 } 262 } 263 264 // Start the container (only if it is not running) 265 if !ctrErrored && len(node.container.config.InitContainerType) < 1 { 266 if !restart && node.container.state.State != define.ContainerStateRunning { 267 if err := node.container.initAndStart(ctx); err != nil { 268 ctrErrored = true 269 ctrErrors[node.id] = err 270 } 271 } 272 if restart && node.container.state.State != define.ContainerStatePaused && node.container.state.State != define.ContainerStateUnknown { 273 if err := node.container.restartWithTimeout(ctx, node.container.config.StopTimeout); err != nil { 274 ctrErrored = true 275 ctrErrors[node.id] = err 276 } 277 } 278 } 279 280 node.container.lock.Unlock() 281 282 // Recurse to anyone who depends on us and start them 283 for _, successor := range node.dependedOn { 284 startNode(ctx, successor, ctrErrored, ctrErrors, ctrsVisited, restart) 285 } 286 } 287 288 // Visit a node on the container graph and remove it, or set an error if it 289 // failed to remove. Only intended for use in pod removal; do *not* use when 290 // removing individual containers. 291 // All containers are assumed to be *UNLOCKED* on running this function. 292 // Container locks will be acquired as necessary. 293 // Pod and infraID are optional. If a pod is given it must be *LOCKED*. 294 func removeNode(ctx context.Context, node *containerNode, pod *Pod, force bool, timeout *uint, setError bool, ctrErrors map[string]error, ctrsVisited map[string]bool, ctrNamedVolumes map[string]*ContainerNamedVolume) { 295 // If we already visited this node, we're done. 296 if ctrsVisited[node.id] { 297 return 298 } 299 300 // Someone who depends on us failed. 301 // Mark us as failed and recurse. 302 if setError { 303 ctrsVisited[node.id] = true 304 ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s could not be removed: %w", node.id, define.ErrCtrStateInvalid) 305 306 // Hit anyone who depends on us, set errors there as well. 307 for _, successor := range node.dependsOn { 308 removeNode(ctx, successor, pod, force, timeout, true, ctrErrors, ctrsVisited, ctrNamedVolumes) 309 } 310 } 311 312 // Does anyone still depend on us? 313 // Cannot remove if true. Once all our dependencies have been removed, 314 // we will be removed. 315 for _, dep := range node.dependedOn { 316 // The container that depends on us hasn't been removed yet. 317 // OK to continue on 318 if ok := ctrsVisited[dep.id]; !ok { 319 return 320 } 321 } 322 323 // Going to try to remove the node, mark us as visited 324 ctrsVisited[node.id] = true 325 326 ctrErrored := false 327 328 // Verify that all that depend on us are gone. 329 // Graph traversal should guarantee this is true, but this isn't that 330 // expensive, and it's better to be safe. 331 for _, dep := range node.dependedOn { 332 if _, err := node.container.runtime.GetContainer(dep.id); err == nil { 333 ctrErrored = true 334 ctrErrors[node.id] = fmt.Errorf("a container that depends on container %s still exists: %w", node.id, define.ErrDepExists) 335 } 336 } 337 338 // Lock the container 339 node.container.lock.Lock() 340 341 // Gate all subsequent bits behind a ctrErrored check - we don't want to 342 // proceed if a previous step failed. 343 if !ctrErrored { 344 if err := node.container.syncContainer(); err != nil { 345 ctrErrored = true 346 ctrErrors[node.id] = err 347 } 348 } 349 350 if !ctrErrored { 351 for _, vol := range node.container.config.NamedVolumes { 352 ctrNamedVolumes[vol.Name] = vol 353 } 354 355 if pod != nil && pod.state.InfraContainerID == node.id { 356 pod.state.InfraContainerID = "" 357 if err := pod.save(); err != nil { 358 ctrErrored = true 359 ctrErrors[node.id] = fmt.Errorf("error removing infra container %s from pod %s: %w", node.id, pod.ID(), err) 360 } 361 } 362 } 363 364 if !ctrErrored { 365 opts := ctrRmOpts{ 366 Force: force, 367 RemovePod: true, 368 Timeout: timeout, 369 } 370 371 if _, _, err := node.container.runtime.removeContainer(ctx, node.container, opts); err != nil { 372 ctrErrored = true 373 ctrErrors[node.id] = err 374 } 375 } 376 377 node.container.lock.Unlock() 378 379 // Recurse to anyone who we depend on and remove them 380 for _, successor := range node.dependsOn { 381 removeNode(ctx, successor, pod, force, timeout, ctrErrored, ctrErrors, ctrsVisited, ctrNamedVolumes) 382 } 383 }