github.com/rish1988/moby@v25.0.2+incompatible/daemon/monitor.go (about) 1 package daemon // import "github.com/docker/docker/daemon" 2 3 import ( 4 "context" 5 "strconv" 6 "time" 7 8 "github.com/containerd/log" 9 "github.com/docker/docker/api/types/backend" 10 "github.com/docker/docker/api/types/events" 11 "github.com/docker/docker/container" 12 "github.com/docker/docker/daemon/config" 13 "github.com/docker/docker/errdefs" 14 libcontainerdtypes "github.com/docker/docker/libcontainerd/types" 15 "github.com/docker/docker/restartmanager" 16 "github.com/pkg/errors" 17 ) 18 19 func (daemon *Daemon) setStateCounter(c *container.Container) { 20 switch c.StateString() { 21 case "paused": 22 stateCtr.set(c.ID, "paused") 23 case "running": 24 stateCtr.set(c.ID, "running") 25 default: 26 stateCtr.set(c.ID, "stopped") 27 } 28 } 29 30 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error { 31 var exitStatus container.ExitStatus 32 c.Lock() 33 34 cfg := daemon.config() 35 36 // Health checks will be automatically restarted if/when the 37 // container is started again. 38 daemon.stopHealthchecks(c) 39 40 tsk, ok := c.Task() 41 if ok { 42 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 43 es, err := tsk.Delete(ctx) 44 cancel() 45 if err != nil { 46 log.G(ctx).WithFields(log.Fields{ 47 "error": err, 48 "container": c.ID, 49 }).Warn("failed to delete container from containerd") 50 } else { 51 exitStatus = container.ExitStatus{ 52 ExitCode: int(es.ExitCode()), 53 ExitedAt: es.ExitTime(), 54 } 55 } 56 } 57 58 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 59 c.StreamConfig.Wait(ctx) 60 cancel() 61 62 c.Reset(false) 63 64 if e != nil { 65 exitStatus.ExitCode = int(e.ExitCode) 66 exitStatus.ExitedAt = e.ExitedAt 67 if e.Error != nil { 68 c.SetError(e.Error) 69 } 70 } 71 72 daemonShutdown := daemon.IsShuttingDown() 73 execDuration := time.Since(c.StartedAt) 74 restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration) 75 if err != nil { 76 log.G(ctx).WithFields(log.Fields{ 77 "error": err, 78 "container": c.ID, 79 "restartCount": c.RestartCount, 80 "exitStatus": exitStatus, 81 "daemonShuttingDown": daemonShutdown, 82 "hasBeenManuallyStopped": c.HasBeenManuallyStopped, 83 "execDuration": execDuration, 84 }).Warn("ShouldRestart failed, container will not be restarted") 85 restart = false 86 } 87 88 attributes := map[string]string{ 89 "exitCode": strconv.Itoa(exitStatus.ExitCode), 90 "execDuration": strconv.Itoa(int(execDuration.Seconds())), 91 } 92 daemon.Cleanup(context.TODO(), c) 93 94 if restart { 95 c.RestartCount++ 96 log.G(ctx).WithFields(log.Fields{ 97 "container": c.ID, 98 "restartCount": c.RestartCount, 99 "exitStatus": exitStatus, 100 "manualRestart": c.HasBeenManuallyRestarted, 101 }).Debug("Restarting container") 102 c.SetRestarting(&exitStatus) 103 } else { 104 c.SetStopped(&exitStatus) 105 if !c.HasBeenManuallyRestarted { 106 defer daemon.autoRemove(&cfg.Config, c) 107 } 108 } 109 defer c.Unlock() // needs to be called before autoRemove 110 111 daemon.setStateCounter(c) 112 checkpointErr := c.CheckpointTo(daemon.containersReplica) 113 114 daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes) 115 116 if restart { 117 go func() { 118 err := <-wait 119 if err == nil { 120 // daemon.netController is initialized when daemon is restoring containers. 121 // But containerStart will use daemon.netController segment. 122 // So to avoid panic at startup process, here must wait util daemon restore done. 123 daemon.waitForStartupDone() 124 cfg := daemon.config() // Apply the most up-to-date daemon config to the restarted container. 125 126 // update the error if we fail to start the container, so that the cleanup code 127 // below can handle updating the container's status, and auto-remove (if set). 128 err = daemon.containerStart(context.Background(), cfg, c, "", "", false) 129 if err != nil { 130 log.G(ctx).Debugf("failed to restart container: %+v", err) 131 } 132 } 133 if err != nil { 134 c.Lock() 135 c.SetStopped(&exitStatus) 136 daemon.setStateCounter(c) 137 c.CheckpointTo(daemon.containersReplica) 138 c.Unlock() 139 defer daemon.autoRemove(&cfg.Config, c) 140 if err != restartmanager.ErrRestartCanceled { 141 log.G(ctx).Errorf("restartmanger wait error: %+v", err) 142 } 143 } 144 }() 145 } 146 147 return checkpointErr 148 } 149 150 // ProcessEvent is called by libcontainerd whenever an event occurs 151 func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error { 152 c, err := daemon.GetContainer(id) 153 if err != nil { 154 return errors.Wrapf(err, "could not find container %s", id) 155 } 156 157 switch e { 158 case libcontainerdtypes.EventOOM: 159 // StateOOM is Linux specific and should never be hit on Windows 160 if isWindows { 161 return errors.New("received StateOOM from libcontainerd on Windows. This should never happen") 162 } 163 164 c.Lock() 165 defer c.Unlock() 166 c.OOMKilled = true 167 daemon.updateHealthMonitor(c) 168 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 169 return err 170 } 171 172 daemon.LogContainerEvent(c, events.ActionOOM) 173 case libcontainerdtypes.EventExit: 174 if ei.ProcessID == ei.ContainerID { 175 return daemon.handleContainerExit(c, &ei) 176 } 177 178 exitCode := 127 179 if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil { 180 ec := int(ei.ExitCode) 181 execConfig.Lock() 182 defer execConfig.Unlock() 183 184 // Remove the exec command from the container's store only and not the 185 // daemon's store so that the exec command can be inspected. Remove it 186 // before mutating execConfig to maintain the invariant that 187 // c.ExecCommands only contains execs that have not exited. 188 c.ExecCommands.Delete(execConfig.ID) 189 190 execConfig.ExitCode = &ec 191 execConfig.Running = false 192 193 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 194 execConfig.StreamConfig.Wait(ctx) 195 cancel() 196 197 if err := execConfig.CloseStreams(); err != nil { 198 log.G(ctx).Errorf("failed to cleanup exec %s streams: %s", c.ID, err) 199 } 200 201 exitCode = ec 202 203 // If the exec failed at start in such a way that containerd 204 // publishes an exit event for it, we will race processing the event 205 // with daemon.ContainerExecStart() removing the exec from 206 // c.ExecCommands. If we win the race, we will find that there is no 207 // process to clean up. (And ContainerExecStart will clobber the 208 // exit code we set.) Prevent a nil-dereferenc panic in that 209 // situation to restore the status quo where this is merely a 210 // logical race condition. 211 if execConfig.Process != nil { 212 go func() { 213 if _, err := execConfig.Process.Delete(context.Background()); err != nil { 214 log.G(ctx).WithFields(log.Fields{ 215 "error": err, 216 "container": ei.ContainerID, 217 "process": ei.ProcessID, 218 }).Warn("failed to delete process") 219 } 220 }() 221 } 222 } 223 daemon.LogContainerEventWithAttributes(c, events.ActionExecDie, map[string]string{ 224 "execID": ei.ProcessID, 225 "exitCode": strconv.Itoa(exitCode), 226 }) 227 case libcontainerdtypes.EventStart: 228 c.Lock() 229 defer c.Unlock() 230 231 // This is here to handle start not generated by docker 232 if !c.Running { 233 ctr, err := daemon.containerd.LoadContainer(context.Background(), c.ID) 234 if err != nil { 235 if errdefs.IsNotFound(err) { 236 // The container was started by not-docker and so could have been deleted by 237 // not-docker before we got around to loading it from containerd. 238 log.G(context.TODO()).WithFields(log.Fields{ 239 "error": err, 240 "container": c.ID, 241 }).Debug("could not load containerd container for start event") 242 return nil 243 } 244 return err 245 } 246 tsk, err := ctr.Task(context.Background()) 247 if err != nil { 248 if errdefs.IsNotFound(err) { 249 log.G(context.TODO()).WithFields(log.Fields{ 250 "error": err, 251 "container": c.ID, 252 }).Debug("failed to load task for externally-started container") 253 return nil 254 } 255 return err 256 } 257 c.SetRunning(ctr, tsk, false) 258 c.HasBeenManuallyStopped = false 259 c.HasBeenStartedBefore = true 260 daemon.setStateCounter(c) 261 262 daemon.initHealthMonitor(c) 263 264 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 265 return err 266 } 267 daemon.LogContainerEvent(c, events.ActionStart) 268 } 269 270 case libcontainerdtypes.EventPaused: 271 c.Lock() 272 defer c.Unlock() 273 274 if !c.Paused { 275 c.Paused = true 276 daemon.setStateCounter(c) 277 daemon.updateHealthMonitor(c) 278 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 279 return err 280 } 281 daemon.LogContainerEvent(c, events.ActionPause) 282 } 283 case libcontainerdtypes.EventResumed: 284 c.Lock() 285 defer c.Unlock() 286 287 if c.Paused { 288 c.Paused = false 289 daemon.setStateCounter(c) 290 daemon.updateHealthMonitor(c) 291 292 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 293 return err 294 } 295 daemon.LogContainerEvent(c, events.ActionUnPause) 296 } 297 } 298 return nil 299 } 300 301 func (daemon *Daemon) autoRemove(cfg *config.Config, c *container.Container) { 302 c.Lock() 303 ar := c.HostConfig.AutoRemove 304 c.Unlock() 305 if !ar { 306 return 307 } 308 309 err := daemon.containerRm(cfg, c.ID, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}) 310 if err == nil { 311 return 312 } 313 if c := daemon.containers.Get(c.ID); c == nil { 314 return 315 } 316 317 log.G(context.TODO()).WithFields(log.Fields{"error": err, "container": c.ID}).Error("error removing container") 318 }