gopkg.in/docker/docker.v23@v23.0.11/daemon/monitor.go (about) 1 package daemon // import "github.com/docker/docker/daemon" 2 3 import ( 4 "context" 5 "strconv" 6 "time" 7 8 "github.com/docker/docker/api/types" 9 "github.com/docker/docker/container" 10 libcontainerdtypes "github.com/docker/docker/libcontainerd/types" 11 "github.com/docker/docker/restartmanager" 12 "github.com/pkg/errors" 13 "github.com/sirupsen/logrus" 14 ) 15 16 func (daemon *Daemon) setStateCounter(c *container.Container) { 17 switch c.StateString() { 18 case "paused": 19 stateCtr.set(c.ID, "paused") 20 case "running": 21 stateCtr.set(c.ID, "running") 22 default: 23 stateCtr.set(c.ID, "stopped") 24 } 25 } 26 27 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error { 28 c.Lock() 29 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 30 ec, et, err := daemon.containerd.DeleteTask(ctx, c.ID) 31 cancel() 32 if err != nil { 33 logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd") 34 } 35 36 ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) 37 c.StreamConfig.Wait(ctx) 38 cancel() 39 40 c.Reset(false) 41 42 exitStatus := container.ExitStatus{ 43 ExitCode: int(ec), 44 ExitedAt: et, 45 } 46 if e != nil { 47 exitStatus.ExitCode = int(e.ExitCode) 48 exitStatus.ExitedAt = e.ExitedAt 49 exitStatus.OOMKilled = e.OOMKilled 50 if e.Error != nil { 51 c.SetError(e.Error) 52 } 53 } 54 55 daemonShutdown := daemon.IsShuttingDown() 56 execDuration := time.Since(c.StartedAt) 57 restart, wait, err := c.RestartManager().ShouldRestart(ec, daemonShutdown || c.HasBeenManuallyStopped, execDuration) 58 if err != nil { 59 logrus.WithError(err). 60 WithField("container", c.ID). 61 WithField("restartCount", c.RestartCount). 62 WithField("exitStatus", exitStatus). 63 WithField("daemonShuttingDown", daemonShutdown). 64 WithField("hasBeenManuallyStopped", c.HasBeenManuallyStopped). 65 WithField("execDuration", execDuration). 66 Warn("ShouldRestart failed, container will not be restarted") 67 restart = false 68 } 69 70 // cancel healthcheck here, they will be automatically 71 // restarted if/when the container is started again 72 daemon.stopHealthchecks(c) 73 attributes := map[string]string{ 74 "exitCode": strconv.Itoa(int(ec)), 75 } 76 daemon.Cleanup(c) 77 78 if restart { 79 c.RestartCount++ 80 logrus.WithField("container", c.ID). 81 WithField("restartCount", c.RestartCount). 82 WithField("exitStatus", exitStatus). 83 WithField("manualRestart", c.HasBeenManuallyRestarted). 84 Debug("Restarting container") 85 c.SetRestarting(&exitStatus) 86 } else { 87 c.SetStopped(&exitStatus) 88 if !c.HasBeenManuallyRestarted { 89 defer daemon.autoRemove(c) 90 } 91 } 92 defer c.Unlock() // needs to be called before autoRemove 93 94 daemon.setStateCounter(c) 95 cpErr := c.CheckpointTo(daemon.containersReplica) 96 97 daemon.LogContainerEventWithAttributes(c, "die", attributes) 98 99 if restart { 100 go func() { 101 err := <-wait 102 if err == nil { 103 // daemon.netController is initialized when daemon is restoring containers. 104 // But containerStart will use daemon.netController segment. 105 // So to avoid panic at startup process, here must wait util daemon restore done. 106 daemon.waitForStartupDone() 107 if err = daemon.containerStart(c, "", "", false); err != nil { 108 logrus.Debugf("failed to restart container: %+v", err) 109 } 110 } 111 if err != nil { 112 c.Lock() 113 c.SetStopped(&exitStatus) 114 daemon.setStateCounter(c) 115 c.CheckpointTo(daemon.containersReplica) 116 c.Unlock() 117 defer daemon.autoRemove(c) 118 if err != restartmanager.ErrRestartCanceled { 119 logrus.Errorf("restartmanger wait error: %+v", err) 120 } 121 } 122 }() 123 } 124 125 return cpErr 126 } 127 128 // ProcessEvent is called by libcontainerd whenever an event occurs 129 func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error { 130 c, err := daemon.GetContainer(id) 131 if err != nil { 132 return errors.Wrapf(err, "could not find container %s", id) 133 } 134 135 switch e { 136 case libcontainerdtypes.EventOOM: 137 // StateOOM is Linux specific and should never be hit on Windows 138 if isWindows { 139 return errors.New("received StateOOM from libcontainerd on Windows. This should never happen") 140 } 141 142 c.Lock() 143 defer c.Unlock() 144 daemon.updateHealthMonitor(c) 145 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 146 return err 147 } 148 149 daemon.LogContainerEvent(c, "oom") 150 case libcontainerdtypes.EventExit: 151 if ei.ProcessID == libcontainerdtypes.InitProcessName { 152 return daemon.handleContainerExit(c, &ei) 153 } 154 155 exitCode := 127 156 if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil { 157 ec := int(ei.ExitCode) 158 execConfig.Lock() 159 defer execConfig.Unlock() 160 execConfig.ExitCode = &ec 161 execConfig.Running = false 162 163 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 164 execConfig.StreamConfig.Wait(ctx) 165 cancel() 166 167 if err := execConfig.CloseStreams(); err != nil { 168 logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err) 169 } 170 171 // remove the exec command from the container's store only and not the 172 // daemon's store so that the exec command can be inspected. 173 c.ExecCommands.Delete(execConfig.ID, execConfig.Pid) 174 175 exitCode = ec 176 } 177 attributes := map[string]string{ 178 "execID": ei.ProcessID, 179 "exitCode": strconv.Itoa(exitCode), 180 } 181 daemon.LogContainerEventWithAttributes(c, "exec_die", attributes) 182 case libcontainerdtypes.EventStart: 183 c.Lock() 184 defer c.Unlock() 185 186 // This is here to handle start not generated by docker 187 if !c.Running { 188 c.SetRunning(int(ei.Pid), false) 189 c.HasBeenManuallyStopped = false 190 c.HasBeenStartedBefore = true 191 daemon.setStateCounter(c) 192 193 daemon.initHealthMonitor(c) 194 195 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 196 return err 197 } 198 daemon.LogContainerEvent(c, "start") 199 } 200 201 case libcontainerdtypes.EventPaused: 202 c.Lock() 203 defer c.Unlock() 204 205 if !c.Paused { 206 c.Paused = true 207 daemon.setStateCounter(c) 208 daemon.updateHealthMonitor(c) 209 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 210 return err 211 } 212 daemon.LogContainerEvent(c, "pause") 213 } 214 case libcontainerdtypes.EventResumed: 215 c.Lock() 216 defer c.Unlock() 217 218 if c.Paused { 219 c.Paused = false 220 daemon.setStateCounter(c) 221 daemon.updateHealthMonitor(c) 222 223 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 224 return err 225 } 226 daemon.LogContainerEvent(c, "unpause") 227 } 228 } 229 return nil 230 } 231 232 func (daemon *Daemon) autoRemove(c *container.Container) { 233 c.Lock() 234 ar := c.HostConfig.AutoRemove 235 c.Unlock() 236 if !ar { 237 return 238 } 239 240 err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}) 241 if err == nil { 242 return 243 } 244 if c := daemon.containers.Get(c.ID); c == nil { 245 return 246 } 247 248 logrus.WithError(err).WithField("container", c.ID).Error("error removing container") 249 }