github.com/wozhu6104/docker@v20.10.10+incompatible/daemon/monitor.go (about) 1 package daemon // import "github.com/docker/docker/daemon" 2 3 import ( 4 "context" 5 "strconv" 6 "time" 7 8 "github.com/docker/docker/api/types" 9 "github.com/docker/docker/container" 10 libcontainerdtypes "github.com/docker/docker/libcontainerd/types" 11 "github.com/docker/docker/restartmanager" 12 "github.com/pkg/errors" 13 "github.com/sirupsen/logrus" 14 ) 15 16 func (daemon *Daemon) setStateCounter(c *container.Container) { 17 switch c.StateString() { 18 case "paused": 19 stateCtr.set(c.ID, "paused") 20 case "running": 21 stateCtr.set(c.ID, "running") 22 default: 23 stateCtr.set(c.ID, "stopped") 24 } 25 } 26 27 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error { 28 c.Lock() 29 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 30 ec, et, err := daemon.containerd.DeleteTask(ctx, c.ID) 31 cancel() 32 if err != nil { 33 logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd") 34 } 35 36 ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second) 37 c.StreamConfig.Wait(ctx) 38 cancel() 39 40 c.Reset(false) 41 42 exitStatus := container.ExitStatus{ 43 ExitCode: int(ec), 44 ExitedAt: et, 45 } 46 if e != nil { 47 exitStatus.ExitCode = int(e.ExitCode) 48 exitStatus.ExitedAt = e.ExitedAt 49 exitStatus.OOMKilled = e.OOMKilled 50 if e.Error != nil { 51 c.SetError(e.Error) 52 } 53 } 54 55 restart, wait, err := c.RestartManager().ShouldRestart(ec, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt)) 56 if err == nil && restart { 57 c.RestartCount++ 58 c.SetRestarting(&exitStatus) 59 } else { 60 c.SetStopped(&exitStatus) 61 defer daemon.autoRemove(c) 62 } 63 defer c.Unlock() // needs to be called before autoRemove 64 65 // cancel healthcheck here, they will be automatically 66 // restarted if/when the container is started again 67 daemon.stopHealthchecks(c) 68 attributes := map[string]string{ 69 "exitCode": strconv.Itoa(int(ec)), 70 } 71 daemon.LogContainerEventWithAttributes(c, "die", attributes) 72 daemon.Cleanup(c) 73 daemon.setStateCounter(c) 74 cpErr := c.CheckpointTo(daemon.containersReplica) 75 76 if err == nil && restart { 77 go func() { 78 err := <-wait 79 if err == nil { 80 // daemon.netController is initialized when daemon is restoring containers. 81 // But containerStart will use daemon.netController segment. 82 // So to avoid panic at startup process, here must wait util daemon restore done. 83 daemon.waitForStartupDone() 84 if err = daemon.containerStart(c, "", "", false); err != nil { 85 logrus.Debugf("failed to restart container: %+v", err) 86 } 87 } 88 if err != nil { 89 c.Lock() 90 c.SetStopped(&exitStatus) 91 daemon.setStateCounter(c) 92 c.CheckpointTo(daemon.containersReplica) 93 c.Unlock() 94 defer daemon.autoRemove(c) 95 if err != restartmanager.ErrRestartCanceled { 96 logrus.Errorf("restartmanger wait error: %+v", err) 97 } 98 } 99 }() 100 } 101 102 return cpErr 103 } 104 105 // ProcessEvent is called by libcontainerd whenever an event occurs 106 func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error { 107 c, err := daemon.GetContainer(id) 108 if err != nil { 109 return errors.Wrapf(err, "could not find container %s", id) 110 } 111 112 switch e { 113 case libcontainerdtypes.EventOOM: 114 // StateOOM is Linux specific and should never be hit on Windows 115 if isWindows { 116 return errors.New("received StateOOM from libcontainerd on Windows. This should never happen") 117 } 118 119 c.Lock() 120 defer c.Unlock() 121 daemon.updateHealthMonitor(c) 122 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 123 return err 124 } 125 126 daemon.LogContainerEvent(c, "oom") 127 case libcontainerdtypes.EventExit: 128 if int(ei.Pid) == c.Pid { 129 return daemon.handleContainerExit(c, &ei) 130 } 131 132 exitCode := 127 133 if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil { 134 ec := int(ei.ExitCode) 135 execConfig.Lock() 136 defer execConfig.Unlock() 137 execConfig.ExitCode = &ec 138 execConfig.Running = false 139 140 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 141 execConfig.StreamConfig.Wait(ctx) 142 cancel() 143 144 if err := execConfig.CloseStreams(); err != nil { 145 logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err) 146 } 147 148 // remove the exec command from the container's store only and not the 149 // daemon's store so that the exec command can be inspected. 150 c.ExecCommands.Delete(execConfig.ID, execConfig.Pid) 151 152 exitCode = ec 153 } 154 attributes := map[string]string{ 155 "execID": ei.ProcessID, 156 "exitCode": strconv.Itoa(exitCode), 157 } 158 daemon.LogContainerEventWithAttributes(c, "exec_die", attributes) 159 case libcontainerdtypes.EventStart: 160 c.Lock() 161 defer c.Unlock() 162 163 // This is here to handle start not generated by docker 164 if !c.Running { 165 c.SetRunning(int(ei.Pid), false) 166 c.HasBeenManuallyStopped = false 167 c.HasBeenStartedBefore = true 168 daemon.setStateCounter(c) 169 170 daemon.initHealthMonitor(c) 171 172 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 173 return err 174 } 175 daemon.LogContainerEvent(c, "start") 176 } 177 178 case libcontainerdtypes.EventPaused: 179 c.Lock() 180 defer c.Unlock() 181 182 if !c.Paused { 183 c.Paused = true 184 daemon.setStateCounter(c) 185 daemon.updateHealthMonitor(c) 186 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 187 return err 188 } 189 daemon.LogContainerEvent(c, "pause") 190 } 191 case libcontainerdtypes.EventResumed: 192 c.Lock() 193 defer c.Unlock() 194 195 if c.Paused { 196 c.Paused = false 197 daemon.setStateCounter(c) 198 daemon.updateHealthMonitor(c) 199 200 if err := c.CheckpointTo(daemon.containersReplica); err != nil { 201 return err 202 } 203 daemon.LogContainerEvent(c, "unpause") 204 } 205 } 206 return nil 207 } 208 209 func (daemon *Daemon) autoRemove(c *container.Container) { 210 c.Lock() 211 ar := c.HostConfig.AutoRemove 212 c.Unlock() 213 if !ar { 214 return 215 } 216 217 err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}) 218 if err == nil { 219 return 220 } 221 if c := daemon.containers.Get(c.ID); c == nil { 222 return 223 } 224 225 logrus.WithError(err).WithField("container", c.ID).Error("error removing container") 226 }