github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/monitor.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"context"
     5  	"strconv"
     6  	"time"
     7  
     8  	"github.com/docker/docker/api/types"
     9  	"github.com/docker/docker/container"
    10  	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
    11  	"github.com/docker/docker/restartmanager"
    12  	"github.com/pkg/errors"
    13  	"github.com/sirupsen/logrus"
    14  )
    15  
    16  func (daemon *Daemon) setStateCounter(c *container.Container) {
    17  	switch c.StateString() {
    18  	case "paused":
    19  		stateCtr.set(c.ID, "paused")
    20  	case "running":
    21  		stateCtr.set(c.ID, "running")
    22  	default:
    23  		stateCtr.set(c.ID, "stopped")
    24  	}
    25  }
    26  
    27  // ProcessEvent is called by libcontainerd whenever an event occurs
    28  func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
    29  	c, err := daemon.GetContainer(id)
    30  	if err != nil {
    31  		return errors.Wrapf(err, "could not find container %s", id)
    32  	}
    33  
    34  	switch e {
    35  	case libcontainerdtypes.EventOOM:
    36  		// StateOOM is Linux specific and should never be hit on Windows
    37  		if isWindows {
    38  			return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
    39  		}
    40  
    41  		c.Lock()
    42  		defer c.Unlock()
    43  		daemon.updateHealthMonitor(c)
    44  		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
    45  			return err
    46  		}
    47  
    48  		daemon.LogContainerEvent(c, "oom")
    49  	case libcontainerdtypes.EventExit:
    50  		if int(ei.Pid) == c.Pid {
    51  			c.Lock()
    52  			_, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
    53  			if err != nil {
    54  				logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
    55  			}
    56  			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
    57  			c.StreamConfig.Wait(ctx)
    58  			cancel()
    59  			c.Reset(false)
    60  
    61  			exitStatus := container.ExitStatus{
    62  				ExitCode:  int(ei.ExitCode),
    63  				ExitedAt:  ei.ExitedAt,
    64  				OOMKilled: ei.OOMKilled,
    65  			}
    66  			restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
    67  			if err == nil && restart {
    68  				c.RestartCount++
    69  				c.SetRestarting(&exitStatus)
    70  			} else {
    71  				if ei.Error != nil {
    72  					c.SetError(ei.Error)
    73  				}
    74  				c.SetStopped(&exitStatus)
    75  				defer daemon.autoRemove(c)
    76  			}
    77  			defer c.Unlock() // needs to be called before autoRemove
    78  
    79  			// cancel healthcheck here, they will be automatically
    80  			// restarted if/when the container is started again
    81  			daemon.stopHealthchecks(c)
    82  			attributes := map[string]string{
    83  				"exitCode": strconv.Itoa(int(ei.ExitCode)),
    84  			}
    85  			daemon.LogContainerEventWithAttributes(c, "die", attributes)
    86  			daemon.Cleanup(c)
    87  			daemon.setStateCounter(c)
    88  			cpErr := c.CheckpointTo(daemon.containersReplica)
    89  
    90  			if err == nil && restart {
    91  				go func() {
    92  					err := <-wait
    93  					if err == nil {
    94  						// daemon.netController is initialized when daemon is restoring containers.
    95  						// But containerStart will use daemon.netController segment.
    96  						// So to avoid panic at startup process, here must wait util daemon restore done.
    97  						daemon.waitForStartupDone()
    98  						if err = daemon.containerStart(c, "", "", false); err != nil {
    99  							logrus.Debugf("failed to restart container: %+v", err)
   100  						}
   101  					}
   102  					if err != nil {
   103  						c.Lock()
   104  						c.SetStopped(&exitStatus)
   105  						daemon.setStateCounter(c)
   106  						c.CheckpointTo(daemon.containersReplica)
   107  						c.Unlock()
   108  						defer daemon.autoRemove(c)
   109  						if err != restartmanager.ErrRestartCanceled {
   110  							logrus.Errorf("restartmanger wait error: %+v", err)
   111  						}
   112  					}
   113  				}()
   114  			}
   115  
   116  			return cpErr
   117  		}
   118  
   119  		exitCode := 127
   120  		if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
   121  			ec := int(ei.ExitCode)
   122  			execConfig.Lock()
   123  			defer execConfig.Unlock()
   124  			execConfig.ExitCode = &ec
   125  			execConfig.Running = false
   126  
   127  			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
   128  			execConfig.StreamConfig.Wait(ctx)
   129  			cancel()
   130  
   131  			if err := execConfig.CloseStreams(); err != nil {
   132  				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
   133  			}
   134  
   135  			// remove the exec command from the container's store only and not the
   136  			// daemon's store so that the exec command can be inspected.
   137  			c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
   138  
   139  			exitCode = ec
   140  		}
   141  		attributes := map[string]string{
   142  			"execID":   ei.ProcessID,
   143  			"exitCode": strconv.Itoa(exitCode),
   144  		}
   145  		daemon.LogContainerEventWithAttributes(c, "exec_die", attributes)
   146  	case libcontainerdtypes.EventStart:
   147  		c.Lock()
   148  		defer c.Unlock()
   149  
   150  		// This is here to handle start not generated by docker
   151  		if !c.Running {
   152  			c.SetRunning(int(ei.Pid), false)
   153  			c.HasBeenManuallyStopped = false
   154  			c.HasBeenStartedBefore = true
   155  			daemon.setStateCounter(c)
   156  
   157  			daemon.initHealthMonitor(c)
   158  
   159  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   160  				return err
   161  			}
   162  			daemon.LogContainerEvent(c, "start")
   163  		}
   164  
   165  	case libcontainerdtypes.EventPaused:
   166  		c.Lock()
   167  		defer c.Unlock()
   168  
   169  		if !c.Paused {
   170  			c.Paused = true
   171  			daemon.setStateCounter(c)
   172  			daemon.updateHealthMonitor(c)
   173  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   174  				return err
   175  			}
   176  			daemon.LogContainerEvent(c, "pause")
   177  		}
   178  	case libcontainerdtypes.EventResumed:
   179  		c.Lock()
   180  		defer c.Unlock()
   181  
   182  		if c.Paused {
   183  			c.Paused = false
   184  			daemon.setStateCounter(c)
   185  			daemon.updateHealthMonitor(c)
   186  
   187  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   188  				return err
   189  			}
   190  			daemon.LogContainerEvent(c, "unpause")
   191  		}
   192  	}
   193  	return nil
   194  }
   195  
   196  func (daemon *Daemon) autoRemove(c *container.Container) {
   197  	c.Lock()
   198  	ar := c.HostConfig.AutoRemove
   199  	c.Unlock()
   200  	if !ar {
   201  		return
   202  	}
   203  
   204  	err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
   205  	if err == nil {
   206  		return
   207  	}
   208  	if c := daemon.containers.Get(c.ID); c == nil {
   209  		return
   210  	}
   211  
   212  	logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
   213  }