gopkg.in/docker/docker.v23@v23.0.11/daemon/monitor.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"context"
     5  	"strconv"
     6  	"time"
     7  
     8  	"github.com/docker/docker/api/types"
     9  	"github.com/docker/docker/container"
    10  	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
    11  	"github.com/docker/docker/restartmanager"
    12  	"github.com/pkg/errors"
    13  	"github.com/sirupsen/logrus"
    14  )
    15  
    16  func (daemon *Daemon) setStateCounter(c *container.Container) {
    17  	switch c.StateString() {
    18  	case "paused":
    19  		stateCtr.set(c.ID, "paused")
    20  	case "running":
    21  		stateCtr.set(c.ID, "running")
    22  	default:
    23  		stateCtr.set(c.ID, "stopped")
    24  	}
    25  }
    26  
    27  func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
    28  	c.Lock()
    29  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    30  	ec, et, err := daemon.containerd.DeleteTask(ctx, c.ID)
    31  	cancel()
    32  	if err != nil {
    33  		logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd")
    34  	}
    35  
    36  	ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second)
    37  	c.StreamConfig.Wait(ctx)
    38  	cancel()
    39  
    40  	c.Reset(false)
    41  
    42  	exitStatus := container.ExitStatus{
    43  		ExitCode: int(ec),
    44  		ExitedAt: et,
    45  	}
    46  	if e != nil {
    47  		exitStatus.ExitCode = int(e.ExitCode)
    48  		exitStatus.ExitedAt = e.ExitedAt
    49  		exitStatus.OOMKilled = e.OOMKilled
    50  		if e.Error != nil {
    51  			c.SetError(e.Error)
    52  		}
    53  	}
    54  
    55  	daemonShutdown := daemon.IsShuttingDown()
    56  	execDuration := time.Since(c.StartedAt)
    57  	restart, wait, err := c.RestartManager().ShouldRestart(ec, daemonShutdown || c.HasBeenManuallyStopped, execDuration)
    58  	if err != nil {
    59  		logrus.WithError(err).
    60  			WithField("container", c.ID).
    61  			WithField("restartCount", c.RestartCount).
    62  			WithField("exitStatus", exitStatus).
    63  			WithField("daemonShuttingDown", daemonShutdown).
    64  			WithField("hasBeenManuallyStopped", c.HasBeenManuallyStopped).
    65  			WithField("execDuration", execDuration).
    66  			Warn("ShouldRestart failed, container will not be restarted")
    67  		restart = false
    68  	}
    69  
    70  	// cancel healthcheck here, they will be automatically
    71  	// restarted if/when the container is started again
    72  	daemon.stopHealthchecks(c)
    73  	attributes := map[string]string{
    74  		"exitCode": strconv.Itoa(int(ec)),
    75  	}
    76  	daemon.Cleanup(c)
    77  
    78  	if restart {
    79  		c.RestartCount++
    80  		logrus.WithField("container", c.ID).
    81  			WithField("restartCount", c.RestartCount).
    82  			WithField("exitStatus", exitStatus).
    83  			WithField("manualRestart", c.HasBeenManuallyRestarted).
    84  			Debug("Restarting container")
    85  		c.SetRestarting(&exitStatus)
    86  	} else {
    87  		c.SetStopped(&exitStatus)
    88  		if !c.HasBeenManuallyRestarted {
    89  			defer daemon.autoRemove(c)
    90  		}
    91  	}
    92  	defer c.Unlock() // needs to be called before autoRemove
    93  
    94  	daemon.setStateCounter(c)
    95  	cpErr := c.CheckpointTo(daemon.containersReplica)
    96  
    97  	daemon.LogContainerEventWithAttributes(c, "die", attributes)
    98  
    99  	if restart {
   100  		go func() {
   101  			err := <-wait
   102  			if err == nil {
   103  				// daemon.netController is initialized when daemon is restoring containers.
   104  				// But containerStart will use daemon.netController segment.
   105  				// So to avoid panic at startup process, here must wait util daemon restore done.
   106  				daemon.waitForStartupDone()
   107  				if err = daemon.containerStart(c, "", "", false); err != nil {
   108  					logrus.Debugf("failed to restart container: %+v", err)
   109  				}
   110  			}
   111  			if err != nil {
   112  				c.Lock()
   113  				c.SetStopped(&exitStatus)
   114  				daemon.setStateCounter(c)
   115  				c.CheckpointTo(daemon.containersReplica)
   116  				c.Unlock()
   117  				defer daemon.autoRemove(c)
   118  				if err != restartmanager.ErrRestartCanceled {
   119  					logrus.Errorf("restartmanger wait error: %+v", err)
   120  				}
   121  			}
   122  		}()
   123  	}
   124  
   125  	return cpErr
   126  }
   127  
   128  // ProcessEvent is called by libcontainerd whenever an event occurs
   129  func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
   130  	c, err := daemon.GetContainer(id)
   131  	if err != nil {
   132  		return errors.Wrapf(err, "could not find container %s", id)
   133  	}
   134  
   135  	switch e {
   136  	case libcontainerdtypes.EventOOM:
   137  		// StateOOM is Linux specific and should never be hit on Windows
   138  		if isWindows {
   139  			return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
   140  		}
   141  
   142  		c.Lock()
   143  		defer c.Unlock()
   144  		daemon.updateHealthMonitor(c)
   145  		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   146  			return err
   147  		}
   148  
   149  		daemon.LogContainerEvent(c, "oom")
   150  	case libcontainerdtypes.EventExit:
   151  		if ei.ProcessID == libcontainerdtypes.InitProcessName {
   152  			return daemon.handleContainerExit(c, &ei)
   153  		}
   154  
   155  		exitCode := 127
   156  		if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
   157  			ec := int(ei.ExitCode)
   158  			execConfig.Lock()
   159  			defer execConfig.Unlock()
   160  			execConfig.ExitCode = &ec
   161  			execConfig.Running = false
   162  
   163  			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
   164  			execConfig.StreamConfig.Wait(ctx)
   165  			cancel()
   166  
   167  			if err := execConfig.CloseStreams(); err != nil {
   168  				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
   169  			}
   170  
   171  			// remove the exec command from the container's store only and not the
   172  			// daemon's store so that the exec command can be inspected.
   173  			c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
   174  
   175  			exitCode = ec
   176  		}
   177  		attributes := map[string]string{
   178  			"execID":   ei.ProcessID,
   179  			"exitCode": strconv.Itoa(exitCode),
   180  		}
   181  		daemon.LogContainerEventWithAttributes(c, "exec_die", attributes)
   182  	case libcontainerdtypes.EventStart:
   183  		c.Lock()
   184  		defer c.Unlock()
   185  
   186  		// This is here to handle start not generated by docker
   187  		if !c.Running {
   188  			c.SetRunning(int(ei.Pid), false)
   189  			c.HasBeenManuallyStopped = false
   190  			c.HasBeenStartedBefore = true
   191  			daemon.setStateCounter(c)
   192  
   193  			daemon.initHealthMonitor(c)
   194  
   195  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   196  				return err
   197  			}
   198  			daemon.LogContainerEvent(c, "start")
   199  		}
   200  
   201  	case libcontainerdtypes.EventPaused:
   202  		c.Lock()
   203  		defer c.Unlock()
   204  
   205  		if !c.Paused {
   206  			c.Paused = true
   207  			daemon.setStateCounter(c)
   208  			daemon.updateHealthMonitor(c)
   209  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   210  				return err
   211  			}
   212  			daemon.LogContainerEvent(c, "pause")
   213  		}
   214  	case libcontainerdtypes.EventResumed:
   215  		c.Lock()
   216  		defer c.Unlock()
   217  
   218  		if c.Paused {
   219  			c.Paused = false
   220  			daemon.setStateCounter(c)
   221  			daemon.updateHealthMonitor(c)
   222  
   223  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   224  				return err
   225  			}
   226  			daemon.LogContainerEvent(c, "unpause")
   227  		}
   228  	}
   229  	return nil
   230  }
   231  
   232  func (daemon *Daemon) autoRemove(c *container.Container) {
   233  	c.Lock()
   234  	ar := c.HostConfig.AutoRemove
   235  	c.Unlock()
   236  	if !ar {
   237  		return
   238  	}
   239  
   240  	err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
   241  	if err == nil {
   242  		return
   243  	}
   244  	if c := daemon.containers.Get(c.ID); c == nil {
   245  		return
   246  	}
   247  
   248  	logrus.WithError(err).WithField("container", c.ID).Error("error removing container")
   249  }