github.com/rish1988/moby@v25.0.2+incompatible/daemon/monitor.go (about)

     1  package daemon // import "github.com/docker/docker/daemon"
     2  
     3  import (
     4  	"context"
     5  	"strconv"
     6  	"time"
     7  
     8  	"github.com/containerd/log"
     9  	"github.com/docker/docker/api/types/backend"
    10  	"github.com/docker/docker/api/types/events"
    11  	"github.com/docker/docker/container"
    12  	"github.com/docker/docker/daemon/config"
    13  	"github.com/docker/docker/errdefs"
    14  	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
    15  	"github.com/docker/docker/restartmanager"
    16  	"github.com/pkg/errors"
    17  )
    18  
    19  func (daemon *Daemon) setStateCounter(c *container.Container) {
    20  	switch c.StateString() {
    21  	case "paused":
    22  		stateCtr.set(c.ID, "paused")
    23  	case "running":
    24  		stateCtr.set(c.ID, "running")
    25  	default:
    26  		stateCtr.set(c.ID, "stopped")
    27  	}
    28  }
    29  
    30  func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
    31  	var exitStatus container.ExitStatus
    32  	c.Lock()
    33  
    34  	cfg := daemon.config()
    35  
    36  	// Health checks will be automatically restarted if/when the
    37  	// container is started again.
    38  	daemon.stopHealthchecks(c)
    39  
    40  	tsk, ok := c.Task()
    41  	if ok {
    42  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    43  		es, err := tsk.Delete(ctx)
    44  		cancel()
    45  		if err != nil {
    46  			log.G(ctx).WithFields(log.Fields{
    47  				"error":     err,
    48  				"container": c.ID,
    49  			}).Warn("failed to delete container from containerd")
    50  		} else {
    51  			exitStatus = container.ExitStatus{
    52  				ExitCode: int(es.ExitCode()),
    53  				ExitedAt: es.ExitTime(),
    54  			}
    55  		}
    56  	}
    57  
    58  	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
    59  	c.StreamConfig.Wait(ctx)
    60  	cancel()
    61  
    62  	c.Reset(false)
    63  
    64  	if e != nil {
    65  		exitStatus.ExitCode = int(e.ExitCode)
    66  		exitStatus.ExitedAt = e.ExitedAt
    67  		if e.Error != nil {
    68  			c.SetError(e.Error)
    69  		}
    70  	}
    71  
    72  	daemonShutdown := daemon.IsShuttingDown()
    73  	execDuration := time.Since(c.StartedAt)
    74  	restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
    75  	if err != nil {
    76  		log.G(ctx).WithFields(log.Fields{
    77  			"error":                  err,
    78  			"container":              c.ID,
    79  			"restartCount":           c.RestartCount,
    80  			"exitStatus":             exitStatus,
    81  			"daemonShuttingDown":     daemonShutdown,
    82  			"hasBeenManuallyStopped": c.HasBeenManuallyStopped,
    83  			"execDuration":           execDuration,
    84  		}).Warn("ShouldRestart failed, container will not be restarted")
    85  		restart = false
    86  	}
    87  
    88  	attributes := map[string]string{
    89  		"exitCode":     strconv.Itoa(exitStatus.ExitCode),
    90  		"execDuration": strconv.Itoa(int(execDuration.Seconds())),
    91  	}
    92  	daemon.Cleanup(context.TODO(), c)
    93  
    94  	if restart {
    95  		c.RestartCount++
    96  		log.G(ctx).WithFields(log.Fields{
    97  			"container":     c.ID,
    98  			"restartCount":  c.RestartCount,
    99  			"exitStatus":    exitStatus,
   100  			"manualRestart": c.HasBeenManuallyRestarted,
   101  		}).Debug("Restarting container")
   102  		c.SetRestarting(&exitStatus)
   103  	} else {
   104  		c.SetStopped(&exitStatus)
   105  		if !c.HasBeenManuallyRestarted {
   106  			defer daemon.autoRemove(&cfg.Config, c)
   107  		}
   108  	}
   109  	defer c.Unlock() // needs to be called before autoRemove
   110  
   111  	daemon.setStateCounter(c)
   112  	checkpointErr := c.CheckpointTo(daemon.containersReplica)
   113  
   114  	daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes)
   115  
   116  	if restart {
   117  		go func() {
   118  			err := <-wait
   119  			if err == nil {
   120  				// daemon.netController is initialized when daemon is restoring containers.
   121  				// But containerStart will use daemon.netController segment.
   122  				// So to avoid panic at startup process, here must wait util daemon restore done.
   123  				daemon.waitForStartupDone()
   124  				cfg := daemon.config() // Apply the most up-to-date daemon config to the restarted container.
   125  
   126  				// update the error if we fail to start the container, so that the cleanup code
   127  				// below can handle updating the container's status, and auto-remove (if set).
   128  				err = daemon.containerStart(context.Background(), cfg, c, "", "", false)
   129  				if err != nil {
   130  					log.G(ctx).Debugf("failed to restart container: %+v", err)
   131  				}
   132  			}
   133  			if err != nil {
   134  				c.Lock()
   135  				c.SetStopped(&exitStatus)
   136  				daemon.setStateCounter(c)
   137  				c.CheckpointTo(daemon.containersReplica)
   138  				c.Unlock()
   139  				defer daemon.autoRemove(&cfg.Config, c)
   140  				if err != restartmanager.ErrRestartCanceled {
   141  					log.G(ctx).Errorf("restartmanger wait error: %+v", err)
   142  				}
   143  			}
   144  		}()
   145  	}
   146  
   147  	return checkpointErr
   148  }
   149  
   150  // ProcessEvent is called by libcontainerd whenever an event occurs
   151  func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
   152  	c, err := daemon.GetContainer(id)
   153  	if err != nil {
   154  		return errors.Wrapf(err, "could not find container %s", id)
   155  	}
   156  
   157  	switch e {
   158  	case libcontainerdtypes.EventOOM:
   159  		// StateOOM is Linux specific and should never be hit on Windows
   160  		if isWindows {
   161  			return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
   162  		}
   163  
   164  		c.Lock()
   165  		defer c.Unlock()
   166  		c.OOMKilled = true
   167  		daemon.updateHealthMonitor(c)
   168  		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   169  			return err
   170  		}
   171  
   172  		daemon.LogContainerEvent(c, events.ActionOOM)
   173  	case libcontainerdtypes.EventExit:
   174  		if ei.ProcessID == ei.ContainerID {
   175  			return daemon.handleContainerExit(c, &ei)
   176  		}
   177  
   178  		exitCode := 127
   179  		if execConfig := c.ExecCommands.Get(ei.ProcessID); execConfig != nil {
   180  			ec := int(ei.ExitCode)
   181  			execConfig.Lock()
   182  			defer execConfig.Unlock()
   183  
   184  			// Remove the exec command from the container's store only and not the
   185  			// daemon's store so that the exec command can be inspected. Remove it
   186  			// before mutating execConfig to maintain the invariant that
   187  			// c.ExecCommands only contains execs that have not exited.
   188  			c.ExecCommands.Delete(execConfig.ID)
   189  
   190  			execConfig.ExitCode = &ec
   191  			execConfig.Running = false
   192  
   193  			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
   194  			execConfig.StreamConfig.Wait(ctx)
   195  			cancel()
   196  
   197  			if err := execConfig.CloseStreams(); err != nil {
   198  				log.G(ctx).Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
   199  			}
   200  
   201  			exitCode = ec
   202  
   203  			// If the exec failed at start in such a way that containerd
   204  			// publishes an exit event for it, we will race processing the event
   205  			// with daemon.ContainerExecStart() removing the exec from
   206  			// c.ExecCommands. If we win the race, we will find that there is no
   207  			// process to clean up. (And ContainerExecStart will clobber the
   208  			// exit code we set.) Prevent a nil-dereferenc panic in that
   209  			// situation to restore the status quo where this is merely a
   210  			// logical race condition.
   211  			if execConfig.Process != nil {
   212  				go func() {
   213  					if _, err := execConfig.Process.Delete(context.Background()); err != nil {
   214  						log.G(ctx).WithFields(log.Fields{
   215  							"error":     err,
   216  							"container": ei.ContainerID,
   217  							"process":   ei.ProcessID,
   218  						}).Warn("failed to delete process")
   219  					}
   220  				}()
   221  			}
   222  		}
   223  		daemon.LogContainerEventWithAttributes(c, events.ActionExecDie, map[string]string{
   224  			"execID":   ei.ProcessID,
   225  			"exitCode": strconv.Itoa(exitCode),
   226  		})
   227  	case libcontainerdtypes.EventStart:
   228  		c.Lock()
   229  		defer c.Unlock()
   230  
   231  		// This is here to handle start not generated by docker
   232  		if !c.Running {
   233  			ctr, err := daemon.containerd.LoadContainer(context.Background(), c.ID)
   234  			if err != nil {
   235  				if errdefs.IsNotFound(err) {
   236  					// The container was started by not-docker and so could have been deleted by
   237  					// not-docker before we got around to loading it from containerd.
   238  					log.G(context.TODO()).WithFields(log.Fields{
   239  						"error":     err,
   240  						"container": c.ID,
   241  					}).Debug("could not load containerd container for start event")
   242  					return nil
   243  				}
   244  				return err
   245  			}
   246  			tsk, err := ctr.Task(context.Background())
   247  			if err != nil {
   248  				if errdefs.IsNotFound(err) {
   249  					log.G(context.TODO()).WithFields(log.Fields{
   250  						"error":     err,
   251  						"container": c.ID,
   252  					}).Debug("failed to load task for externally-started container")
   253  					return nil
   254  				}
   255  				return err
   256  			}
   257  			c.SetRunning(ctr, tsk, false)
   258  			c.HasBeenManuallyStopped = false
   259  			c.HasBeenStartedBefore = true
   260  			daemon.setStateCounter(c)
   261  
   262  			daemon.initHealthMonitor(c)
   263  
   264  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   265  				return err
   266  			}
   267  			daemon.LogContainerEvent(c, events.ActionStart)
   268  		}
   269  
   270  	case libcontainerdtypes.EventPaused:
   271  		c.Lock()
   272  		defer c.Unlock()
   273  
   274  		if !c.Paused {
   275  			c.Paused = true
   276  			daemon.setStateCounter(c)
   277  			daemon.updateHealthMonitor(c)
   278  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   279  				return err
   280  			}
   281  			daemon.LogContainerEvent(c, events.ActionPause)
   282  		}
   283  	case libcontainerdtypes.EventResumed:
   284  		c.Lock()
   285  		defer c.Unlock()
   286  
   287  		if c.Paused {
   288  			c.Paused = false
   289  			daemon.setStateCounter(c)
   290  			daemon.updateHealthMonitor(c)
   291  
   292  			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
   293  				return err
   294  			}
   295  			daemon.LogContainerEvent(c, events.ActionUnPause)
   296  		}
   297  	}
   298  	return nil
   299  }
   300  
   301  func (daemon *Daemon) autoRemove(cfg *config.Config, c *container.Container) {
   302  	c.Lock()
   303  	ar := c.HostConfig.AutoRemove
   304  	c.Unlock()
   305  	if !ar {
   306  		return
   307  	}
   308  
   309  	err := daemon.containerRm(cfg, c.ID, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true})
   310  	if err == nil {
   311  		return
   312  	}
   313  	if c := daemon.containers.Get(c.ID); c == nil {
   314  		return
   315  	}
   316  
   317  	log.G(context.TODO()).WithFields(log.Fields{"error": err, "container": c.ID}).Error("error removing container")
   318  }