github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/drivers/docker/handle.go (about)

     1  package docker
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"runtime"
     8  	"strings"
     9  	"sync"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/armon/circbuf"
    14  	docker "github.com/fsouza/go-dockerclient"
    15  	"github.com/hashicorp/consul-template/signals"
    16  	"github.com/hashicorp/go-hclog"
    17  	"github.com/hashicorp/go-plugin"
    18  
    19  	"github.com/hashicorp/nomad/drivers/docker/docklog"
    20  	"github.com/hashicorp/nomad/plugins/drivers"
    21  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    22  )
    23  
// taskHandle tracks a single docker container on behalf of the driver and
// carries everything needed to exec into, signal, stop and wait on it.
type taskHandle struct {
	// client is the docker client used for exec, signal (kill) and stop calls.
	client                *docker.Client
	// waitClient is the docker client run uses to wait on and inspect the
	// container.
	waitClient            *docker.Client
	logger                hclog.Logger
	// dlogger is the docker logger stopped by shutdownLogger; when nil there
	// is nothing to shut down.
	dlogger               docklog.DockerLogger
	// dloggerPluginClient is the go-plugin client backing dlogger. Its
	// reattach config is captured by buildState and it is killed by
	// shutdownLogger.
	dloggerPluginClient   *plugin.Client
	// NOTE(review): task, containerImage and removeContainerOnExit are not
	// read by the methods visible here; presumably consumed elsewhere in the
	// driver — confirm.
	task                  *drivers.TaskConfig
	// containerID identifies the container every docker call is issued against.
	containerID           string
	containerImage        string
	// doneCh is closed by run once the container has exited, to shut down
	// stats collection.
	doneCh                chan bool
	// waitCh is closed by run after exitResult has been recorded; Kill selects
	// on it to detect that the container exited.
	waitCh                chan struct{}
	removeContainerOnExit bool
	// net is copied into taskHandleState.DriverNetwork by buildState.
	net                   *drivers.DriverNetwork

	// exitResult is written by run and read through ExitResult; both hold
	// exitResultLock while touching it.
	exitResult     *drivers.ExitResult
	exitResultLock sync.Mutex
}
    41  
    42  func (h *taskHandle) ExitResult() *drivers.ExitResult {
    43  	h.exitResultLock.Lock()
    44  	defer h.exitResultLock.Unlock()
    45  	return h.exitResult.Copy()
    46  }
    47  
// taskHandleState is the snapshot of a taskHandle produced by buildState.
// NOTE(review): presumably this is what gets persisted so a task can be
// recovered later — confirm against the callers of buildState.
type taskHandleState struct {
	// ReattachConfig for the docker logger plugin. Left nil by buildState when
	// the handle has no logger plugin client.
	ReattachConfig *pstructs.ReattachConfig

	// ContainerID is the handle's container ID.
	ContainerID   string
	// DriverNetwork is the handle's driver network (taskHandle.net).
	DriverNetwork *drivers.DriverNetwork
}
    55  
    56  func (h *taskHandle) buildState() *taskHandleState {
    57  	s := &taskHandleState{
    58  		ContainerID:   h.containerID,
    59  		DriverNetwork: h.net,
    60  	}
    61  	if h.dloggerPluginClient != nil {
    62  		s.ReattachConfig = pstructs.ReattachConfigFromGoPlugin(h.dloggerPluginClient.ReattachConfig())
    63  	}
    64  	return s
    65  }
    66  
    67  func (h *taskHandle) Exec(ctx context.Context, cmd string, args []string) (*drivers.ExecTaskResult, error) {
    68  	fullCmd := make([]string, len(args)+1)
    69  	fullCmd[0] = cmd
    70  	copy(fullCmd[1:], args)
    71  	createExecOpts := docker.CreateExecOptions{
    72  		AttachStdin:  false,
    73  		AttachStdout: true,
    74  		AttachStderr: true,
    75  		Tty:          false,
    76  		Cmd:          fullCmd,
    77  		Container:    h.containerID,
    78  		Context:      ctx,
    79  	}
    80  	exec, err := h.client.CreateExec(createExecOpts)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  
    85  	execResult := &drivers.ExecTaskResult{ExitResult: &drivers.ExitResult{}}
    86  	stdout, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))
    87  	stderr, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))
    88  	startOpts := docker.StartExecOptions{
    89  		Detach:       false,
    90  		Tty:          false,
    91  		OutputStream: stdout,
    92  		ErrorStream:  stderr,
    93  		Context:      ctx,
    94  	}
    95  	if err := client.StartExec(exec.ID, startOpts); err != nil {
    96  		return nil, err
    97  	}
    98  	execResult.Stdout = stdout.Bytes()
    99  	execResult.Stderr = stderr.Bytes()
   100  	res, err := client.InspectExec(exec.ID)
   101  	if err != nil {
   102  		return execResult, err
   103  	}
   104  
   105  	execResult.ExitResult.ExitCode = res.ExitCode
   106  	return execResult, nil
   107  }
   108  
   109  func (h *taskHandle) Signal(ctx context.Context, s os.Signal) error {
   110  	// Convert types
   111  	sysSig, ok := s.(syscall.Signal)
   112  	if !ok {
   113  		return fmt.Errorf("Failed to determine signal number")
   114  	}
   115  
   116  	// TODO When we expose signals we will need a mapping layer that converts
   117  	// MacOS signals to the correct signal number for docker. Or we change the
   118  	// interface to take a signal string and leave it up to driver to map?
   119  
   120  	dockerSignal := docker.Signal(sysSig)
   121  	opts := docker.KillContainerOptions{
   122  		ID:      h.containerID,
   123  		Signal:  dockerSignal,
   124  		Context: ctx,
   125  	}
   126  	return h.client.KillContainer(opts)
   127  }
   128  
   129  // parseSignal interprets the signal name into an os.Signal. If no name is
   130  // provided, the docker driver defaults to SIGTERM. If the OS is Windows and
   131  // SIGINT is provided, the signal is converted to SIGTERM.
   132  func parseSignal(os, signal string) (os.Signal, error) {
   133  	// Unlike other drivers, docker defaults to SIGTERM, aiming for consistency
   134  	// with the 'docker stop' command.
   135  	// https://docs.docker.com/engine/reference/commandline/stop/#extended-description
   136  	if signal == "" {
   137  		signal = "SIGTERM"
   138  	}
   139  
   140  	// Windows Docker daemon does not support SIGINT, SIGTERM is the semantic equivalent that
   141  	// allows for graceful shutdown before being followed up by a SIGKILL.
   142  	// Supported signals:
   143  	//   https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26
   144  	if os == "windows" && signal == "SIGINT" {
   145  		signal = "SIGTERM"
   146  	}
   147  
   148  	return signals.Parse(signal)
   149  }
   150  
   151  // Kill is used to terminate the task.
   152  func (h *taskHandle) Kill(killTimeout time.Duration, signal string) error {
   153  	var err error
   154  	// Calling StopContainer lets docker handle the stop signal (specified
   155  	// in the Dockerfile or defaulting to SIGTERM). If kill_signal is specified,
   156  	// Signal is used to kill the container with the desired signal before
   157  	// calling StopContainer
   158  	if signal == "" {
   159  		err = h.client.StopContainer(h.containerID, uint(killTimeout.Seconds()))
   160  	} else {
   161  		ctx, cancel := context.WithTimeout(context.Background(), killTimeout)
   162  		defer cancel()
   163  
   164  		sig, parseErr := parseSignal(runtime.GOOS, signal)
   165  		if parseErr != nil {
   166  			return fmt.Errorf("failed to parse signal: %v", parseErr)
   167  		}
   168  
   169  		if err := h.Signal(ctx, sig); err != nil {
   170  			// Container has already been removed.
   171  			if strings.Contains(err.Error(), NoSuchContainerError) {
   172  				h.logger.Debug("attempted to signal nonexistent container")
   173  				return nil
   174  			}
   175  			// Container has already been stopped.
   176  			if strings.Contains(err.Error(), ContainerNotRunningError) {
   177  				h.logger.Debug("attempted to signal a not-running container")
   178  				return nil
   179  			}
   180  
   181  			h.logger.Error("failed to signal container while killing", "error", err)
   182  			return fmt.Errorf("Failed to signal container %q while killing: %v", h.containerID, err)
   183  		}
   184  
   185  		select {
   186  		case <-h.waitCh:
   187  			return nil
   188  		case <-ctx.Done():
   189  		}
   190  
   191  		// Stop the container
   192  		err = h.client.StopContainer(h.containerID, 0)
   193  	}
   194  
   195  	if err != nil {
   196  		// Container has already been removed.
   197  		if strings.Contains(err.Error(), NoSuchContainerError) {
   198  			h.logger.Debug("attempted to stop nonexistent container")
   199  			return nil
   200  		}
   201  		// Container has already been stopped.
   202  		if strings.Contains(err.Error(), ContainerNotRunningError) {
   203  			h.logger.Debug("attempted to stop an not-running container")
   204  			return nil
   205  		}
   206  
   207  		h.logger.Error("failed to stop container", "error", err)
   208  		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
   209  	}
   210  
   211  	h.logger.Info("stopped container")
   212  	return nil
   213  }
   214  
   215  func (h *taskHandle) shutdownLogger() {
   216  	if h.dlogger == nil {
   217  		return
   218  	}
   219  
   220  	if err := h.dlogger.Stop(); err != nil {
   221  		h.logger.Error("failed to stop docker logger process during StopTask",
   222  			"error", err, "logger_pid", h.dloggerPluginClient.ReattachConfig().Pid)
   223  	}
   224  	h.dloggerPluginClient.Kill()
   225  }
   226  
   227  func (h *taskHandle) run() {
   228  	defer h.shutdownLogger()
   229  
   230  	exitCode, werr := h.waitClient.WaitContainer(h.containerID)
   231  	if werr != nil {
   232  		h.logger.Error("failed to wait for container; already terminated")
   233  	}
   234  
   235  	if exitCode != 0 {
   236  		werr = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode)
   237  	}
   238  
   239  	container, ierr := h.waitClient.InspectContainerWithOptions(docker.InspectContainerOptions{
   240  		ID: h.containerID,
   241  	})
   242  	oom := false
   243  	if ierr != nil {
   244  		h.logger.Error("failed to inspect container", "error", ierr)
   245  	} else if container.State.OOMKilled {
   246  		// Note that with cgroups.v2 the cgroup OOM killer is not
   247  		// observed by docker container status. But we can't test the
   248  		// exit code, as 137 is used for any SIGKILL
   249  		oom = true
   250  		werr = fmt.Errorf("OOM Killed")
   251  	}
   252  
   253  	// Shutdown stats collection
   254  	close(h.doneCh)
   255  
   256  	// Stop the container just incase the docker daemon's wait returned
   257  	// incorrectly
   258  	if err := h.client.StopContainer(h.containerID, 0); err != nil {
   259  		_, noSuchContainer := err.(*docker.NoSuchContainer)
   260  		_, containerNotRunning := err.(*docker.ContainerNotRunning)
   261  		if !containerNotRunning && !noSuchContainer {
   262  			h.logger.Error("error stopping container", "error", err)
   263  		}
   264  	}
   265  
   266  	// Set the result
   267  	h.exitResultLock.Lock()
   268  	h.exitResult = &drivers.ExitResult{
   269  		ExitCode:  exitCode,
   270  		Signal:    0,
   271  		OOMKilled: oom,
   272  		Err:       werr,
   273  	}
   274  	h.exitResultLock.Unlock()
   275  	close(h.waitCh)
   276  }