github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/drivers/docker/handle.go (about) 1 package docker 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "runtime" 8 "strings" 9 "sync" 10 "syscall" 11 "time" 12 13 "github.com/armon/circbuf" 14 docker "github.com/fsouza/go-dockerclient" 15 "github.com/hashicorp/consul-template/signals" 16 "github.com/hashicorp/go-hclog" 17 "github.com/hashicorp/go-plugin" 18 19 "github.com/hashicorp/nomad/drivers/docker/docklog" 20 "github.com/hashicorp/nomad/plugins/drivers" 21 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 22 ) 23 24 type taskHandle struct { 25 client *docker.Client 26 waitClient *docker.Client 27 logger hclog.Logger 28 dlogger docklog.DockerLogger 29 dloggerPluginClient *plugin.Client 30 task *drivers.TaskConfig 31 containerID string 32 containerImage string 33 doneCh chan bool 34 waitCh chan struct{} 35 removeContainerOnExit bool 36 net *drivers.DriverNetwork 37 38 exitResult *drivers.ExitResult 39 exitResultLock sync.Mutex 40 } 41 42 func (h *taskHandle) ExitResult() *drivers.ExitResult { 43 h.exitResultLock.Lock() 44 defer h.exitResultLock.Unlock() 45 return h.exitResult.Copy() 46 } 47 48 type taskHandleState struct { 49 // ReattachConfig for the docker logger plugin 50 ReattachConfig *pstructs.ReattachConfig 51 52 ContainerID string 53 DriverNetwork *drivers.DriverNetwork 54 } 55 56 func (h *taskHandle) buildState() *taskHandleState { 57 s := &taskHandleState{ 58 ContainerID: h.containerID, 59 DriverNetwork: h.net, 60 } 61 if h.dloggerPluginClient != nil { 62 s.ReattachConfig = pstructs.ReattachConfigFromGoPlugin(h.dloggerPluginClient.ReattachConfig()) 63 } 64 return s 65 } 66 67 func (h *taskHandle) Exec(ctx context.Context, cmd string, args []string) (*drivers.ExecTaskResult, error) { 68 fullCmd := make([]string, len(args)+1) 69 fullCmd[0] = cmd 70 copy(fullCmd[1:], args) 71 createExecOpts := docker.CreateExecOptions{ 72 AttachStdin: false, 73 AttachStdout: true, 74 AttachStderr: true, 75 Tty: false, 76 Cmd: fullCmd, 77 Container: h.containerID, 78 Context: ctx, 79 } 80 exec, err := h.client.CreateExec(createExecOpts) 81 if err != nil { 82 return nil, err 83 } 84 85 execResult := &drivers.ExecTaskResult{ExitResult: &drivers.ExitResult{}} 86 stdout, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) 87 stderr, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) 88 startOpts := docker.StartExecOptions{ 89 Detach: false, 90 Tty: false, 91 OutputStream: stdout, 92 ErrorStream: stderr, 93 Context: ctx, 94 } 95 if err := client.StartExec(exec.ID, startOpts); err != nil { 96 return nil, err 97 } 98 execResult.Stdout = stdout.Bytes() 99 execResult.Stderr = stderr.Bytes() 100 res, err := client.InspectExec(exec.ID) 101 if err != nil { 102 return execResult, err 103 } 104 105 execResult.ExitResult.ExitCode = res.ExitCode 106 return execResult, nil 107 } 108 109 func (h *taskHandle) Signal(ctx context.Context, s os.Signal) error { 110 // Convert types 111 sysSig, ok := s.(syscall.Signal) 112 if !ok { 113 return fmt.Errorf("Failed to determine signal number") 114 } 115 116 // TODO When we expose signals we will need a mapping layer that converts 117 // MacOS signals to the correct signal number for docker. Or we change the 118 // interface to take a signal string and leave it up to driver to map? 119 120 dockerSignal := docker.Signal(sysSig) 121 opts := docker.KillContainerOptions{ 122 ID: h.containerID, 123 Signal: dockerSignal, 124 Context: ctx, 125 } 126 return h.client.KillContainer(opts) 127 } 128 129 // parseSignal interprets the signal name into an os.Signal. If no name is 130 // provided, the docker driver defaults to SIGTERM. If the OS is Windows and 131 // SIGINT is provided, the signal is converted to SIGTERM. 132 func parseSignal(os, signal string) (os.Signal, error) { 133 // Unlike other drivers, docker defaults to SIGTERM, aiming for consistency 134 // with the 'docker stop' command. 135 // https://docs.docker.com/engine/reference/commandline/stop/#extended-description 136 if signal == "" { 137 signal = "SIGTERM" 138 } 139 140 // Windows Docker daemon does not support SIGINT, SIGTERM is the semantic equivalent that 141 // allows for graceful shutdown before being followed up by a SIGKILL. 142 // Supported signals: 143 // https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26 144 if os == "windows" && signal == "SIGINT" { 145 signal = "SIGTERM" 146 } 147 148 return signals.Parse(signal) 149 } 150 151 // Kill is used to terminate the task. 152 func (h *taskHandle) Kill(killTimeout time.Duration, signal string) error { 153 var err error 154 // Calling StopContainer lets docker handle the stop signal (specified 155 // in the Dockerfile or defaulting to SIGTERM). If kill_signal is specified, 156 // Signal is used to kill the container with the desired signal before 157 // calling StopContainer 158 if signal == "" { 159 err = h.client.StopContainer(h.containerID, uint(killTimeout.Seconds())) 160 } else { 161 ctx, cancel := context.WithTimeout(context.Background(), killTimeout) 162 defer cancel() 163 164 sig, parseErr := parseSignal(runtime.GOOS, signal) 165 if parseErr != nil { 166 return fmt.Errorf("failed to parse signal: %v", parseErr) 167 } 168 169 if err := h.Signal(ctx, sig); err != nil { 170 // Container has already been removed. 171 if strings.Contains(err.Error(), NoSuchContainerError) { 172 h.logger.Debug("attempted to signal nonexistent container") 173 return nil 174 } 175 // Container has already been stopped. 176 if strings.Contains(err.Error(), ContainerNotRunningError) { 177 h.logger.Debug("attempted to signal a not-running container") 178 return nil 179 } 180 181 h.logger.Error("failed to signal container while killing", "error", err) 182 return fmt.Errorf("Failed to signal container %q while killing: %v", h.containerID, err) 183 } 184 185 select { 186 case <-h.waitCh: 187 return nil 188 case <-ctx.Done(): 189 } 190 191 // Stop the container 192 err = h.client.StopContainer(h.containerID, 0) 193 } 194 195 if err != nil { 196 // Container has already been removed. 197 if strings.Contains(err.Error(), NoSuchContainerError) { 198 h.logger.Debug("attempted to stop nonexistent container") 199 return nil 200 } 201 // Container has already been stopped. 202 if strings.Contains(err.Error(), ContainerNotRunningError) { 203 h.logger.Debug("attempted to stop an not-running container") 204 return nil 205 } 206 207 h.logger.Error("failed to stop container", "error", err) 208 return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) 209 } 210 211 h.logger.Info("stopped container") 212 return nil 213 } 214 215 func (h *taskHandle) shutdownLogger() { 216 if h.dlogger == nil { 217 return 218 } 219 220 if err := h.dlogger.Stop(); err != nil { 221 h.logger.Error("failed to stop docker logger process during StopTask", 222 "error", err, "logger_pid", h.dloggerPluginClient.ReattachConfig().Pid) 223 } 224 h.dloggerPluginClient.Kill() 225 } 226 227 func (h *taskHandle) run() { 228 defer h.shutdownLogger() 229 230 exitCode, werr := h.waitClient.WaitContainer(h.containerID) 231 if werr != nil { 232 h.logger.Error("failed to wait for container; already terminated") 233 } 234 235 if exitCode != 0 { 236 werr = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) 237 } 238 239 container, ierr := h.waitClient.InspectContainerWithOptions(docker.InspectContainerOptions{ 240 ID: h.containerID, 241 }) 242 oom := false 243 if ierr != nil { 244 h.logger.Error("failed to inspect container", "error", ierr) 245 } else if container.State.OOMKilled { 246 // Note that with cgroups.v2 the cgroup OOM killer is not 247 // observed by docker container status. But we can't test the 248 // exit code, as 137 is used for any SIGKILL 249 oom = true 250 werr = fmt.Errorf("OOM Killed") 251 } 252 253 // Shutdown stats collection 254 close(h.doneCh) 255 256 // Stop the container just incase the docker daemon's wait returned 257 // incorrectly 258 if err := h.client.StopContainer(h.containerID, 0); err != nil { 259 _, noSuchContainer := err.(*docker.NoSuchContainer) 260 _, containerNotRunning := err.(*docker.ContainerNotRunning) 261 if !containerNotRunning && !noSuchContainer { 262 h.logger.Error("error stopping container", "error", err) 263 } 264 } 265 266 // Set the result 267 h.exitResultLock.Lock() 268 h.exitResult = &drivers.ExitResult{ 269 ExitCode: exitCode, 270 Signal: 0, 271 OOMKilled: oom, 272 Err: werr, 273 } 274 h.exitResultLock.Unlock() 275 close(h.waitCh) 276 }