github.com/erriapo/docker@v1.6.0-rc2/daemon/monitor.go

package daemon

import (
	"io"
	"os/exec"
	"sync"
	"time"

	log "github.com/Sirupsen/logrus"
	"github.com/docker/docker/daemon/execdriver"
	"github.com/docker/docker/pkg/common"
	"github.com/docker/docker/runconfig"
)
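// defaultTimeIncrement is the initial delay, in milliseconds, between restarts;
// resetMonitor doubles it after each short-lived run and resets it after a long one.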
const defaultTimeIncrement = 100

// containerMonitor monitors the execution of a container's main process.
// If a restart policy is specified for the container, the monitor will ensure that the
// process is restarted based on the rules of the policy. When the container is finally stopped,
// the monitor will reset and clean up any of the container's resources, such as networking
// allocations and the rootfs.
type containerMonitor struct {
	mux sync.Mutex

	// container is the container being monitored
	container *Container

	// restartPolicy is the current policy being applied to the container monitor
	restartPolicy runconfig.RestartPolicy

	// failureCount is the number of times the container has failed to
	// start in a row
	failureCount int

	// shouldStop signals the monitor that the next time the container exits it is
	// either because docker or the user asked for the container to be stopped
	shouldStop bool

	// startSignal is a channel that is closed after the container initially starts
	startSignal chan struct{}

	// stopChan is used to signal to the monitor whenever there is a wait for the
	// next restart so that the timeIncrement is not honored and the user is not
	// left waiting for nothing to happen during this time
	stopChan chan struct{}

	// timeIncrement is the amount of time to wait between restarts
	// this is in milliseconds
	timeIncrement int

	// lastStartTime is the time at which the monitor last exec'd the container's process
	lastStartTime time.Time
}

// newContainerMonitor returns an initialized containerMonitor for the provided container
// honoring the provided restart policy
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
	return &containerMonitor{
		container:     container,
		restartPolicy: policy,
		timeIncrement: defaultTimeIncrement,
		stopChan:      make(chan struct{}),
		startSignal:   make(chan struct{}),
	}
}

// ExitOnNext signals to the container monitor that it should stop monitoring the container
// for exits the next time the process dies
func (m *containerMonitor) ExitOnNext() {
	m.mux.Lock()

	// we need to protect against a double close of the channel when stop is called
	// twice or else we will get a panic
	if !m.shouldStop {
		m.shouldStop = true
		close(m.stopChan)
	}

	m.mux.Unlock()
}

// Close closes the container's resources such as networking allocations and
// unmounts the container's root filesystem
func (m *containerMonitor) Close() error {
	// Cleanup networking and mounts
	m.container.cleanup()

	// FIXME: there is a race condition here between two RUN instructions in a Dockerfile
	// because they share the same runconfig and change the image. Must be fixed
	// in builder/builder.go
	if err := m.container.toDisk(); err != nil {
		log.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err)

		return err
	}

	return nil
}

// Start starts the container's process and monitors it according to the restart policy
func (m *containerMonitor) Start() error {
	var (
		err        error
		exitStatus execdriver.ExitStatus
		// this variable indicates where we are in the execution flow:
		// before Run or after
		afterRun bool
	)

	// ensure that when the monitor finally exits we release the networking and unmount the rootfs
	defer func() {
		if afterRun {
			m.container.Lock()
			m.container.setStopped(&exitStatus)
			defer m.container.Unlock()
		}
		m.Close()
	}()

	// reset the restart count
	m.container.RestartCount = -1

	for {
		m.container.RestartCount++

		if err := m.container.startLogging(); err != nil {
			m.resetContainer(false)

			return err
		}

		pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)

		m.container.LogEvent("start")

		m.lastStartTime = time.Now()

		if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
			// if we receive an internal error from the initial start of a container then let's
			// return it instead of entering the restart loop
			if m.container.RestartCount == 0 {
				m.container.ExitCode = -1
				m.resetContainer(false)

				return err
			}

			log.Errorf("Error running container: %s", err)
		}

		// here container.Lock is already lost
		afterRun = true

		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)

		if m.shouldRestart(exitStatus.ExitCode) {
			m.container.SetRestarting(&exitStatus)
			if exitStatus.OOMKilled {
				m.container.LogEvent("oom")
			}
			m.container.LogEvent("die")
			m.resetContainer(true)

			// sleep with a small time increment between each restart to help avoid issues caused by quickly
			// restarting the container because of some types of errors ( networking cut out, etc... )
			m.waitForNextRestart()

			// we need to check this before reentering the loop because the waitForNextRestart could have
			// been terminated by a request from a user
			if m.shouldStop {
				return err
			}
			continue
		}
		if exitStatus.OOMKilled {
			m.container.LogEvent("oom")
		}
		m.container.LogEvent("die")
		m.resetContainer(true)
		return err
	}
}

// resetMonitor resets the stateful fields on the containerMonitor based on the
// previous run's success or failure. Regardless of success, if the container had
// an execution time of more than 10s then reset the timer back to the default
func (m *containerMonitor) resetMonitor(successful bool) {
	executionTime := time.Now().Sub(m.lastStartTime).Seconds()

	if executionTime > 10 {
		m.timeIncrement = defaultTimeIncrement
	} else {
		// otherwise we need to increment the amount of time we wait before restarting
		// the process. We will build up by multiplying the increment by 2
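		// because this runs before waitForNextRestart, back-to-back short-lived runs
		// back off geometrically: 200ms, 400ms, 800ms, ... starting from the 100ms default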
		m.timeIncrement *= 2
	}

	// the container exited successfully so we need to reset the failure counter
	if successful {
		m.failureCount = 0
	} else {
		m.failureCount++
	}
}

// waitForNextRestart waits with the current time increment before restarting the container,
// unless a user or docker asks for the container to be stopped
func (m *containerMonitor) waitForNextRestart() {
	select {
	case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
	case <-m.stopChan:
	}
}

// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitCode int) bool {
	m.mux.Lock()
	defer m.mux.Unlock()

	// do not restart if the user or docker has requested that this container be stopped
	if m.shouldStop {
		return false
	}

	switch m.restartPolicy.Name {
	case "always":
		return true
	case "on-failure":
		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
		if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max {
			log.Debugf("stopping restart of container %s because maximum failure count of %d has been reached",
				common.TruncateID(m.container.ID), max)
			return false
		}

		return exitCode != 0
	}

	return false
}

// callback ensures that the container's state is properly updated after we
// received an ack from the execution drivers
func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int) {
	if processConfig.Tty {
		// The callback is called after the process Start()
		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
		// which we close here.
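		// (the child keeps its own copy of the pty slave; closing the parent's copy is
		// what lets reads on the pty master return EOF once the process exits)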
		if c, ok := processConfig.Stdout.(io.Closer); ok {
			c.Close()
		}
	}

	m.container.setRunning(pid)

	// signal that the process has started
	// close the channel only if it is not already closed
	select {
	case <-m.startSignal:
	default:
		close(m.startSignal)
	}

	if err := m.container.ToDisk(); err != nil {
		log.Debugf("%s", err)
	}
}

// resetContainer resets the container's IO and ensures that the command is able to be executed again
// by copying the data into a new struct.
// If lock is true, the container is locked during the reset.
func (m *containerMonitor) resetContainer(lock bool) {
	container := m.container
	if lock {
		container.Lock()
		defer container.Unlock()
	}

	if container.Config.OpenStdin {
		if err := container.stdin.Close(); err != nil {
			log.Errorf("%s: Error closing stdin: %s", container.ID, err)
		}
	}

	if err := container.stdout.Clean(); err != nil {
		log.Errorf("%s: Error closing stdout: %s", container.ID, err)
	}

	if err := container.stderr.Clean(); err != nil {
		log.Errorf("%s: Error closing stderr: %s", container.ID, err)
	}

	if container.command != nil && container.command.ProcessConfig.Terminal != nil {
		if err := container.command.ProcessConfig.Terminal.Close(); err != nil {
			log.Errorf("%s: Error closing terminal: %s", container.ID, err)
		}
	}

	// Re-create a brand new stdin pipe once the container exited
	if container.Config.OpenStdin {
		container.stdin, container.stdinPipe = io.Pipe()
	}

	if container.logDriver != nil {
		if container.logCopier != nil {
			exit := make(chan struct{})
			go func() {
				container.logCopier.Wait()
				close(exit)
			}()
			select {
			case <-time.After(1 * time.Second):
				log.Warnf("Logger didn't exit in time: logs may be truncated")
			case <-exit:
			}
		}
		container.logDriver.Close()
		container.logCopier = nil
		container.logDriver = nil
	}

	// an exec.Cmd cannot be reused once it has run, so build a fresh one that
	// carries over only the original configuration
	c := container.command.ProcessConfig.Cmd

	container.command.ProcessConfig.Cmd = exec.Cmd{
		Stdin:       c.Stdin,
		Stdout:      c.Stdout,
		Stderr:      c.Stderr,
		Path:        c.Path,
		Env:         c.Env,
		ExtraFiles:  c.ExtraFiles,
		Args:        c.Args,
		Dir:         c.Dir,
		SysProcAttr: c.SysProcAttr,
	}
}
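// Usage sketch (an assumption for illustration, not part of this file): the daemon
// would create one monitor per container, run it on its own goroutine, and call
// ExitOnNext when the container should not be restarted after its next exit:
//
//	monitor := newContainerMonitor(container, restartPolicy)
//	go monitor.Start()   // runs the process and restarts it per the policy
//	...
//	monitor.ExitOnNext() // stop monitoring once the process next dies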