github.com/endocode/docker@v1.4.2-0.20160113120958-46eb4700391e/daemon/execdriver/native/driver.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "strings" 13 "sync" 14 "syscall" 15 "time" 16 17 "github.com/Sirupsen/logrus" 18 "github.com/docker/docker/daemon/execdriver" 19 "github.com/docker/docker/pkg/parsers" 20 "github.com/docker/docker/pkg/pools" 21 "github.com/docker/docker/pkg/reexec" 22 sysinfo "github.com/docker/docker/pkg/system" 23 "github.com/docker/docker/pkg/term" 24 "github.com/opencontainers/runc/libcontainer" 25 "github.com/opencontainers/runc/libcontainer/apparmor" 26 "github.com/opencontainers/runc/libcontainer/cgroups/systemd" 27 "github.com/opencontainers/runc/libcontainer/configs" 28 "github.com/opencontainers/runc/libcontainer/system" 29 "github.com/opencontainers/runc/libcontainer/utils" 30 ) 31 32 // Define constants for native driver 33 const ( 34 DriverName = "native" 35 Version = "0.2" 36 ) 37 38 // Driver contains all information for native driver, 39 // it implements execdriver.Driver. 40 type Driver struct { 41 root string 42 activeContainers map[string]libcontainer.Container 43 machineMemory int64 44 factory libcontainer.Factory 45 sync.Mutex 46 } 47 48 // NewDriver returns a new native driver, called from NewDriver of execdriver. 49 func NewDriver(root string, options []string) (*Driver, error) { 50 meminfo, err := sysinfo.ReadMemInfo() 51 if err != nil { 52 return nil, err 53 } 54 55 if err := sysinfo.MkdirAll(root, 0700); err != nil { 56 return nil, err 57 } 58 59 if apparmor.IsEnabled() { 60 if err := installAppArmorProfile(); err != nil { 61 apparmorProfiles := []string{"docker-default"} 62 63 // Allow daemon to run if loading failed, but are active 64 // (possibly through another run, manually, or via system startup) 65 for _, policy := range apparmorProfiles { 66 if err := hasAppArmorProfileLoaded(policy); err != nil { 67 return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) 68 } 69 } 70 } 71 } 72 73 // choose cgroup manager 74 // this makes sure there are no breaking changes to people 75 // who upgrade from versions without native.cgroupdriver opt 76 cgm := libcontainer.Cgroupfs 77 78 // parse the options 79 for _, option := range options { 80 key, val, err := parsers.ParseKeyValueOpt(option) 81 if err != nil { 82 return nil, err 83 } 84 key = strings.ToLower(key) 85 switch key { 86 case "native.cgroupdriver": 87 // override the default if they set options 88 switch val { 89 case "systemd": 90 if systemd.UseSystemd() { 91 cgm = libcontainer.SystemdCgroups 92 } else { 93 // warn them that they chose the wrong driver 94 logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead") 95 } 96 case "cgroupfs": 97 cgm = libcontainer.Cgroupfs 98 default: 99 return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val) 100 } 101 default: 102 return nil, fmt.Errorf("Unknown option %s\n", key) 103 } 104 } 105 106 f, err := libcontainer.New( 107 root, 108 cgm, 109 libcontainer.InitPath(reexec.Self(), DriverName), 110 ) 111 if err != nil { 112 return nil, err 113 } 114 115 return &Driver{ 116 root: root, 117 activeContainers: make(map[string]libcontainer.Container), 118 machineMemory: meminfo.MemTotal, 119 factory: f, 120 }, nil 121 } 122 123 type execOutput struct { 124 exitCode int 125 err error 126 } 127 128 // Run implements the exec driver Driver interface, 129 // it calls libcontainer APIs to run a container. 130 func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) { 131 destroyed := false 132 var err error 133 c.TmpDir, err = ioutil.TempDir("", c.ID) 134 if err != nil { 135 return execdriver.ExitStatus{ExitCode: -1}, err 136 } 137 defer os.RemoveAll(c.TmpDir) 138 139 // take the Command and populate the libcontainer.Config from it 140 container, err := d.createContainer(c, hooks) 141 if err != nil { 142 return execdriver.ExitStatus{ExitCode: -1}, err 143 } 144 145 p := &libcontainer.Process{ 146 Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), 147 Env: c.ProcessConfig.Env, 148 Cwd: c.WorkingDir, 149 User: c.ProcessConfig.User, 150 } 151 152 if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil { 153 return execdriver.ExitStatus{ExitCode: -1}, err 154 } 155 156 cont, err := d.factory.Create(c.ID, container) 157 if err != nil { 158 return execdriver.ExitStatus{ExitCode: -1}, err 159 } 160 d.Lock() 161 d.activeContainers[c.ID] = cont 162 d.Unlock() 163 defer func() { 164 if !destroyed { 165 cont.Destroy() 166 } 167 d.cleanContainer(c.ID) 168 }() 169 170 if err := cont.Start(p); err != nil { 171 return execdriver.ExitStatus{ExitCode: -1}, err 172 } 173 174 // 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used 175 // to set the 'OOMKilled' flag in state 176 oom := notifyOnOOM(cont) 177 oomKilled := notifyOnOOM(cont) 178 if hooks.Start != nil { 179 pid, err := p.Pid() 180 if err != nil { 181 p.Signal(os.Kill) 182 p.Wait() 183 return execdriver.ExitStatus{ExitCode: -1}, err 184 } 185 hooks.Start(&c.ProcessConfig, pid, oom) 186 } 187 188 waitF := p.Wait 189 if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { 190 // we need such hack for tracking processes with inherited fds, 191 // because cmd.Wait() waiting for all streams to be copied 192 waitF = waitInPIDHost(p, cont) 193 } 194 ps, err := waitF() 195 if err != nil { 196 execErr, ok := err.(*exec.ExitError) 197 if !ok { 198 return execdriver.ExitStatus{ExitCode: -1}, err 199 } 200 ps = execErr.ProcessState 201 } 202 cont.Destroy() 203 destroyed = true 204 // oomKilled will have an oom event if any process within the container was 205 // OOM killed at any time, not only if the init process OOMed. 206 // 207 // Perhaps we only want the OOMKilled flag to be set if the OOM 208 // resulted in a container death, but there isn't a good way to do this 209 // because the kernel's cgroup oom notification does not provide information 210 // such as the PID. This could be heuristically done by checking that the OOM 211 // happened within some very small time slice for the container dying (and 212 // optionally exit-code 137), but I don't think the cgroup oom notification 213 // can be used to reliably determine this 214 // 215 // Even if there were multiple OOMs, it's sufficient to read one value 216 // because libcontainer's oom notify will discard the channel after the 217 // cgroup is destroyed 218 _, oomKill := <-oomKilled 219 return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil 220 } 221 222 // notifyOnOOM returns a channel that signals if the container received an OOM notification 223 // for any process. If it is unable to subscribe to OOM notifications then a closed 224 // channel is returned as it will be non-blocking and return the correct result when read. 225 func notifyOnOOM(container libcontainer.Container) <-chan struct{} { 226 oom, err := container.NotifyOOM() 227 if err != nil { 228 logrus.Warnf("Your kernel does not support OOM notifications: %s", err) 229 c := make(chan struct{}) 230 close(c) 231 return c 232 } 233 return oom 234 } 235 236 func killCgroupProcs(c libcontainer.Container) { 237 var procs []*os.Process 238 if err := c.Pause(); err != nil { 239 logrus.Warn(err) 240 } 241 pids, err := c.Processes() 242 if err != nil { 243 // don't care about childs if we can't get them, this is mostly because cgroup already deleted 244 logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err) 245 } 246 for _, pid := range pids { 247 if p, err := os.FindProcess(pid); err == nil { 248 procs = append(procs, p) 249 if err := p.Kill(); err != nil { 250 logrus.Warn(err) 251 } 252 } 253 } 254 if err := c.Resume(); err != nil { 255 logrus.Warn(err) 256 } 257 for _, p := range procs { 258 if _, err := p.Wait(); err != nil { 259 logrus.Warn(err) 260 } 261 } 262 } 263 264 func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) { 265 return func() (*os.ProcessState, error) { 266 pid, err := p.Pid() 267 if err != nil { 268 return nil, err 269 } 270 271 process, err := os.FindProcess(pid) 272 s, err := process.Wait() 273 if err != nil { 274 execErr, ok := err.(*exec.ExitError) 275 if !ok { 276 return s, err 277 } 278 s = execErr.ProcessState 279 } 280 killCgroupProcs(c) 281 p.Wait() 282 return s, err 283 } 284 } 285 286 // Kill implements the exec driver Driver interface. 287 func (d *Driver) Kill(c *execdriver.Command, sig int) error { 288 d.Lock() 289 active := d.activeContainers[c.ID] 290 d.Unlock() 291 if active == nil { 292 return fmt.Errorf("active container for %s does not exist", c.ID) 293 } 294 state, err := active.State() 295 if err != nil { 296 return err 297 } 298 return syscall.Kill(state.InitProcessPid, syscall.Signal(sig)) 299 } 300 301 // Pause implements the exec driver Driver interface, 302 // it calls libcontainer API to pause a container. 303 func (d *Driver) Pause(c *execdriver.Command) error { 304 d.Lock() 305 active := d.activeContainers[c.ID] 306 d.Unlock() 307 if active == nil { 308 return fmt.Errorf("active container for %s does not exist", c.ID) 309 } 310 return active.Pause() 311 } 312 313 // Unpause implements the exec driver Driver interface, 314 // it calls libcontainer API to unpause a container. 315 func (d *Driver) Unpause(c *execdriver.Command) error { 316 d.Lock() 317 active := d.activeContainers[c.ID] 318 d.Unlock() 319 if active == nil { 320 return fmt.Errorf("active container for %s does not exist", c.ID) 321 } 322 return active.Resume() 323 } 324 325 // Terminate implements the exec driver Driver interface. 326 func (d *Driver) Terminate(c *execdriver.Command) error { 327 defer d.cleanContainer(c.ID) 328 container, err := d.factory.Load(c.ID) 329 if err != nil { 330 return err 331 } 332 defer container.Destroy() 333 state, err := container.State() 334 if err != nil { 335 return err 336 } 337 pid := state.InitProcessPid 338 currentStartTime, err := system.GetProcessStartTime(pid) 339 if err != nil { 340 return err 341 } 342 if state.InitProcessStartTime == currentStartTime { 343 err = syscall.Kill(pid, 9) 344 syscall.Wait4(pid, nil, 0, nil) 345 } 346 return err 347 } 348 349 // Info implements the exec driver Driver interface. 350 func (d *Driver) Info(id string) execdriver.Info { 351 return &info{ 352 ID: id, 353 driver: d, 354 } 355 } 356 357 // Name implements the exec driver Driver interface. 358 func (d *Driver) Name() string { 359 return fmt.Sprintf("%s-%s", DriverName, Version) 360 } 361 362 // GetPidsForContainer implements the exec driver Driver interface. 363 func (d *Driver) GetPidsForContainer(id string) ([]int, error) { 364 d.Lock() 365 active := d.activeContainers[id] 366 d.Unlock() 367 368 if active == nil { 369 return nil, fmt.Errorf("active container for %s does not exist", id) 370 } 371 return active.Processes() 372 } 373 374 func (d *Driver) cleanContainer(id string) error { 375 d.Lock() 376 delete(d.activeContainers, id) 377 d.Unlock() 378 return os.RemoveAll(filepath.Join(d.root, id)) 379 } 380 381 func (d *Driver) createContainerRoot(id string) error { 382 return os.MkdirAll(filepath.Join(d.root, id), 0655) 383 } 384 385 // Clean implements the exec driver Driver interface. 386 func (d *Driver) Clean(id string) error { 387 return os.RemoveAll(filepath.Join(d.root, id)) 388 } 389 390 // Stats implements the exec driver Driver interface. 391 func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) { 392 d.Lock() 393 c := d.activeContainers[id] 394 d.Unlock() 395 if c == nil { 396 return nil, execdriver.ErrNotRunning 397 } 398 now := time.Now() 399 stats, err := c.Stats() 400 if err != nil { 401 return nil, err 402 } 403 memoryLimit := c.Config().Cgroups.Resources.Memory 404 // if the container does not have any memory limit specified set the 405 // limit to the machines memory 406 if memoryLimit == 0 { 407 memoryLimit = d.machineMemory 408 } 409 return &execdriver.ResourceStats{ 410 Stats: stats, 411 Read: now, 412 MemoryLimit: memoryLimit, 413 }, nil 414 } 415 416 // Update updates configs for a container 417 func (d *Driver) Update(c *execdriver.Command) error { 418 d.Lock() 419 cont := d.activeContainers[c.ID] 420 d.Unlock() 421 if cont == nil { 422 return execdriver.ErrNotRunning 423 } 424 config := cont.Config() 425 if err := execdriver.SetupCgroups(&config, c); err != nil { 426 return err 427 } 428 429 if err := cont.Set(config); err != nil { 430 return err 431 } 432 433 return nil 434 } 435 436 // TtyConsole implements the exec driver Terminal interface. 437 type TtyConsole struct { 438 console libcontainer.Console 439 } 440 441 // NewTtyConsole returns a new TtyConsole struct. 442 func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) { 443 tty := &TtyConsole{ 444 console: console, 445 } 446 447 if err := tty.AttachPipes(pipes); err != nil { 448 tty.Close() 449 return nil, err 450 } 451 452 return tty, nil 453 } 454 455 // Resize implements Resize method of Terminal interface 456 func (t *TtyConsole) Resize(h, w int) error { 457 return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) 458 } 459 460 // AttachPipes attaches given pipes to TtyConsole 461 func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error { 462 go func() { 463 if wb, ok := pipes.Stdout.(interface { 464 CloseWriters() error 465 }); ok { 466 defer wb.CloseWriters() 467 } 468 469 pools.Copy(pipes.Stdout, t.console) 470 }() 471 472 if pipes.Stdin != nil { 473 go func() { 474 pools.Copy(t.console, pipes.Stdin) 475 476 pipes.Stdin.Close() 477 }() 478 } 479 480 return nil 481 } 482 483 // Close implements Close method of Terminal interface 484 func (t *TtyConsole) Close() error { 485 return t.console.Close() 486 } 487 488 func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error { 489 490 rootuid, err := container.HostUID() 491 if err != nil { 492 return err 493 } 494 495 if processConfig.Tty { 496 cons, err := p.NewConsole(rootuid) 497 if err != nil { 498 return err 499 } 500 term, err := NewTtyConsole(cons, pipes) 501 if err != nil { 502 return err 503 } 504 processConfig.Terminal = term 505 return nil 506 } 507 // not a tty--set up stdio pipes 508 term := &execdriver.StdConsole{} 509 processConfig.Terminal = term 510 511 // if we are not in a user namespace, there is no reason to go through 512 // the hassle of setting up os-level pipes with proper (remapped) ownership 513 // so we will do the prior shortcut for non-userns containers 514 if rootuid == 0 { 515 p.Stdout = pipes.Stdout 516 p.Stderr = pipes.Stderr 517 518 r, w, err := os.Pipe() 519 if err != nil { 520 return err 521 } 522 if pipes.Stdin != nil { 523 go func() { 524 io.Copy(w, pipes.Stdin) 525 w.Close() 526 }() 527 p.Stdin = r 528 } 529 return nil 530 } 531 532 // if we have user namespaces enabled (rootuid != 0), we will set 533 // up os pipes for stderr, stdout, stdin so we can chown them to 534 // the proper ownership to allow for proper access to the underlying 535 // fds 536 var fds []int 537 538 //setup stdout 539 r, w, err := os.Pipe() 540 if err != nil { 541 return err 542 } 543 fds = append(fds, int(r.Fd()), int(w.Fd())) 544 if pipes.Stdout != nil { 545 go io.Copy(pipes.Stdout, r) 546 } 547 term.Closers = append(term.Closers, r) 548 p.Stdout = w 549 550 //setup stderr 551 r, w, err = os.Pipe() 552 if err != nil { 553 return err 554 } 555 fds = append(fds, int(r.Fd()), int(w.Fd())) 556 if pipes.Stderr != nil { 557 go io.Copy(pipes.Stderr, r) 558 } 559 term.Closers = append(term.Closers, r) 560 p.Stderr = w 561 562 //setup stdin 563 r, w, err = os.Pipe() 564 if err != nil { 565 return err 566 } 567 fds = append(fds, int(r.Fd()), int(w.Fd())) 568 if pipes.Stdin != nil { 569 go func() { 570 io.Copy(w, pipes.Stdin) 571 w.Close() 572 }() 573 p.Stdin = r 574 } 575 for _, fd := range fds { 576 if err := syscall.Fchown(fd, rootuid, rootuid); err != nil { 577 return fmt.Errorf("Failed to chown pipes fd: %v", err) 578 } 579 } 580 return nil 581 } 582 583 // SupportsHooks implements the execdriver Driver interface. 584 // The libcontainer/runC-based native execdriver does exploit the hook mechanism 585 func (d *Driver) SupportsHooks() bool { 586 return true 587 }