github.com/ncdc/docker@v0.10.1-0.20160129113957-6c6729ef5b74/daemon/execdriver/native/driver.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "strings" 13 "sync" 14 "syscall" 15 "time" 16 17 "github.com/Sirupsen/logrus" 18 "github.com/docker/docker/daemon/execdriver" 19 "github.com/docker/docker/pkg/parsers" 20 "github.com/docker/docker/pkg/pools" 21 "github.com/docker/docker/pkg/reexec" 22 sysinfo "github.com/docker/docker/pkg/system" 23 "github.com/docker/docker/pkg/term" 24 aaprofile "github.com/docker/docker/profiles/apparmor" 25 "github.com/opencontainers/runc/libcontainer" 26 "github.com/opencontainers/runc/libcontainer/apparmor" 27 "github.com/opencontainers/runc/libcontainer/cgroups/systemd" 28 "github.com/opencontainers/runc/libcontainer/configs" 29 "github.com/opencontainers/runc/libcontainer/system" 30 "github.com/opencontainers/runc/libcontainer/utils" 31 ) 32 33 // Define constants for native driver 34 const ( 35 DriverName = "native" 36 Version = "0.2" 37 38 defaultApparmorProfile = "docker-default" 39 ) 40 41 // Driver contains all information for native driver, 42 // it implements execdriver.Driver. 43 type Driver struct { 44 root string 45 activeContainers map[string]libcontainer.Container 46 machineMemory int64 47 factory libcontainer.Factory 48 sync.Mutex 49 } 50 51 // NewDriver returns a new native driver, called from NewDriver of execdriver. 52 func NewDriver(root string, options []string) (*Driver, error) { 53 meminfo, err := sysinfo.ReadMemInfo() 54 if err != nil { 55 return nil, err 56 } 57 58 if err := sysinfo.MkdirAll(root, 0700); err != nil { 59 return nil, err 60 } 61 62 if apparmor.IsEnabled() { 63 if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil { 64 apparmorProfiles := []string{defaultApparmorProfile} 65 66 // Allow daemon to run if loading failed, but are active 67 // (possibly through another run, manually, or via system startup) 68 for _, policy := range apparmorProfiles { 69 if err := aaprofile.IsLoaded(policy); err != nil { 70 return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) 71 } 72 } 73 } 74 } 75 76 // choose cgroup manager 77 // this makes sure there are no breaking changes to people 78 // who upgrade from versions without native.cgroupdriver opt 79 cgm := libcontainer.Cgroupfs 80 81 // parse the options 82 for _, option := range options { 83 key, val, err := parsers.ParseKeyValueOpt(option) 84 if err != nil { 85 return nil, err 86 } 87 key = strings.ToLower(key) 88 switch key { 89 case "native.cgroupdriver": 90 // override the default if they set options 91 switch val { 92 case "systemd": 93 if systemd.UseSystemd() { 94 cgm = libcontainer.SystemdCgroups 95 } else { 96 // warn them that they chose the wrong driver 97 logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead") 98 } 99 case "cgroupfs": 100 cgm = libcontainer.Cgroupfs 101 default: 102 return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val) 103 } 104 default: 105 return nil, fmt.Errorf("Unknown option %s\n", key) 106 } 107 } 108 109 f, err := libcontainer.New( 110 root, 111 cgm, 112 libcontainer.InitPath(reexec.Self(), DriverName), 113 ) 114 if err != nil { 115 return nil, err 116 } 117 118 return &Driver{ 119 root: root, 120 activeContainers: make(map[string]libcontainer.Container), 121 machineMemory: meminfo.MemTotal, 122 factory: f, 123 }, nil 124 } 125 126 type execOutput struct { 127 exitCode int 128 err error 129 } 130 131 // Run implements the exec driver Driver interface, 132 // it calls libcontainer APIs to run a container. 133 func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) { 134 destroyed := false 135 var err error 136 c.TmpDir, err = ioutil.TempDir("", c.ID) 137 if err != nil { 138 return execdriver.ExitStatus{ExitCode: -1}, err 139 } 140 defer os.RemoveAll(c.TmpDir) 141 142 // take the Command and populate the libcontainer.Config from it 143 container, err := d.createContainer(c, hooks) 144 if err != nil { 145 return execdriver.ExitStatus{ExitCode: -1}, err 146 } 147 148 p := &libcontainer.Process{ 149 Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), 150 Env: c.ProcessConfig.Env, 151 Cwd: c.WorkingDir, 152 User: c.ProcessConfig.User, 153 } 154 155 if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil { 156 return execdriver.ExitStatus{ExitCode: -1}, err 157 } 158 159 cont, err := d.factory.Create(c.ID, container) 160 if err != nil { 161 return execdriver.ExitStatus{ExitCode: -1}, err 162 } 163 d.Lock() 164 d.activeContainers[c.ID] = cont 165 d.Unlock() 166 defer func() { 167 if !destroyed { 168 cont.Destroy() 169 } 170 d.cleanContainer(c.ID) 171 }() 172 173 if err := cont.Start(p); err != nil { 174 return execdriver.ExitStatus{ExitCode: -1}, err 175 } 176 177 // 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used 178 // to set the 'OOMKilled' flag in state 179 oom := notifyOnOOM(cont) 180 oomKilled := notifyOnOOM(cont) 181 if hooks.Start != nil { 182 pid, err := p.Pid() 183 if err != nil { 184 p.Signal(os.Kill) 185 p.Wait() 186 return execdriver.ExitStatus{ExitCode: -1}, err 187 } 188 hooks.Start(&c.ProcessConfig, pid, oom) 189 } 190 191 waitF := p.Wait 192 if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { 193 // we need such hack for tracking processes with inherited fds, 194 // because cmd.Wait() waiting for all streams to be copied 195 waitF = waitInPIDHost(p, cont) 196 } 197 ps, err := waitF() 198 if err != nil { 199 execErr, ok := err.(*exec.ExitError) 200 if !ok { 201 return execdriver.ExitStatus{ExitCode: -1}, err 202 } 203 ps = execErr.ProcessState 204 } 205 cont.Destroy() 206 destroyed = true 207 // oomKilled will have an oom event if any process within the container was 208 // OOM killed at any time, not only if the init process OOMed. 209 // 210 // Perhaps we only want the OOMKilled flag to be set if the OOM 211 // resulted in a container death, but there isn't a good way to do this 212 // because the kernel's cgroup oom notification does not provide information 213 // such as the PID. This could be heuristically done by checking that the OOM 214 // happened within some very small time slice for the container dying (and 215 // optionally exit-code 137), but I don't think the cgroup oom notification 216 // can be used to reliably determine this 217 // 218 // Even if there were multiple OOMs, it's sufficient to read one value 219 // because libcontainer's oom notify will discard the channel after the 220 // cgroup is destroyed 221 _, oomKill := <-oomKilled 222 return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil 223 } 224 225 // notifyOnOOM returns a channel that signals if the container received an OOM notification 226 // for any process. If it is unable to subscribe to OOM notifications then a closed 227 // channel is returned as it will be non-blocking and return the correct result when read. 228 func notifyOnOOM(container libcontainer.Container) <-chan struct{} { 229 oom, err := container.NotifyOOM() 230 if err != nil { 231 logrus.Warnf("Your kernel does not support OOM notifications: %s", err) 232 c := make(chan struct{}) 233 close(c) 234 return c 235 } 236 return oom 237 } 238 239 func killCgroupProcs(c libcontainer.Container) { 240 var procs []*os.Process 241 if err := c.Pause(); err != nil { 242 logrus.Warn(err) 243 } 244 pids, err := c.Processes() 245 if err != nil { 246 // don't care about childs if we can't get them, this is mostly because cgroup already deleted 247 logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err) 248 } 249 for _, pid := range pids { 250 if p, err := os.FindProcess(pid); err == nil { 251 procs = append(procs, p) 252 if err := p.Kill(); err != nil { 253 logrus.Warn(err) 254 } 255 } 256 } 257 if err := c.Resume(); err != nil { 258 logrus.Warn(err) 259 } 260 for _, p := range procs { 261 if _, err := p.Wait(); err != nil { 262 logrus.Warn(err) 263 } 264 } 265 } 266 267 func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) { 268 return func() (*os.ProcessState, error) { 269 pid, err := p.Pid() 270 if err != nil { 271 return nil, err 272 } 273 274 process, err := os.FindProcess(pid) 275 s, err := process.Wait() 276 if err != nil { 277 execErr, ok := err.(*exec.ExitError) 278 if !ok { 279 return s, err 280 } 281 s = execErr.ProcessState 282 } 283 killCgroupProcs(c) 284 p.Wait() 285 return s, err 286 } 287 } 288 289 // Kill implements the exec driver Driver interface. 290 func (d *Driver) Kill(c *execdriver.Command, sig int) error { 291 d.Lock() 292 active := d.activeContainers[c.ID] 293 d.Unlock() 294 if active == nil { 295 return fmt.Errorf("active container for %s does not exist", c.ID) 296 } 297 state, err := active.State() 298 if err != nil { 299 return err 300 } 301 return syscall.Kill(state.InitProcessPid, syscall.Signal(sig)) 302 } 303 304 // Pause implements the exec driver Driver interface, 305 // it calls libcontainer API to pause a container. 306 func (d *Driver) Pause(c *execdriver.Command) error { 307 d.Lock() 308 active := d.activeContainers[c.ID] 309 d.Unlock() 310 if active == nil { 311 return fmt.Errorf("active container for %s does not exist", c.ID) 312 } 313 return active.Pause() 314 } 315 316 // Unpause implements the exec driver Driver interface, 317 // it calls libcontainer API to unpause a container. 318 func (d *Driver) Unpause(c *execdriver.Command) error { 319 d.Lock() 320 active := d.activeContainers[c.ID] 321 d.Unlock() 322 if active == nil { 323 return fmt.Errorf("active container for %s does not exist", c.ID) 324 } 325 return active.Resume() 326 } 327 328 // Terminate implements the exec driver Driver interface. 329 func (d *Driver) Terminate(c *execdriver.Command) error { 330 defer d.cleanContainer(c.ID) 331 container, err := d.factory.Load(c.ID) 332 if err != nil { 333 return err 334 } 335 defer container.Destroy() 336 state, err := container.State() 337 if err != nil { 338 return err 339 } 340 pid := state.InitProcessPid 341 currentStartTime, err := system.GetProcessStartTime(pid) 342 if err != nil { 343 return err 344 } 345 if state.InitProcessStartTime == currentStartTime { 346 err = syscall.Kill(pid, 9) 347 syscall.Wait4(pid, nil, 0, nil) 348 } 349 return err 350 } 351 352 // Info implements the exec driver Driver interface. 353 func (d *Driver) Info(id string) execdriver.Info { 354 return &info{ 355 ID: id, 356 driver: d, 357 } 358 } 359 360 // Name implements the exec driver Driver interface. 361 func (d *Driver) Name() string { 362 return fmt.Sprintf("%s-%s", DriverName, Version) 363 } 364 365 // GetPidsForContainer implements the exec driver Driver interface. 366 func (d *Driver) GetPidsForContainer(id string) ([]int, error) { 367 d.Lock() 368 active := d.activeContainers[id] 369 d.Unlock() 370 371 if active == nil { 372 return nil, fmt.Errorf("active container for %s does not exist", id) 373 } 374 return active.Processes() 375 } 376 377 func (d *Driver) cleanContainer(id string) error { 378 d.Lock() 379 delete(d.activeContainers, id) 380 d.Unlock() 381 return os.RemoveAll(filepath.Join(d.root, id)) 382 } 383 384 func (d *Driver) createContainerRoot(id string) error { 385 return os.MkdirAll(filepath.Join(d.root, id), 0655) 386 } 387 388 // Clean implements the exec driver Driver interface. 389 func (d *Driver) Clean(id string) error { 390 return os.RemoveAll(filepath.Join(d.root, id)) 391 } 392 393 // Stats implements the exec driver Driver interface. 394 func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) { 395 d.Lock() 396 c := d.activeContainers[id] 397 d.Unlock() 398 if c == nil { 399 return nil, execdriver.ErrNotRunning 400 } 401 now := time.Now() 402 stats, err := c.Stats() 403 if err != nil { 404 return nil, err 405 } 406 memoryLimit := c.Config().Cgroups.Resources.Memory 407 // if the container does not have any memory limit specified set the 408 // limit to the machines memory 409 if memoryLimit == 0 { 410 memoryLimit = d.machineMemory 411 } 412 return &execdriver.ResourceStats{ 413 Stats: stats, 414 Read: now, 415 MemoryLimit: memoryLimit, 416 }, nil 417 } 418 419 // Update updates configs for a container 420 func (d *Driver) Update(c *execdriver.Command) error { 421 d.Lock() 422 cont := d.activeContainers[c.ID] 423 d.Unlock() 424 if cont == nil { 425 return execdriver.ErrNotRunning 426 } 427 config := cont.Config() 428 if err := execdriver.SetupCgroups(&config, c); err != nil { 429 return err 430 } 431 432 if err := cont.Set(config); err != nil { 433 return err 434 } 435 436 return nil 437 } 438 439 // TtyConsole implements the exec driver Terminal interface. 440 type TtyConsole struct { 441 console libcontainer.Console 442 } 443 444 // NewTtyConsole returns a new TtyConsole struct. 445 func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) { 446 tty := &TtyConsole{ 447 console: console, 448 } 449 450 if err := tty.AttachPipes(pipes); err != nil { 451 tty.Close() 452 return nil, err 453 } 454 455 return tty, nil 456 } 457 458 // Resize implements Resize method of Terminal interface 459 func (t *TtyConsole) Resize(h, w int) error { 460 return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) 461 } 462 463 // AttachPipes attaches given pipes to TtyConsole 464 func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error { 465 go func() { 466 if wb, ok := pipes.Stdout.(interface { 467 CloseWriters() error 468 }); ok { 469 defer wb.CloseWriters() 470 } 471 472 pools.Copy(pipes.Stdout, t.console) 473 }() 474 475 if pipes.Stdin != nil { 476 go func() { 477 pools.Copy(t.console, pipes.Stdin) 478 479 pipes.Stdin.Close() 480 }() 481 } 482 483 return nil 484 } 485 486 // Close implements Close method of Terminal interface 487 func (t *TtyConsole) Close() error { 488 return t.console.Close() 489 } 490 491 func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error { 492 493 rootuid, err := container.HostUID() 494 if err != nil { 495 return err 496 } 497 498 if processConfig.Tty { 499 cons, err := p.NewConsole(rootuid) 500 if err != nil { 501 return err 502 } 503 term, err := NewTtyConsole(cons, pipes) 504 if err != nil { 505 return err 506 } 507 processConfig.Terminal = term 508 return nil 509 } 510 // not a tty--set up stdio pipes 511 term := &execdriver.StdConsole{} 512 processConfig.Terminal = term 513 514 // if we are not in a user namespace, there is no reason to go through 515 // the hassle of setting up os-level pipes with proper (remapped) ownership 516 // so we will do the prior shortcut for non-userns containers 517 if rootuid == 0 { 518 p.Stdout = pipes.Stdout 519 p.Stderr = pipes.Stderr 520 521 r, w, err := os.Pipe() 522 if err != nil { 523 return err 524 } 525 if pipes.Stdin != nil { 526 go func() { 527 io.Copy(w, pipes.Stdin) 528 w.Close() 529 }() 530 p.Stdin = r 531 } 532 return nil 533 } 534 535 // if we have user namespaces enabled (rootuid != 0), we will set 536 // up os pipes for stderr, stdout, stdin so we can chown them to 537 // the proper ownership to allow for proper access to the underlying 538 // fds 539 var fds []int 540 541 //setup stdout 542 r, w, err := os.Pipe() 543 if err != nil { 544 return err 545 } 546 fds = append(fds, int(r.Fd()), int(w.Fd())) 547 if pipes.Stdout != nil { 548 go io.Copy(pipes.Stdout, r) 549 } 550 term.Closers = append(term.Closers, r) 551 p.Stdout = w 552 553 //setup stderr 554 r, w, err = os.Pipe() 555 if err != nil { 556 return err 557 } 558 fds = append(fds, int(r.Fd()), int(w.Fd())) 559 if pipes.Stderr != nil { 560 go io.Copy(pipes.Stderr, r) 561 } 562 term.Closers = append(term.Closers, r) 563 p.Stderr = w 564 565 //setup stdin 566 r, w, err = os.Pipe() 567 if err != nil { 568 return err 569 } 570 fds = append(fds, int(r.Fd()), int(w.Fd())) 571 if pipes.Stdin != nil { 572 go func() { 573 io.Copy(w, pipes.Stdin) 574 w.Close() 575 }() 576 p.Stdin = r 577 } 578 for _, fd := range fds { 579 if err := syscall.Fchown(fd, rootuid, rootuid); err != nil { 580 return fmt.Errorf("Failed to chown pipes fd: %v", err) 581 } 582 } 583 return nil 584 } 585 586 // SupportsHooks implements the execdriver Driver interface. 587 // The libcontainer/runC-based native execdriver does exploit the hook mechanism 588 func (d *Driver) SupportsHooks() bool { 589 return true 590 }