// +build linux,cgo

package native

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/daemon/execdriver"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/docker/pkg/pools"
	"github.com/docker/docker/pkg/reexec"
	sysinfo "github.com/docker/docker/pkg/system"
	"github.com/docker/docker/pkg/term"
	aaprofile "github.com/docker/docker/profiles/apparmor"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/system"
	"github.com/opencontainers/runc/libcontainer/utils"
)

// Define constants for native driver
const (
	// DriverName is the registered name of this exec driver; it is also
	// passed to libcontainer.InitPath as the reexec init argument.
	DriverName = "native"
	// Version is the driver version, reported via Name() as "native-0.2".
	Version = "0.2"

	// defaultApparmorProfile is installed (or verified loaded) at driver
	// construction when AppArmor is enabled on the host.
	defaultApparmorProfile = "docker-default"
)

// Driver contains all information for native driver,
// it implements execdriver.Driver.
type Driver struct {
	// root is the state directory; per-container state lives in root/<id>.
	root string
	// activeContainers maps container ID to its running libcontainer
	// handle; guarded by the embedded Mutex.
	activeContainers map[string]libcontainer.Container
	// machineMemory is total host memory, used as the fallback memory
	// limit in Stats() when a container has no limit configured.
	machineMemory int64
	// factory creates and loads libcontainer containers under root.
	factory libcontainer.Factory
	sync.Mutex
}

// NewDriver returns a new native driver, called from NewDriver of execdriver.
// NewDriver builds the native driver: it reads host memory info, creates the
// state directory, ensures the default AppArmor profile is usable (when
// AppArmor is enabled), selects a cgroup manager from the driver options,
// and constructs the libcontainer factory.
//
// Supported options: "native.cgroupdriver=systemd|cgroupfs". Any other key
// is an error.
func NewDriver(root string, options []string) (*Driver, error) {
	meminfo, err := sysinfo.ReadMemInfo()
	if err != nil {
		return nil, err
	}

	if err := sysinfo.MkdirAll(root, 0700); err != nil {
		return nil, err
	}

	if apparmor.IsEnabled() {
		if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
			apparmorProfiles := []string{defaultApparmorProfile}

			// Allow daemon to run if loading failed, but are active
			// (possibly through another run, manually, or via system startup)
			for _, policy := range apparmorProfiles {
				if err := aaprofile.IsLoaded(policy); err != nil {
					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
				}
			}
		}
	}

	// choose cgroup manager
	// this makes sure there are no breaking changes to people
	// who upgrade from versions without native.cgroupdriver opt
	cgm := libcontainer.Cgroupfs

	// parse the options
	for _, option := range options {
		key, val, err := parsers.ParseKeyValueOpt(option)
		if err != nil {
			return nil, err
		}
		key = strings.ToLower(key)
		switch key {
		case "native.cgroupdriver":
			// override the default if they set options
			switch val {
			case "systemd":
				if systemd.UseSystemd() {
					cgm = libcontainer.SystemdCgroups
				} else {
					// warn them that they chose the wrong driver
					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
				}
			case "cgroupfs":
				cgm = libcontainer.Cgroupfs
			default:
				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
			}
		default:
			// NOTE(review): trailing "\n" in an error string is unidiomatic
			// (errors should not end with punctuation/newline) — left as-is
			// since callers may compare the message; consider dropping it.
			return nil, fmt.Errorf("Unknown option %s\n", key)
		}
	}

	f, err := libcontainer.New(
		root,
		cgm,
		libcontainer.InitPath(reexec.Self(), DriverName),
	)
	if err != nil {
		return nil, err
	}

	return &Driver{
		root:             root,
		activeContainers: make(map[string]libcontainer.Container),
		machineMemory:    meminfo.MemTotal,
		factory:          f,
	}, nil
}

// Run implements the exec driver Driver interface,
// it calls libcontainer APIs to run a container.
//
// Lifecycle: create a temp dir for the command, translate the Command into a
// libcontainer config, wire up stdio pipes, create and start the container,
// register it in activeContainers, invoke the Start hook, then wait for the
// init process and report its exit status (and whether an OOM kill occurred).
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
	// destroyed tracks whether cont.Destroy() already ran on the normal
	// path, so the deferred cleanup does not destroy twice.
	destroyed := false
	var err error
	c.TmpDir, err = ioutil.TempDir("", c.ID)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	defer os.RemoveAll(c.TmpDir)

	// take the Command and populate the libcontainer.Config from it
	container, err := d.createContainer(c, hooks)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	p := &libcontainer.Process{
		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
		Env:  c.ProcessConfig.Env,
		Cwd:  c.WorkingDir,
		User: c.ProcessConfig.User,
	}

	// wg tracks the stdio copier goroutines started by setupPipes; it is
	// waited on after the process exits so no output is lost.
	wg := sync.WaitGroup{}
	writers, err := setupPipes(container, &c.ProcessConfig, p, pipes, &wg)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	cont, err := d.factory.Create(c.ID, container)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	if err := cont.Start(p); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	d.Lock()
	d.activeContainers[c.ID] = cont
	d.Unlock()
	defer func() {
		if !destroyed {
			cont.Destroy()
		}
		d.cleanContainer(c.ID)
	}()

	//close the write end of any opened pipes now that they are dup'ed into the container
	for _, writer := range writers {
		writer.Close()
	}
	// 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used
	// to set the 'OOMKilled' flag in state
	oom := notifyOnOOM(cont)
	oomKilled := notifyOnOOM(cont)
	if hooks.Start != nil {
		pid, err := p.Pid()
		if err != nil {
			// Could not learn the pid: kill and reap the process before
			// reporting the failure.
			p.Signal(os.Kill)
			p.Wait()
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		hooks.Start(&c.ProcessConfig, pid, oom)
	}

	waitF := p.Wait
	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
		// we need such hack for tracking processes with inherited fds,
		// because cmd.Wait() waiting for all streams to be copied
		waitF = waitInPIDHost(p, cont)
	}
	ps, err := waitF()
	if err != nil {
		// A non-zero exit comes back as *exec.ExitError; anything else is a
		// real wait failure.
		execErr, ok := err.(*exec.ExitError)
		if !ok {
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		ps = execErr.ProcessState
	}
	// wait for all IO goroutine copiers to finish
	wg.Wait()

	cont.Destroy()
	destroyed = true
	// oomKilled will have an oom event if any process within the container was
	// OOM killed at any time, not only if the init process OOMed.
	//
	// Perhaps we only want the OOMKilled flag to be set if the OOM
	// resulted in a container death, but there isn't a good way to do this
	// because the kernel's cgroup oom notification does not provide information
	// such as the PID. This could be heuristically done by checking that the OOM
	// happened within some very small time slice for the container dying (and
	// optionally exit-code 137), but I don't think the cgroup oom notification
	// can be used to reliably determine this
	//
	// Even if there were multiple OOMs, it's sufficient to read one value
	// because libcontainer's oom notify will discard the channel after the
	// cgroup is destroyed
	_, oomKill := <-oomKilled
	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}

// notifyOnOOM returns a channel that signals if the container received an OOM notification
// for any process. If it is unable to subscribe to OOM notifications then a closed
// channel is returned as it will be non-blocking and return the correct result when read.
func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
	oom, err := container.NotifyOOM()
	if err != nil {
		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
		c := make(chan struct{})
		close(c)
		return c
	}
	return oom
}

// killCgroupProcs pauses the container's cgroup, SIGKILLs every process it
// can enumerate in it, resumes the cgroup (so the kills are delivered), and
// then waits on each killed process. Pausing first prevents a fork race
// where new children appear while the kill loop runs. All errors are logged
// and otherwise ignored: this is best-effort cleanup.
func killCgroupProcs(c libcontainer.Container) {
	var procs []*os.Process
	if err := c.Pause(); err != nil {
		logrus.Warn(err)
	}
	pids, err := c.Processes()
	if err != nil {
		// don't care about childs if we can't get them, this is mostly because cgroup already deleted
		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
	}
	for _, pid := range pids {
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			if err := p.Kill(); err != nil {
				logrus.Warn(err)
			}
		}
	}
	if err := c.Resume(); err != nil {
		logrus.Warn(err)
	}
	for _, p := range procs {
		if _, err := p.Wait(); err != nil {
			logrus.Warn(err)
		}
	}
}

// waitInPIDHost returns a wait function used when the container shares the
// host PID namespace: it waits on the init pid directly via the OS, then
// kills any remaining processes in the container's cgroup, and finally calls
// p.Wait() to let libcontainer finish copying inherited fd streams.
func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
	return func() (*os.ProcessState, error) {
		pid, err := p.Pid()
		if err != nil {
			return nil, err
		}

		// NOTE(review): the error from os.FindProcess is immediately
		// overwritten by process.Wait() below without being checked. On
		// Linux FindProcess always succeeds, so this is benign here, but
		// the shadowing is worth tidying.
		process, err := os.FindProcess(pid)
		s, err := process.Wait()
		if err != nil {
			execErr, ok := err.(*exec.ExitError)
			if !ok {
				return s, err
			}
			s = execErr.ProcessState
		}
		killCgroupProcs(c)
		p.Wait()
		return s, err
	}
}

// Kill implements the exec driver Driver interface.
// It sends signal sig directly to the container's init process; it fails if
// the container is not registered as active or its pid is unknown (-1).
func (d *Driver) Kill(c *execdriver.Command, sig int) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	state, err := active.State()
	if err != nil {
		return err
	}
	if state.InitProcessPid == -1 {
		return fmt.Errorf("avoid sending signal %d to container %s with pid -1", sig, c.ID)
	}
	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
}

// Pause implements the exec driver Driver interface,
// it calls libcontainer API to pause a container.
func (d *Driver) Pause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Pause()
}

// Unpause implements the exec driver Driver interface,
// it calls libcontainer API to unpause a container.
func (d *Driver) Unpause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Resume()
}

// Terminate implements the exec driver Driver interface.
336 func (d *Driver) Terminate(c *execdriver.Command) error { 337 defer d.cleanContainer(c.ID) 338 container, err := d.factory.Load(c.ID) 339 if err != nil { 340 return err 341 } 342 defer container.Destroy() 343 state, err := container.State() 344 if err != nil { 345 return err 346 } 347 pid := state.InitProcessPid 348 currentStartTime, err := system.GetProcessStartTime(pid) 349 if err != nil { 350 return err 351 } 352 if state.InitProcessStartTime == currentStartTime { 353 err = syscall.Kill(pid, 9) 354 syscall.Wait4(pid, nil, 0, nil) 355 } 356 return err 357 } 358 359 // Name implements the exec driver Driver interface. 360 func (d *Driver) Name() string { 361 return fmt.Sprintf("%s-%s", DriverName, Version) 362 } 363 364 // GetPidsForContainer implements the exec driver Driver interface. 365 func (d *Driver) GetPidsForContainer(id string) ([]int, error) { 366 d.Lock() 367 active := d.activeContainers[id] 368 d.Unlock() 369 370 if active == nil { 371 return nil, fmt.Errorf("active container for %s does not exist", id) 372 } 373 return active.Processes() 374 } 375 376 func (d *Driver) cleanContainer(id string) error { 377 d.Lock() 378 delete(d.activeContainers, id) 379 d.Unlock() 380 return os.RemoveAll(filepath.Join(d.root, id)) 381 } 382 383 func (d *Driver) createContainerRoot(id string) error { 384 return os.MkdirAll(filepath.Join(d.root, id), 0655) 385 } 386 387 // Clean implements the exec driver Driver interface. 388 func (d *Driver) Clean(id string) error { 389 return os.RemoveAll(filepath.Join(d.root, id)) 390 } 391 392 // Stats implements the exec driver Driver interface. 
393 func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) { 394 d.Lock() 395 c := d.activeContainers[id] 396 d.Unlock() 397 if c == nil { 398 return nil, execdriver.ErrNotRunning 399 } 400 now := time.Now() 401 stats, err := c.Stats() 402 if err != nil { 403 return nil, err 404 } 405 memoryLimit := c.Config().Cgroups.Resources.Memory 406 // if the container does not have any memory limit specified set the 407 // limit to the machines memory 408 if memoryLimit == 0 { 409 memoryLimit = d.machineMemory 410 } 411 return &execdriver.ResourceStats{ 412 Stats: stats, 413 Read: now, 414 MemoryLimit: memoryLimit, 415 }, nil 416 } 417 418 // Update updates configs for a container 419 func (d *Driver) Update(c *execdriver.Command) error { 420 d.Lock() 421 cont := d.activeContainers[c.ID] 422 d.Unlock() 423 if cont == nil { 424 return execdriver.ErrNotRunning 425 } 426 config := cont.Config() 427 if err := execdriver.SetupCgroups(&config, c); err != nil { 428 return err 429 } 430 431 if err := cont.Set(config); err != nil { 432 return err 433 } 434 435 return nil 436 } 437 438 // TtyConsole implements the exec driver Terminal interface. 439 type TtyConsole struct { 440 console libcontainer.Console 441 } 442 443 // NewTtyConsole returns a new TtyConsole struct. 
// NewTtyConsole wraps the given libcontainer console in a TtyConsole and
// attaches the provided stdio pipes to it; the console is closed if
// attaching fails.
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, wg *sync.WaitGroup) (*TtyConsole, error) {
	tty := &TtyConsole{
		console: console,
	}

	if err := tty.AttachPipes(pipes, wg); err != nil {
		tty.Close()
		return nil, err
	}

	return tty, nil
}

// Resize implements Resize method of Terminal interface
func (t *TtyConsole) Resize(h, w int) error {
	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}

// AttachPipes attaches given pipes to TtyConsole.
// Console output is copied to pipes.Stdout on a goroutine tracked by wg;
// stdin (if any) is copied into the console on an untracked goroutine and
// closed when the copy ends.
func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes, wg *sync.WaitGroup) error {
	wg.Add(1)
	go func() {
		defer wg.Done()
		// If the stdout sink supports CloseWriters (e.g. a broadcast
		// writer), close its writers once the console EOFs.
		if wb, ok := pipes.Stdout.(interface {
			CloseWriters() error
		}); ok {
			defer wb.CloseWriters()
		}

		pools.Copy(pipes.Stdout, t.console)
	}()

	if pipes.Stdin != nil {
		go func() {
			pools.Copy(t.console, pipes.Stdin)

			pipes.Stdin.Close()
		}()
	}

	return nil
}

// Close implements Close method of Terminal interface
func (t *TtyConsole) Close() error {
	return t.console.Close()
}

// setupPipes wires the process's stdio to the daemon-side pipes before the
// container starts.
//
//   - Tty case: allocates a console and returns with no writers.
//   - Non-userns (rootuid == 0): attaches pipes.Stdout/Stderr directly and
//     bridges stdin through an os.Pipe.
//   - Userns (rootuid != 0): creates os.Pipes for stdout/stderr/stdin and
//     chowns every pipe fd to the remapped root uid so the container side
//     can access them.
//
// The returned write ends must be closed by the caller once the process has
// started (they are dup'ed into the container); copier goroutines for
// stdout/stderr are registered on wg.
func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes, wg *sync.WaitGroup) ([]io.WriteCloser, error) {

	writers := []io.WriteCloser{}

	rootuid, err := container.HostUID()
	if err != nil {
		return writers, err
	}

	if processConfig.Tty {
		cons, err := p.NewConsole(rootuid)
		if err != nil {
			return writers, err
		}
		term, err := NewTtyConsole(cons, pipes, wg)
		if err != nil {
			return writers, err
		}
		processConfig.Terminal = term
		return writers, nil
	}
	// not a tty--set up stdio pipes
	term := &execdriver.StdConsole{}
	processConfig.Terminal = term

	// if we are not in a user namespace, there is no reason to go through
	// the hassle of setting up os-level pipes with proper (remapped) ownership
	// so we will do the prior shortcut for non-userns containers
	if rootuid == 0 {
		p.Stdout = pipes.Stdout
		p.Stderr = pipes.Stderr

		r, w, err := os.Pipe()
		if err != nil {
			return writers, err
		}
		if pipes.Stdin != nil {
			go func() {
				io.Copy(w, pipes.Stdin)
				w.Close()
			}()
			p.Stdin = r
		}
		return writers, nil
	}

	// if we have user namespaces enabled (rootuid != 0), we will set
	// up os pipes for stderr, stdout, stdin so we can chown them to
	// the proper ownership to allow for proper access to the underlying
	// fds
	var fds []uintptr

	copyPipes := func(out io.Writer, in io.ReadCloser) {
		defer wg.Done()
		io.Copy(out, in)
		in.Close()
	}

	//setup stdout
	r, w, err := os.Pipe()
	if err != nil {
		// NOTE(review): on os.Pipe failure r and w are nil; w.Close() on a
		// nil *os.File returns ErrInvalid rather than panicking, so this is
		// harmless but could simply be dropped.
		w.Close()
		return writers, err
	}
	writers = append(writers, w)
	fds = append(fds, r.Fd(), w.Fd())
	if pipes.Stdout != nil {
		wg.Add(1)
		go copyPipes(pipes.Stdout, r)
	}
	term.Closers = append(term.Closers, r)
	p.Stdout = w

	//setup stderr
	r, w, err = os.Pipe()
	if err != nil {
		w.Close()
		return writers, err
	}
	writers = append(writers, w)
	fds = append(fds, r.Fd(), w.Fd())
	if pipes.Stderr != nil {
		wg.Add(1)
		go copyPipes(pipes.Stderr, r)
	}
	term.Closers = append(term.Closers, r)
	p.Stderr = w

	//setup stdin
	r, w, err = os.Pipe()
	if err != nil {
		r.Close()
		return writers, err
	}
	fds = append(fds, r.Fd(), w.Fd())
	if pipes.Stdin != nil {
		go func() {
			io.Copy(w, pipes.Stdin)
			w.Close()
		}()
		p.Stdin = r
	}
	// chown every pipe end to the remapped root so the container process
	// (running as the remapped uid) can use its inherited fds
	for _, fd := range fds {
		if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
			return writers, fmt.Errorf("Failed to chown pipes fd: %v", err)
		}
	}
	return writers, nil
}

// SupportsHooks implements the execdriver Driver interface.
603 // The libcontainer/runC-based native execdriver does exploit the hook mechanism 604 func (d *Driver) SupportsHooks() bool { 605 return true 606 }