// Source path: github.com/goern/docker@v1.9.0-rc1/daemon/execdriver/native/driver.go

// +build linux,cgo

package native

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/daemon/execdriver"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/docker/pkg/pools"
	"github.com/docker/docker/pkg/reexec"
	sysinfo "github.com/docker/docker/pkg/system"
	"github.com/docker/docker/pkg/term"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/system"
	"github.com/opencontainers/runc/libcontainer/utils"
)

// Define constants for native driver
const (
	// DriverName is the driver's base name; it is combined with Version by
	// Driver.Name and passed as an argument to libcontainer.InitPath.
	DriverName = "native"
	// Version is the driver version string appended to DriverName by
	// Driver.Name.
	Version = "0.2"
)

// Driver contains all information for native driver,
// it implements execdriver.Driver.
type Driver struct {
	// root is the directory handed to libcontainer.New; per-container
	// directories are addressed as filepath.Join(root, id).
	root string
	// initPath is stored verbatim from the NewDriver argument.
	initPath string
	// activeContainers maps container ID to its libcontainer handle.
	// Accesses in this file are made while holding the embedded Mutex.
	activeContainers map[string]libcontainer.Container
	// machineMemory is meminfo.MemTotal as read in NewDriver; Stats reports
	// it as the memory limit for containers whose configured limit is 0.
	machineMemory int64
	// factory is the libcontainer factory used to Create and Load containers.
	factory libcontainer.Factory
	// Mutex guards activeContainers.
	sync.Mutex
}

// NewDriver returns a new native driver, called from NewDriver of execdriver.
49 func NewDriver(root, initPath string, options []string) (*Driver, error) { 50 meminfo, err := sysinfo.ReadMemInfo() 51 if err != nil { 52 return nil, err 53 } 54 55 if err := sysinfo.MkdirAll(root, 0700); err != nil { 56 return nil, err 57 } 58 59 if apparmor.IsEnabled() { 60 if err := installAppArmorProfile(); err != nil { 61 apparmorProfiles := []string{"docker-default"} 62 63 // Allow daemon to run if loading failed, but are active 64 // (possibly through another run, manually, or via system startup) 65 for _, policy := range apparmorProfiles { 66 if err := hasAppArmorProfileLoaded(policy); err != nil { 67 return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) 68 } 69 } 70 } 71 } 72 73 // choose cgroup manager 74 // this makes sure there are no breaking changes to people 75 // who upgrade from versions without native.cgroupdriver opt 76 cgm := libcontainer.Cgroupfs 77 if systemd.UseSystemd() { 78 cgm = libcontainer.SystemdCgroups 79 } 80 81 // parse the options 82 for _, option := range options { 83 key, val, err := parsers.ParseKeyValueOpt(option) 84 if err != nil { 85 return nil, err 86 } 87 key = strings.ToLower(key) 88 switch key { 89 case "native.cgroupdriver": 90 // override the default if they set options 91 switch val { 92 case "systemd": 93 if systemd.UseSystemd() { 94 cgm = libcontainer.SystemdCgroups 95 } else { 96 // warn them that they chose the wrong driver 97 logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead") 98 } 99 case "cgroupfs": 100 cgm = libcontainer.Cgroupfs 101 default: 102 return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. 
try cgroupfs or systemd", val) 103 } 104 default: 105 return nil, fmt.Errorf("Unknown option %s\n", key) 106 } 107 } 108 109 f, err := libcontainer.New( 110 root, 111 cgm, 112 libcontainer.InitPath(reexec.Self(), DriverName), 113 ) 114 if err != nil { 115 return nil, err 116 } 117 118 return &Driver{ 119 root: root, 120 initPath: initPath, 121 activeContainers: make(map[string]libcontainer.Container), 122 machineMemory: meminfo.MemTotal, 123 factory: f, 124 }, nil 125 } 126 127 type execOutput struct { 128 exitCode int 129 err error 130 } 131 132 // Run implements the exec driver Driver interface, 133 // it calls libcontainer APIs to run a container. 134 func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) { 135 // take the Command and populate the libcontainer.Config from it 136 container, err := d.createContainer(c, hooks) 137 if err != nil { 138 return execdriver.ExitStatus{ExitCode: -1}, err 139 } 140 141 p := &libcontainer.Process{ 142 Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), 143 Env: c.ProcessConfig.Env, 144 Cwd: c.WorkingDir, 145 User: c.ProcessConfig.User, 146 } 147 148 if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil { 149 return execdriver.ExitStatus{ExitCode: -1}, err 150 } 151 152 cont, err := d.factory.Create(c.ID, container) 153 if err != nil { 154 return execdriver.ExitStatus{ExitCode: -1}, err 155 } 156 d.Lock() 157 d.activeContainers[c.ID] = cont 158 d.Unlock() 159 defer func() { 160 cont.Destroy() 161 d.cleanContainer(c.ID) 162 }() 163 164 if err := cont.Start(p); err != nil { 165 return execdriver.ExitStatus{ExitCode: -1}, err 166 } 167 168 oom := notifyOnOOM(cont) 169 if hooks.Start != nil { 170 171 pid, err := p.Pid() 172 if err != nil { 173 p.Signal(os.Kill) 174 p.Wait() 175 return execdriver.ExitStatus{ExitCode: -1}, err 176 } 177 hooks.Start(&c.ProcessConfig, pid, oom) 178 } 179 180 waitF := p.Wait 181 if nss 
:= cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { 182 // we need such hack for tracking processes with inherited fds, 183 // because cmd.Wait() waiting for all streams to be copied 184 waitF = waitInPIDHost(p, cont) 185 } 186 ps, err := waitF() 187 if err != nil { 188 execErr, ok := err.(*exec.ExitError) 189 if !ok { 190 return execdriver.ExitStatus{ExitCode: -1}, err 191 } 192 ps = execErr.ProcessState 193 } 194 cont.Destroy() 195 _, oomKill := <-oom 196 return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil 197 } 198 199 // notifyOnOOM returns a channel that signals if the container received an OOM notification 200 // for any process. If it is unable to subscribe to OOM notifications then a closed 201 // channel is returned as it will be non-blocking and return the correct result when read. 202 func notifyOnOOM(container libcontainer.Container) <-chan struct{} { 203 oom, err := container.NotifyOOM() 204 if err != nil { 205 logrus.Warnf("Your kernel does not support OOM notifications: %s", err) 206 c := make(chan struct{}) 207 close(c) 208 return c 209 } 210 return oom 211 } 212 213 func killCgroupProcs(c libcontainer.Container) { 214 var procs []*os.Process 215 if err := c.Pause(); err != nil { 216 logrus.Warn(err) 217 } 218 pids, err := c.Processes() 219 if err != nil { 220 // don't care about childs if we can't get them, this is mostly because cgroup already deleted 221 logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err) 222 } 223 for _, pid := range pids { 224 if p, err := os.FindProcess(pid); err == nil { 225 procs = append(procs, p) 226 if err := p.Kill(); err != nil { 227 logrus.Warn(err) 228 } 229 } 230 } 231 if err := c.Resume(); err != nil { 232 logrus.Warn(err) 233 } 234 for _, p := range procs { 235 if _, err := p.Wait(); err != nil { 236 logrus.Warn(err) 237 } 238 } 239 } 240 241 func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() 
(*os.ProcessState, error) { 242 return func() (*os.ProcessState, error) { 243 pid, err := p.Pid() 244 if err != nil { 245 return nil, err 246 } 247 248 process, err := os.FindProcess(pid) 249 s, err := process.Wait() 250 if err != nil { 251 execErr, ok := err.(*exec.ExitError) 252 if !ok { 253 return s, err 254 } 255 s = execErr.ProcessState 256 } 257 killCgroupProcs(c) 258 p.Wait() 259 return s, err 260 } 261 } 262 263 // Kill implements the exec driver Driver interface. 264 func (d *Driver) Kill(c *execdriver.Command, sig int) error { 265 d.Lock() 266 active := d.activeContainers[c.ID] 267 d.Unlock() 268 if active == nil { 269 return fmt.Errorf("active container for %s does not exist", c.ID) 270 } 271 state, err := active.State() 272 if err != nil { 273 return err 274 } 275 return syscall.Kill(state.InitProcessPid, syscall.Signal(sig)) 276 } 277 278 // Pause implements the exec driver Driver interface, 279 // it calls libcontainer API to pause a container. 280 func (d *Driver) Pause(c *execdriver.Command) error { 281 d.Lock() 282 active := d.activeContainers[c.ID] 283 d.Unlock() 284 if active == nil { 285 return fmt.Errorf("active container for %s does not exist", c.ID) 286 } 287 return active.Pause() 288 } 289 290 // Unpause implements the exec driver Driver interface, 291 // it calls libcontainer API to unpause a container. 292 func (d *Driver) Unpause(c *execdriver.Command) error { 293 d.Lock() 294 active := d.activeContainers[c.ID] 295 d.Unlock() 296 if active == nil { 297 return fmt.Errorf("active container for %s does not exist", c.ID) 298 } 299 return active.Resume() 300 } 301 302 // Terminate implements the exec driver Driver interface. 
303 func (d *Driver) Terminate(c *execdriver.Command) error { 304 defer d.cleanContainer(c.ID) 305 container, err := d.factory.Load(c.ID) 306 if err != nil { 307 return err 308 } 309 defer container.Destroy() 310 state, err := container.State() 311 if err != nil { 312 return err 313 } 314 pid := state.InitProcessPid 315 currentStartTime, err := system.GetProcessStartTime(pid) 316 if err != nil { 317 return err 318 } 319 if state.InitProcessStartTime == currentStartTime { 320 err = syscall.Kill(pid, 9) 321 syscall.Wait4(pid, nil, 0, nil) 322 } 323 return err 324 } 325 326 // Info implements the exec driver Driver interface. 327 func (d *Driver) Info(id string) execdriver.Info { 328 return &info{ 329 ID: id, 330 driver: d, 331 } 332 } 333 334 // Name implements the exec driver Driver interface. 335 func (d *Driver) Name() string { 336 return fmt.Sprintf("%s-%s", DriverName, Version) 337 } 338 339 // GetPidsForContainer implements the exec driver Driver interface. 340 func (d *Driver) GetPidsForContainer(id string) ([]int, error) { 341 d.Lock() 342 active := d.activeContainers[id] 343 d.Unlock() 344 345 if active == nil { 346 return nil, fmt.Errorf("active container for %s does not exist", id) 347 } 348 return active.Processes() 349 } 350 351 func (d *Driver) cleanContainer(id string) error { 352 d.Lock() 353 delete(d.activeContainers, id) 354 d.Unlock() 355 return os.RemoveAll(filepath.Join(d.root, id)) 356 } 357 358 func (d *Driver) createContainerRoot(id string) error { 359 return os.MkdirAll(filepath.Join(d.root, id), 0655) 360 } 361 362 // Clean implements the exec driver Driver interface. 363 func (d *Driver) Clean(id string) error { 364 return os.RemoveAll(filepath.Join(d.root, id)) 365 } 366 367 // Stats implements the exec driver Driver interface. 
368 func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) { 369 d.Lock() 370 c := d.activeContainers[id] 371 d.Unlock() 372 if c == nil { 373 return nil, execdriver.ErrNotRunning 374 } 375 now := time.Now() 376 stats, err := c.Stats() 377 if err != nil { 378 return nil, err 379 } 380 memoryLimit := c.Config().Cgroups.Memory 381 // if the container does not have any memory limit specified set the 382 // limit to the machines memory 383 if memoryLimit == 0 { 384 memoryLimit = d.machineMemory 385 } 386 return &execdriver.ResourceStats{ 387 Stats: stats, 388 Read: now, 389 MemoryLimit: memoryLimit, 390 }, nil 391 } 392 393 // TtyConsole implements the exec driver Terminal interface. 394 type TtyConsole struct { 395 console libcontainer.Console 396 } 397 398 // NewTtyConsole returns a new TtyConsole struct. 399 func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) { 400 tty := &TtyConsole{ 401 console: console, 402 } 403 404 if err := tty.AttachPipes(pipes); err != nil { 405 tty.Close() 406 return nil, err 407 } 408 409 return tty, nil 410 } 411 412 // Resize implements Resize method of Terminal interface 413 func (t *TtyConsole) Resize(h, w int) error { 414 return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) 415 } 416 417 // AttachPipes attaches given pipes to TtyConsole 418 func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error { 419 go func() { 420 if wb, ok := pipes.Stdout.(interface { 421 CloseWriters() error 422 }); ok { 423 defer wb.CloseWriters() 424 } 425 426 pools.Copy(pipes.Stdout, t.console) 427 }() 428 429 if pipes.Stdin != nil { 430 go func() { 431 pools.Copy(t.console, pipes.Stdin) 432 433 pipes.Stdin.Close() 434 }() 435 } 436 437 return nil 438 } 439 440 // Close implements Close method of Terminal interface 441 func (t *TtyConsole) Close() error { 442 return t.console.Close() 443 } 444 445 func setupPipes(container *configs.Config, processConfig 
*execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error { 446 447 rootuid, err := container.HostUID() 448 if err != nil { 449 return err 450 } 451 452 if processConfig.Tty { 453 cons, err := p.NewConsole(rootuid) 454 if err != nil { 455 return err 456 } 457 term, err := NewTtyConsole(cons, pipes) 458 if err != nil { 459 return err 460 } 461 processConfig.Terminal = term 462 return nil 463 } 464 // not a tty--set up stdio pipes 465 term := &execdriver.StdConsole{} 466 processConfig.Terminal = term 467 468 // if we are not in a user namespace, there is no reason to go through 469 // the hassle of setting up os-level pipes with proper (remapped) ownership 470 // so we will do the prior shortcut for non-userns containers 471 if rootuid == 0 { 472 p.Stdout = pipes.Stdout 473 p.Stderr = pipes.Stderr 474 475 r, w, err := os.Pipe() 476 if err != nil { 477 return err 478 } 479 if pipes.Stdin != nil { 480 go func() { 481 io.Copy(w, pipes.Stdin) 482 w.Close() 483 }() 484 p.Stdin = r 485 } 486 return nil 487 } 488 489 // if we have user namespaces enabled (rootuid != 0), we will set 490 // up os pipes for stderr, stdout, stdin so we can chown them to 491 // the proper ownership to allow for proper access to the underlying 492 // fds 493 var fds []int 494 495 //setup stdout 496 r, w, err := os.Pipe() 497 if err != nil { 498 return err 499 } 500 fds = append(fds, int(r.Fd()), int(w.Fd())) 501 if pipes.Stdout != nil { 502 go io.Copy(pipes.Stdout, r) 503 } 504 term.Closers = append(term.Closers, r) 505 p.Stdout = w 506 507 //setup stderr 508 r, w, err = os.Pipe() 509 if err != nil { 510 return err 511 } 512 fds = append(fds, int(r.Fd()), int(w.Fd())) 513 if pipes.Stderr != nil { 514 go io.Copy(pipes.Stderr, r) 515 } 516 term.Closers = append(term.Closers, r) 517 p.Stderr = w 518 519 //setup stdin 520 r, w, err = os.Pipe() 521 if err != nil { 522 return err 523 } 524 fds = append(fds, int(r.Fd()), int(w.Fd())) 525 if pipes.Stdin != nil { 526 go func() { 
527 io.Copy(w, pipes.Stdin) 528 w.Close() 529 }() 530 p.Stdin = r 531 } 532 for _, fd := range fds { 533 if err := syscall.Fchown(fd, rootuid, rootuid); err != nil { 534 return fmt.Errorf("Failed to chown pipes fd: %v", err) 535 } 536 } 537 return nil 538 } 539 540 // SupportsHooks implements the execdriver Driver interface. 541 // The libcontainer/runC-based native execdriver does exploit the hook mechanism 542 func (d *Driver) SupportsHooks() bool { 543 return true 544 }