github.com/walkingsparrow/docker@v1.4.2-0.20151218153551-b708a2249bfa/daemon/execdriver/native/driver.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "strings" 13 "sync" 14 "syscall" 15 "time" 16 17 "github.com/Sirupsen/logrus" 18 "github.com/docker/docker/daemon/execdriver" 19 "github.com/docker/docker/daemon/execdriver/native/template" 20 "github.com/docker/docker/pkg/parsers" 21 "github.com/docker/docker/pkg/pools" 22 "github.com/docker/docker/pkg/reexec" 23 sysinfo "github.com/docker/docker/pkg/system" 24 "github.com/docker/docker/pkg/term" 25 "github.com/opencontainers/runc/libcontainer" 26 "github.com/opencontainers/runc/libcontainer/apparmor" 27 "github.com/opencontainers/runc/libcontainer/cgroups/systemd" 28 "github.com/opencontainers/runc/libcontainer/configs" 29 "github.com/opencontainers/runc/libcontainer/system" 30 "github.com/opencontainers/runc/libcontainer/utils" 31 ) 32 33 // Define constants for native driver 34 const ( 35 DriverName = "native" 36 Version = "0.2" 37 ) 38 39 // Driver contains all information for native driver, 40 // it implements execdriver.Driver. 41 type Driver struct { 42 root string 43 activeContainers map[string]libcontainer.Container 44 machineMemory int64 45 factory libcontainer.Factory 46 sync.Mutex 47 } 48 49 // NewDriver returns a new native driver, called from NewDriver of execdriver. 50 func NewDriver(root string, options []string) (*Driver, error) { 51 meminfo, err := sysinfo.ReadMemInfo() 52 if err != nil { 53 return nil, err 54 } 55 56 if err := sysinfo.MkdirAll(root, 0700); err != nil { 57 return nil, err 58 } 59 60 if apparmor.IsEnabled() { 61 if err := installAppArmorProfile(); err != nil { 62 apparmorProfiles := []string{"docker-default"} 63 64 // Allow daemon to run if loading failed, but are active 65 // (possibly through another run, manually, or via system startup) 66 for _, policy := range apparmorProfiles { 67 if err := hasAppArmorProfileLoaded(policy); err != nil { 68 return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy) 69 } 70 } 71 } 72 } 73 74 // choose cgroup manager 75 // this makes sure there are no breaking changes to people 76 // who upgrade from versions without native.cgroupdriver opt 77 cgm := libcontainer.Cgroupfs 78 79 // parse the options 80 for _, option := range options { 81 key, val, err := parsers.ParseKeyValueOpt(option) 82 if err != nil { 83 return nil, err 84 } 85 key = strings.ToLower(key) 86 switch key { 87 case "native.cgroupdriver": 88 // override the default if they set options 89 switch val { 90 case "systemd": 91 if systemd.UseSystemd() { 92 cgm = libcontainer.SystemdCgroups 93 template.SystemdCgroups = true 94 } else { 95 // warn them that they chose the wrong driver 96 logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead") 97 } 98 case "cgroupfs": 99 cgm = libcontainer.Cgroupfs 100 default: 101 return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val) 102 } 103 default: 104 return nil, fmt.Errorf("Unknown option %s\n", key) 105 } 106 } 107 108 f, err := libcontainer.New( 109 root, 110 cgm, 111 libcontainer.InitPath(reexec.Self(), DriverName), 112 ) 113 if err != nil { 114 return nil, err 115 } 116 117 return &Driver{ 118 root: root, 119 activeContainers: make(map[string]libcontainer.Container), 120 machineMemory: meminfo.MemTotal, 121 factory: f, 122 }, nil 123 } 124 125 type execOutput struct { 126 exitCode int 127 err error 128 } 129 130 // Run implements the exec driver Driver interface, 131 // it calls libcontainer APIs to run a container. 132 func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) { 133 destroyed := false 134 var err error 135 c.TmpDir, err = ioutil.TempDir("", c.ID) 136 if err != nil { 137 return execdriver.ExitStatus{ExitCode: -1}, err 138 } 139 defer os.RemoveAll(c.TmpDir) 140 141 // take the Command and populate the libcontainer.Config from it 142 container, err := d.createContainer(c, hooks) 143 if err != nil { 144 return execdriver.ExitStatus{ExitCode: -1}, err 145 } 146 147 p := &libcontainer.Process{ 148 Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), 149 Env: c.ProcessConfig.Env, 150 Cwd: c.WorkingDir, 151 User: c.ProcessConfig.User, 152 } 153 154 if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil { 155 return execdriver.ExitStatus{ExitCode: -1}, err 156 } 157 158 cont, err := d.factory.Create(c.ID, container) 159 if err != nil { 160 return execdriver.ExitStatus{ExitCode: -1}, err 161 } 162 d.Lock() 163 d.activeContainers[c.ID] = cont 164 d.Unlock() 165 defer func() { 166 if !destroyed { 167 cont.Destroy() 168 } 169 d.cleanContainer(c.ID) 170 }() 171 172 if err := cont.Start(p); err != nil { 173 return execdriver.ExitStatus{ExitCode: -1}, err 174 } 175 176 oom := notifyOnOOM(cont) 177 if hooks.Start != nil { 178 pid, err := p.Pid() 179 if err != nil { 180 p.Signal(os.Kill) 181 p.Wait() 182 return execdriver.ExitStatus{ExitCode: -1}, err 183 } 184 hooks.Start(&c.ProcessConfig, pid, oom) 185 } 186 187 waitF := p.Wait 188 if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { 189 // we need such hack for tracking processes with inherited fds, 190 // because cmd.Wait() waiting for all streams to be copied 191 waitF = waitInPIDHost(p, cont) 192 } 193 ps, err := waitF() 194 if err != nil { 195 execErr, ok := err.(*exec.ExitError) 196 if !ok { 197 return execdriver.ExitStatus{ExitCode: -1}, err 198 } 199 ps = execErr.ProcessState 200 } 201 cont.Destroy() 202 destroyed = true 203 _, oomKill := <-oom 204 return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil 205 } 206 207 // notifyOnOOM returns a channel that signals if the container received an OOM notification 208 // for any process. If it is unable to subscribe to OOM notifications then a closed 209 // channel is returned as it will be non-blocking and return the correct result when read. 210 func notifyOnOOM(container libcontainer.Container) <-chan struct{} { 211 oom, err := container.NotifyOOM() 212 if err != nil { 213 logrus.Warnf("Your kernel does not support OOM notifications: %s", err) 214 c := make(chan struct{}) 215 close(c) 216 return c 217 } 218 return oom 219 } 220 221 func killCgroupProcs(c libcontainer.Container) { 222 var procs []*os.Process 223 if err := c.Pause(); err != nil { 224 logrus.Warn(err) 225 } 226 pids, err := c.Processes() 227 if err != nil { 228 // don't care about childs if we can't get them, this is mostly because cgroup already deleted 229 logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err) 230 } 231 for _, pid := range pids { 232 if p, err := os.FindProcess(pid); err == nil { 233 procs = append(procs, p) 234 if err := p.Kill(); err != nil { 235 logrus.Warn(err) 236 } 237 } 238 } 239 if err := c.Resume(); err != nil { 240 logrus.Warn(err) 241 } 242 for _, p := range procs { 243 if _, err := p.Wait(); err != nil { 244 logrus.Warn(err) 245 } 246 } 247 } 248 249 func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) { 250 return func() (*os.ProcessState, error) { 251 pid, err := p.Pid() 252 if err != nil { 253 return nil, err 254 } 255 256 process, err := os.FindProcess(pid) 257 s, err := process.Wait() 258 if err != nil { 259 execErr, ok := err.(*exec.ExitError) 260 if !ok { 261 return s, err 262 } 263 s = execErr.ProcessState 264 } 265 killCgroupProcs(c) 266 p.Wait() 267 return s, err 268 } 269 } 270 271 // Kill implements the exec driver Driver interface. 272 func (d *Driver) Kill(c *execdriver.Command, sig int) error { 273 d.Lock() 274 active := d.activeContainers[c.ID] 275 d.Unlock() 276 if active == nil { 277 return fmt.Errorf("active container for %s does not exist", c.ID) 278 } 279 state, err := active.State() 280 if err != nil { 281 return err 282 } 283 return syscall.Kill(state.InitProcessPid, syscall.Signal(sig)) 284 } 285 286 // Pause implements the exec driver Driver interface, 287 // it calls libcontainer API to pause a container. 288 func (d *Driver) Pause(c *execdriver.Command) error { 289 d.Lock() 290 active := d.activeContainers[c.ID] 291 d.Unlock() 292 if active == nil { 293 return fmt.Errorf("active container for %s does not exist", c.ID) 294 } 295 return active.Pause() 296 } 297 298 // Unpause implements the exec driver Driver interface, 299 // it calls libcontainer API to unpause a container. 300 func (d *Driver) Unpause(c *execdriver.Command) error { 301 d.Lock() 302 active := d.activeContainers[c.ID] 303 d.Unlock() 304 if active == nil { 305 return fmt.Errorf("active container for %s does not exist", c.ID) 306 } 307 return active.Resume() 308 } 309 310 // Terminate implements the exec driver Driver interface. 311 func (d *Driver) Terminate(c *execdriver.Command) error { 312 defer d.cleanContainer(c.ID) 313 container, err := d.factory.Load(c.ID) 314 if err != nil { 315 return err 316 } 317 defer container.Destroy() 318 state, err := container.State() 319 if err != nil { 320 return err 321 } 322 pid := state.InitProcessPid 323 currentStartTime, err := system.GetProcessStartTime(pid) 324 if err != nil { 325 return err 326 } 327 if state.InitProcessStartTime == currentStartTime { 328 err = syscall.Kill(pid, 9) 329 syscall.Wait4(pid, nil, 0, nil) 330 } 331 return err 332 } 333 334 // Info implements the exec driver Driver interface. 335 func (d *Driver) Info(id string) execdriver.Info { 336 return &info{ 337 ID: id, 338 driver: d, 339 } 340 } 341 342 // Name implements the exec driver Driver interface. 343 func (d *Driver) Name() string { 344 return fmt.Sprintf("%s-%s", DriverName, Version) 345 } 346 347 // GetPidsForContainer implements the exec driver Driver interface. 348 func (d *Driver) GetPidsForContainer(id string) ([]int, error) { 349 d.Lock() 350 active := d.activeContainers[id] 351 d.Unlock() 352 353 if active == nil { 354 return nil, fmt.Errorf("active container for %s does not exist", id) 355 } 356 return active.Processes() 357 } 358 359 func (d *Driver) cleanContainer(id string) error { 360 d.Lock() 361 delete(d.activeContainers, id) 362 d.Unlock() 363 return os.RemoveAll(filepath.Join(d.root, id)) 364 } 365 366 func (d *Driver) createContainerRoot(id string) error { 367 return os.MkdirAll(filepath.Join(d.root, id), 0655) 368 } 369 370 // Clean implements the exec driver Driver interface. 371 func (d *Driver) Clean(id string) error { 372 return os.RemoveAll(filepath.Join(d.root, id)) 373 } 374 375 // Stats implements the exec driver Driver interface. 376 func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) { 377 d.Lock() 378 c := d.activeContainers[id] 379 d.Unlock() 380 if c == nil { 381 return nil, execdriver.ErrNotRunning 382 } 383 now := time.Now() 384 stats, err := c.Stats() 385 if err != nil { 386 return nil, err 387 } 388 memoryLimit := c.Config().Cgroups.Memory 389 // if the container does not have any memory limit specified set the 390 // limit to the machines memory 391 if memoryLimit == 0 { 392 memoryLimit = d.machineMemory 393 } 394 return &execdriver.ResourceStats{ 395 Stats: stats, 396 Read: now, 397 MemoryLimit: memoryLimit, 398 }, nil 399 } 400 401 // TtyConsole implements the exec driver Terminal interface. 402 type TtyConsole struct { 403 console libcontainer.Console 404 } 405 406 // NewTtyConsole returns a new TtyConsole struct. 407 func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) { 408 tty := &TtyConsole{ 409 console: console, 410 } 411 412 if err := tty.AttachPipes(pipes); err != nil { 413 tty.Close() 414 return nil, err 415 } 416 417 return tty, nil 418 } 419 420 // Resize implements Resize method of Terminal interface 421 func (t *TtyConsole) Resize(h, w int) error { 422 return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) 423 } 424 425 // AttachPipes attaches given pipes to TtyConsole 426 func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error { 427 go func() { 428 if wb, ok := pipes.Stdout.(interface { 429 CloseWriters() error 430 }); ok { 431 defer wb.CloseWriters() 432 } 433 434 pools.Copy(pipes.Stdout, t.console) 435 }() 436 437 if pipes.Stdin != nil { 438 go func() { 439 pools.Copy(t.console, pipes.Stdin) 440 441 pipes.Stdin.Close() 442 }() 443 } 444 445 return nil 446 } 447 448 // Close implements Close method of Terminal interface 449 func (t *TtyConsole) Close() error { 450 return t.console.Close() 451 } 452 453 func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error { 454 455 rootuid, err := container.HostUID() 456 if err != nil { 457 return err 458 } 459 460 if processConfig.Tty { 461 cons, err := p.NewConsole(rootuid) 462 if err != nil { 463 return err 464 } 465 term, err := NewTtyConsole(cons, pipes) 466 if err != nil { 467 return err 468 } 469 processConfig.Terminal = term 470 return nil 471 } 472 // not a tty--set up stdio pipes 473 term := &execdriver.StdConsole{} 474 processConfig.Terminal = term 475 476 // if we are not in a user namespace, there is no reason to go through 477 // the hassle of setting up os-level pipes with proper (remapped) ownership 478 // so we will do the prior shortcut for non-userns containers 479 if rootuid == 0 { 480 p.Stdout = pipes.Stdout 481 p.Stderr = pipes.Stderr 482 483 r, w, err := os.Pipe() 484 if err != nil { 485 return err 486 } 487 if pipes.Stdin != nil { 488 go func() { 489 io.Copy(w, pipes.Stdin) 490 w.Close() 491 }() 492 p.Stdin = r 493 } 494 return nil 495 } 496 497 // if we have user namespaces enabled (rootuid != 0), we will set 498 // up os pipes for stderr, stdout, stdin so we can chown them to 499 // the proper ownership to allow for proper access to the underlying 500 // fds 501 var fds []int 502 503 //setup stdout 504 r, w, err := os.Pipe() 505 if err != nil { 506 return err 507 } 508 fds = append(fds, int(r.Fd()), int(w.Fd())) 509 if pipes.Stdout != nil { 510 go io.Copy(pipes.Stdout, r) 511 } 512 term.Closers = append(term.Closers, r) 513 p.Stdout = w 514 515 //setup stderr 516 r, w, err = os.Pipe() 517 if err != nil { 518 return err 519 } 520 fds = append(fds, int(r.Fd()), int(w.Fd())) 521 if pipes.Stderr != nil { 522 go io.Copy(pipes.Stderr, r) 523 } 524 term.Closers = append(term.Closers, r) 525 p.Stderr = w 526 527 //setup stdin 528 r, w, err = os.Pipe() 529 if err != nil { 530 return err 531 } 532 fds = append(fds, int(r.Fd()), int(w.Fd())) 533 if pipes.Stdin != nil { 534 go func() { 535 io.Copy(w, pipes.Stdin) 536 w.Close() 537 }() 538 p.Stdin = r 539 } 540 for _, fd := range fds { 541 if err := syscall.Fchown(fd, rootuid, rootuid); err != nil { 542 return fmt.Errorf("Failed to chown pipes fd: %v", err) 543 } 544 } 545 return nil 546 } 547 548 // SupportsHooks implements the execdriver Driver interface. 549 // The libcontainer/runC-based native execdriver does exploit the hook mechanism 550 func (d *Driver) SupportsHooks() bool { 551 return true 552 }