github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/client/executor/exec_linux.go (about) 1 package executor 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "errors" 7 "fmt" 8 "io" 9 "os" 10 "os/exec" 11 "os/user" 12 "path/filepath" 13 "strconv" 14 "strings" 15 "syscall" 16 17 "github.com/hashicorp/go-multierror" 18 "github.com/hashicorp/nomad/client/allocdir" 19 "github.com/hashicorp/nomad/client/driver/args" 20 "github.com/hashicorp/nomad/client/driver/environment" 21 "github.com/hashicorp/nomad/command" 22 "github.com/hashicorp/nomad/helper/discover" 23 "github.com/hashicorp/nomad/nomad/structs" 24 25 cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" 26 cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" 27 ) 28 29 const ( 30 cgroupMount = "/sys/fs/cgroup" 31 ) 32 33 var ( 34 // A mapping of directories on the host OS to attempt to embed inside each 35 // task's chroot. 36 chrootEnv = map[string]string{ 37 "/bin": "/bin", 38 "/etc": "/etc", 39 "/lib": "/lib", 40 "/lib32": "/lib32", 41 "/lib64": "/lib64", 42 "/usr/bin": "/usr/bin", 43 "/usr/lib": "/usr/lib", 44 } 45 ) 46 47 func NewExecutor() Executor { 48 e := LinuxExecutor{} 49 50 // TODO: In a follow-up PR make it so this only happens once per client. 51 // Fingerprinting shouldn't happen per task. 52 53 // Check that cgroups are available. 54 if _, err := os.Stat(cgroupMount); err == nil { 55 e.cgroupEnabled = true 56 } 57 58 return &e 59 } 60 61 // Linux executor is designed to run on linux kernel 2.8+. 62 type LinuxExecutor struct { 63 cmd 64 user *user.User 65 66 // Finger print capabilities. 67 cgroupEnabled bool 68 69 // Isolation configurations. 70 groups *cgroupConfig.Cgroup 71 alloc *allocdir.AllocDir 72 taskName string 73 taskDir string 74 75 // Tracking of child process. 76 spawnChild exec.Cmd 77 spawnOutputWriter *os.File 78 spawnOutputReader *os.File 79 80 // Track whether there are filesystems mounted in the task dir. 81 mounts bool 82 } 83 84 func (e *LinuxExecutor) Limit(resources *structs.Resources) error { 85 if resources == nil { 86 return errNoResources 87 } 88 89 if e.cgroupEnabled { 90 return e.configureCgroups(resources) 91 } 92 93 return nil 94 } 95 96 func (e *LinuxExecutor) ConfigureTaskDir(taskName string, alloc *allocdir.AllocDir) error { 97 e.taskName = taskName 98 taskDir, ok := alloc.TaskDirs[taskName] 99 if !ok { 100 fmt.Errorf("Couldn't find task directory for task %v", taskName) 101 } 102 e.taskDir = taskDir 103 104 if err := alloc.MountSharedDir(taskName); err != nil { 105 return err 106 } 107 108 if err := alloc.Embed(taskName, chrootEnv); err != nil { 109 return err 110 } 111 112 // Mount dev 113 dev := filepath.Join(taskDir, "dev") 114 if err := os.Mkdir(dev, 0777); err != nil { 115 return fmt.Errorf("Mkdir(%v) failed: %v", dev, err) 116 } 117 118 if err := syscall.Mount("", dev, "devtmpfs", syscall.MS_RDONLY, ""); err != nil { 119 return fmt.Errorf("Couldn't mount /dev to %v: %v", dev, err) 120 } 121 122 // Mount proc 123 proc := filepath.Join(taskDir, "proc") 124 if err := os.Mkdir(proc, 0777); err != nil { 125 return fmt.Errorf("Mkdir(%v) failed: %v", proc, err) 126 } 127 128 if err := syscall.Mount("", proc, "proc", syscall.MS_RDONLY, ""); err != nil { 129 return fmt.Errorf("Couldn't mount /proc to %v: %v", proc, err) 130 } 131 132 // Set the tasks AllocDir environment variable. 133 env, err := environment.ParseFromList(e.Cmd.Env) 134 if err != nil { 135 return err 136 } 137 env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) 138 e.Cmd.Env = env.List() 139 140 e.alloc = alloc 141 e.mounts = true 142 return nil 143 } 144 145 func (e *LinuxExecutor) cleanTaskDir() error { 146 if e.alloc == nil { 147 return errors.New("ConfigureTaskDir() must be called before Start()") 148 } 149 150 if !e.mounts { 151 return nil 152 } 153 154 // Unmount dev. 155 errs := new(multierror.Error) 156 dev := filepath.Join(e.taskDir, "dev") 157 if err := syscall.Unmount(dev, 0); err != nil { 158 errs = multierror.Append(errs, fmt.Errorf("Failed to unmount dev (%v): %v", dev, err)) 159 } 160 161 // Unmount proc. 162 proc := filepath.Join(e.taskDir, "proc") 163 if err := syscall.Unmount(proc, 0); err != nil { 164 errs = multierror.Append(errs, fmt.Errorf("Failed to unmount proc (%v): %v", proc, err)) 165 } 166 167 e.mounts = false 168 return errs.ErrorOrNil() 169 } 170 171 func (e *LinuxExecutor) configureCgroups(resources *structs.Resources) error { 172 if !e.cgroupEnabled { 173 return nil 174 } 175 176 e.groups = &cgroupConfig.Cgroup{} 177 178 // Groups will be created in a heiarchy according to the resource being 179 // constrained, current session, and then this unique name. Restraints are 180 // then placed in the corresponding files. 181 // Ex: restricting a process to 2048Mhz CPU and 2MB of memory: 182 // $ cat /sys/fs/cgroup/cpu/user/1000.user/4.session/<uuid>/cpu.shares 183 // 2028 184 // $ cat /sys/fs/cgroup/memory/user/1000.user/4.session/<uuid>/memory.limit_in_bytes 185 // 2097152 186 e.groups.Name = structs.GenerateUUID() 187 188 // TODO: verify this is needed for things like network access 189 e.groups.AllowAllDevices = true 190 191 if resources.MemoryMB > 0 { 192 // Total amount of memory allowed to consume 193 e.groups.Memory = int64(resources.MemoryMB * 1024 * 1024) 194 // Disable swap to avoid issues on the machine 195 e.groups.MemorySwap = int64(-1) 196 } 197 198 if resources.CPU != 0 { 199 if resources.CPU < 2 { 200 return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) 201 } 202 203 // Set the relative CPU shares for this cgroup. 204 // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any 205 // given process will have at least that amount of resources, but likely 206 // more since it is (probably) rare that the machine will run at 100% 207 // CPU. This scale will cease to work if a node is overprovisioned. 208 e.groups.CpuShares = int64(resources.CPU) 209 } 210 211 if resources.IOPS != 0 { 212 // Validate it is in an acceptable range. 213 if resources.IOPS < 10 || resources.IOPS > 1000 { 214 return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) 215 } 216 217 e.groups.BlkioWeight = uint16(resources.IOPS) 218 } 219 220 return nil 221 } 222 223 func (e *LinuxExecutor) runAs(userid string) error { 224 errs := new(multierror.Error) 225 226 // First, try to lookup the user by uid 227 u, err := user.LookupId(userid) 228 if err == nil { 229 e.user = u 230 return nil 231 } else { 232 errs = multierror.Append(errs, err) 233 } 234 235 // Lookup failed, so try by username instead 236 u, err = user.Lookup(userid) 237 if err == nil { 238 e.user = u 239 return nil 240 } else { 241 errs = multierror.Append(errs, err) 242 } 243 244 // If we got here we failed to lookup based on id and username, so we'll 245 // return those errors. 246 return fmt.Errorf("Failed to identify user to run as: %s", errs) 247 } 248 249 func (e *LinuxExecutor) Start() error { 250 // Run as "nobody" user so we don't leak root privilege to the 251 // spawned process. 252 if err := e.runAs("nobody"); err == nil && e.user != nil { 253 e.cmd.SetUID(e.user.Uid) 254 e.cmd.SetGID(e.user.Gid) 255 } 256 257 if e.alloc == nil { 258 return errors.New("ConfigureTaskDir() must be called before Start()") 259 } 260 261 // Parse the commands arguments and replace instances of Nomad environment 262 // variables. 263 envVars, err := environment.ParseFromList(e.Cmd.Env) 264 if err != nil { 265 return err 266 } 267 268 combined := strings.Join(e.Cmd.Args, " ") 269 parsed, err := args.ParseAndReplace(combined, envVars.Map()) 270 if err != nil { 271 return err 272 } 273 e.Cmd.Args = parsed 274 275 return e.spawnDaemon() 276 } 277 278 // spawnDaemon executes a double fork to start the user command with proper 279 // isolation. Stores the child process for use in Wait. 280 func (e *LinuxExecutor) spawnDaemon() error { 281 bin, err := discover.NomadExecutable() 282 if err != nil { 283 return fmt.Errorf("Failed to determine the nomad executable: %v", err) 284 } 285 286 // Serialize the cmd and the cgroup configuration so it can be passed to the 287 // sub-process. 288 var buffer bytes.Buffer 289 enc := json.NewEncoder(&buffer) 290 291 c := command.DaemonConfig{ 292 Cmd: e.cmd.Cmd, 293 Chroot: e.taskDir, 294 StdoutFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stdout", e.taskName)), 295 StderrFile: filepath.Join(e.taskDir, allocdir.TaskLocal, fmt.Sprintf("%v.stderr", e.taskName)), 296 StdinFile: "/dev/null", 297 } 298 if err := enc.Encode(c); err != nil { 299 return fmt.Errorf("Failed to serialize daemon configuration: %v", err) 300 } 301 302 // Create a pipe to capture Stdout. 303 pr, pw, err := os.Pipe() 304 if err != nil { 305 return err 306 } 307 e.spawnOutputWriter = pw 308 e.spawnOutputReader = pr 309 310 // Call ourselves using a hidden flag. The new instance of nomad will join 311 // the passed cgroup, forkExec the cmd, and output status codes through 312 // Stdout. 313 escaped := strconv.Quote(buffer.String()) 314 spawn := exec.Command(bin, "spawn-daemon", escaped) 315 spawn.Stdout = e.spawnOutputWriter 316 317 // Capture its Stdin. 318 spawnStdIn, err := spawn.StdinPipe() 319 if err != nil { 320 return err 321 } 322 323 if err := spawn.Start(); err != nil { 324 fmt.Errorf("Failed to call spawn-daemon on nomad executable: %v", err) 325 } 326 327 // Join the spawn-daemon to the cgroup. 328 if e.groups != nil { 329 manager := cgroupFs.Manager{} 330 manager.Cgroups = e.groups 331 332 // Apply will place the current pid into the tasks file for each of the 333 // created cgroups: 334 // /sys/fs/cgroup/memory/user/1000.user/4.session/<uuid>/tasks 335 // 336 // Apply requires superuser permissions, and may fail if Nomad is not run with 337 // the required permissions 338 if err := manager.Apply(spawn.Process.Pid); err != nil { 339 errs := new(multierror.Error) 340 errs = multierror.Append(errs, fmt.Errorf("Failed to join spawn-daemon to the cgroup (config => %+v): %v", manager.Cgroups, err)) 341 342 if err := sendAbortCommand(spawnStdIn); err != nil { 343 errs = multierror.Append(errs, err) 344 } 345 346 return errs 347 } 348 } 349 350 // Tell it to start. 351 if err := sendStartCommand(spawnStdIn); err != nil { 352 return err 353 } 354 355 // Parse the response. 356 dec := json.NewDecoder(e.spawnOutputReader) 357 var resp command.SpawnStartStatus 358 if err := dec.Decode(&resp); err != nil { 359 return fmt.Errorf("Failed to parse spawn-daemon start response: %v", err) 360 } 361 362 if resp.ErrorMsg != "" { 363 return fmt.Errorf("Failed to execute user command: %s", resp.ErrorMsg) 364 } 365 366 e.spawnChild = *spawn 367 return nil 368 } 369 370 func sendStartCommand(w io.Writer) error { 371 enc := json.NewEncoder(w) 372 if err := enc.Encode(true); err != nil { 373 return fmt.Errorf("Failed to serialize start command: %v", err) 374 } 375 376 return nil 377 } 378 379 func sendAbortCommand(w io.Writer) error { 380 enc := json.NewEncoder(w) 381 if err := enc.Encode(false); err != nil { 382 return fmt.Errorf("Failed to serialize abort command: %v", err) 383 } 384 385 return nil 386 } 387 388 // Open's behavior is to kill all processes associated with the id and return an 389 // error. This is done because it is not possible to re-attach to the 390 // spawn-daemon's stdout to retrieve status messages. 391 func (e *LinuxExecutor) Open(id string) error { 392 parts := strings.SplitN(id, ":", 2) 393 if len(parts) != 2 { 394 return fmt.Errorf("Invalid id: %v", id) 395 } 396 397 switch parts[0] { 398 case "PID": 399 pid, err := strconv.Atoi(parts[1]) 400 if err != nil { 401 return fmt.Errorf("Invalid id: failed to parse pid %v", parts[1]) 402 } 403 404 process, err := os.FindProcess(pid) 405 if err != nil { 406 return fmt.Errorf("Failed to find Pid %v: %v", pid, err) 407 } 408 409 if err := process.Kill(); err != nil { 410 return fmt.Errorf("Failed to kill Pid %v: %v", pid, err) 411 } 412 case "CGROUP": 413 if !e.cgroupEnabled { 414 return errors.New("Passed a a cgroup identifier, but cgroups are disabled") 415 } 416 417 // De-serialize the cgroup configuration. 418 dec := json.NewDecoder(strings.NewReader(parts[1])) 419 var groups cgroupConfig.Cgroup 420 if err := dec.Decode(&groups); err != nil { 421 return fmt.Errorf("Failed to parse cgroup configuration: %v", err) 422 } 423 424 e.groups = &groups 425 if err := e.destroyCgroup(); err != nil { 426 return err 427 } 428 // TODO: cleanTaskDir is a little more complicated here because the OS 429 // may have already unmounted in the case of a restart. Need to scan. 430 default: 431 return fmt.Errorf("Invalid id type: %v", parts[0]) 432 } 433 434 return errors.New("Could not re-open to id (intended).") 435 } 436 437 func (e *LinuxExecutor) Wait() error { 438 if e.spawnChild.Process == nil { 439 return errors.New("Can not find child to wait on") 440 } 441 442 defer e.spawnOutputWriter.Close() 443 defer e.spawnOutputReader.Close() 444 445 errs := new(multierror.Error) 446 if err := e.spawnChild.Wait(); err != nil { 447 errs = multierror.Append(errs, fmt.Errorf("Wait failed on pid %v: %v", e.spawnChild.Process.Pid, err)) 448 } 449 450 // If they fork/exec and then exit, wait will return but they will be still 451 // running processes so we need to kill the full cgroup. 452 if e.groups != nil { 453 if err := e.destroyCgroup(); err != nil { 454 errs = multierror.Append(errs, err) 455 } 456 } 457 458 if err := e.cleanTaskDir(); err != nil { 459 errs = multierror.Append(errs, err) 460 } 461 462 return errs.ErrorOrNil() 463 } 464 465 // If cgroups are used, the ID is the cgroup structurue. Otherwise, it is the 466 // PID of the spawn-daemon process. An error is returned if the process was 467 // never started. 468 func (e *LinuxExecutor) ID() (string, error) { 469 if e.spawnChild.Process != nil { 470 if e.cgroupEnabled && e.groups != nil { 471 // Serialize the cgroup structure so it can be undone on suabsequent 472 // opens. 473 var buffer bytes.Buffer 474 enc := json.NewEncoder(&buffer) 475 if err := enc.Encode(e.groups); err != nil { 476 return "", fmt.Errorf("Failed to serialize daemon configuration: %v", err) 477 } 478 479 return fmt.Sprintf("CGROUP:%v", buffer.String()), nil 480 } 481 482 return fmt.Sprintf("PID:%d", e.spawnChild.Process.Pid), nil 483 } 484 485 return "", fmt.Errorf("Process has finished or was never started") 486 } 487 488 func (e *LinuxExecutor) Shutdown() error { 489 return e.ForceStop() 490 } 491 492 func (e *LinuxExecutor) ForceStop() error { 493 if e.spawnOutputReader != nil { 494 e.spawnOutputReader.Close() 495 } 496 497 if e.spawnOutputWriter != nil { 498 e.spawnOutputWriter.Close() 499 } 500 501 // If the task is not running inside a cgroup then just the spawn-daemon child is killed. 502 // TODO: Find a good way to kill the children of the spawn-daemon. 503 if e.groups == nil { 504 if err := e.spawnChild.Process.Kill(); err != nil { 505 return fmt.Errorf("Failed to kill child (%v): %v", e.spawnChild.Process.Pid, err) 506 } 507 508 return nil 509 } 510 511 errs := new(multierror.Error) 512 if e.groups != nil { 513 if err := e.destroyCgroup(); err != nil { 514 errs = multierror.Append(errs, err) 515 } 516 } 517 518 if err := e.cleanTaskDir(); err != nil { 519 errs = multierror.Append(errs, err) 520 } 521 522 return errs.ErrorOrNil() 523 } 524 525 func (e *LinuxExecutor) destroyCgroup() error { 526 if e.groups == nil { 527 return errors.New("Can't destroy: cgroup configuration empty") 528 } 529 530 manager := cgroupFs.Manager{} 531 manager.Cgroups = e.groups 532 pids, err := manager.GetPids() 533 if err != nil { 534 return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err) 535 } 536 537 errs := new(multierror.Error) 538 for _, pid := range pids { 539 process, err := os.FindProcess(pid) 540 if err != nil { 541 multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err)) 542 continue 543 } 544 545 if err := process.Kill(); err != nil { 546 multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err)) 547 continue 548 } 549 550 if _, err := process.Wait(); err != nil { 551 multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err)) 552 continue 553 } 554 } 555 556 // Remove the cgroup. 557 if err := manager.Destroy(); err != nil { 558 multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err)) 559 } 560 561 if len(errs.Errors) != 0 { 562 return fmt.Errorf("Failed to destroy cgroup: %v", errs) 563 } 564 565 return nil 566 } 567 568 func (e *LinuxExecutor) Command() *cmd { 569 return &e.cmd 570 }