github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/client/driver/executor/executor_linux.go (about) 1 package executor 2 3 import ( 4 "fmt" 5 "os" 6 "os/user" 7 "path/filepath" 8 "strconv" 9 "strings" 10 "syscall" 11 "time" 12 13 "github.com/hashicorp/go-multierror" 14 "github.com/mitchellh/go-ps" 15 "github.com/opencontainers/runc/libcontainer/cgroups" 16 cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs" 17 cgroupConfig "github.com/opencontainers/runc/libcontainer/configs" 18 "github.com/opencontainers/runc/libcontainer/system" 19 20 "github.com/hashicorp/nomad/client/allocdir" 21 "github.com/hashicorp/nomad/client/stats" 22 cstructs "github.com/hashicorp/nomad/client/structs" 23 "github.com/hashicorp/nomad/nomad/structs" 24 ) 25 26 var ( 27 // A mapping of directories on the host OS to attempt to embed inside each 28 // task's chroot. 29 chrootEnv = map[string]string{ 30 "/bin": "/bin", 31 "/etc": "/etc", 32 "/lib": "/lib", 33 "/lib32": "/lib32", 34 "/lib64": "/lib64", 35 "/run/resolvconf": "/run/resolvconf", 36 "/sbin": "/sbin", 37 "/usr": "/usr", 38 } 39 40 // clockTicks is the clocks per second of the machine 41 clockTicks = uint64(system.GetClockTicks()) 42 43 // The statistics the executor exposes when using cgroups 44 ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"} 45 ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"} 46 ) 47 48 // configureIsolation configures chroot and creates cgroups 49 func (e *UniversalExecutor) configureIsolation() error { 50 if e.command.FSIsolation { 51 if err := e.configureChroot(); err != nil { 52 return err 53 } 54 } 55 56 if e.command.ResourceLimits { 57 if err := e.configureCgroups(e.ctx.Task.Resources); err != nil { 58 return fmt.Errorf("error creating cgroups: %v", err) 59 } 60 } 61 return nil 62 } 63 64 // applyLimits puts a process in a pre-configured cgroup 65 func (e *UniversalExecutor) applyLimits(pid int) error { 66 if !e.command.ResourceLimits { 67 return nil 68 } 69 70 // Entering the process in the cgroup 71 manager := getCgroupManager(e.resConCtx.groups, nil) 72 if err := manager.Apply(pid); err != nil { 73 e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err) 74 if er := e.removeChrootMounts(); er != nil { 75 e.logger.Printf("[ERR] executor: error removing chroot: %v", er) 76 } 77 return err 78 } 79 e.resConCtx.cgPaths = manager.GetPaths() 80 cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups} 81 if err := manager.Set(&cgConfig); err != nil { 82 e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err) 83 if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil { 84 e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er) 85 } 86 if er := e.removeChrootMounts(); er != nil { 87 e.logger.Printf("[ERR] executor: error removing chroot: %v", er) 88 } 89 return err 90 } 91 return nil 92 } 93 94 // configureCgroups converts a Nomad Resources specification into the equivalent 95 // cgroup configuration. It returns an error if the resources are invalid. 96 func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error { 97 e.resConCtx.groups = &cgroupConfig.Cgroup{} 98 e.resConCtx.groups.Resources = &cgroupConfig.Resources{} 99 cgroupName := structs.GenerateUUID() 100 e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName) 101 102 // TODO: verify this is needed for things like network access 103 e.resConCtx.groups.Resources.AllowAllDevices = true 104 105 if resources.MemoryMB > 0 { 106 // Total amount of memory allowed to consume 107 e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024) 108 // Disable swap to avoid issues on the machine 109 e.resConCtx.groups.Resources.MemorySwap = int64(-1) 110 } 111 112 if resources.CPU < 2 { 113 return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU) 114 } 115 116 // Set the relative CPU shares for this cgroup. 117 e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU) 118 119 if resources.IOPS != 0 { 120 // Validate it is in an acceptable range. 121 if resources.IOPS < 10 || resources.IOPS > 1000 { 122 return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS) 123 } 124 125 e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS) 126 } 127 128 return nil 129 } 130 131 // Stats reports the resource utilization of the cgroup. If there is no resource 132 // isolation we aggregate the resource utilization of all the pids launched by 133 // the executor. 134 func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) { 135 if !e.command.ResourceLimits { 136 pidStats, err := e.pidStats() 137 if err != nil { 138 return nil, err 139 } 140 return e.aggregatedResourceUsage(pidStats), nil 141 } 142 ts := time.Now() 143 manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths) 144 stats, err := manager.GetStats() 145 if err != nil { 146 return nil, err 147 } 148 149 // Memory Related Stats 150 swap := stats.MemoryStats.SwapUsage 151 maxUsage := stats.MemoryStats.Usage.MaxUsage 152 rss := stats.MemoryStats.Stats["rss"] 153 cache := stats.MemoryStats.Stats["cache"] 154 ms := &cstructs.MemoryStats{ 155 RSS: rss, 156 Cache: cache, 157 Swap: swap.Usage, 158 MaxUsage: maxUsage, 159 KernelUsage: stats.MemoryStats.KernelUsage.Usage, 160 KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage, 161 Measured: ExecutorCgroupMeasuredMemStats, 162 } 163 164 // CPU Related Stats 165 totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage) 166 userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode) 167 kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode) 168 169 totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage) 170 cs := &cstructs.CpuStats{ 171 SystemMode: e.systemCpuStats.Percent(kernelModeTime), 172 UserMode: e.userCpuStats.Percent(userModeTime), 173 Percent: totalPercent, 174 ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods, 175 ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime, 176 TotalTicks: e.systemCpuStats.TicksConsumed(totalPercent), 177 Measured: ExecutorCgroupMeasuredCpuStats, 178 } 179 taskResUsage := cstructs.TaskResourceUsage{ 180 ResourceUsage: &cstructs.ResourceUsage{ 181 MemoryStats: ms, 182 CpuStats: cs, 183 }, 184 Timestamp: ts.UTC().UnixNano(), 185 } 186 if pidStats, err := e.pidStats(); err == nil { 187 taskResUsage.Pids = pidStats 188 } 189 return &taskResUsage, nil 190 } 191 192 // runAs takes a user id as a string and looks up the user, and sets the command 193 // to execute as that user. 194 func (e *UniversalExecutor) runAs(userid string) error { 195 u, err := user.Lookup(userid) 196 if err != nil { 197 return fmt.Errorf("Failed to identify user %v: %v", userid, err) 198 } 199 200 // Convert the uid and gid 201 uid, err := strconv.ParseUint(u.Uid, 10, 32) 202 if err != nil { 203 return fmt.Errorf("Unable to convert userid to uint32: %s", err) 204 } 205 gid, err := strconv.ParseUint(u.Gid, 10, 32) 206 if err != nil { 207 return fmt.Errorf("Unable to convert groupid to uint32: %s", err) 208 } 209 210 // Set the command to run as that user and group. 211 if e.cmd.SysProcAttr == nil { 212 e.cmd.SysProcAttr = &syscall.SysProcAttr{} 213 } 214 if e.cmd.SysProcAttr.Credential == nil { 215 e.cmd.SysProcAttr.Credential = &syscall.Credential{} 216 } 217 e.cmd.SysProcAttr.Credential.Uid = uint32(uid) 218 e.cmd.SysProcAttr.Credential.Gid = uint32(gid) 219 220 return nil 221 } 222 223 // configureChroot configures a chroot 224 func (e *UniversalExecutor) configureChroot() error { 225 allocDir := e.ctx.AllocDir 226 if err := allocDir.MountSharedDir(e.ctx.Task.Name); err != nil { 227 return err 228 } 229 230 chroot := chrootEnv 231 if len(e.ctx.ChrootEnv) > 0 { 232 chroot = e.ctx.ChrootEnv 233 } 234 235 if err := allocDir.Embed(e.ctx.Task.Name, chroot); err != nil { 236 return err 237 } 238 239 // Set the tasks AllocDir environment variable. 240 e.ctx.TaskEnv. 241 SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)). 242 SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)). 243 SetSecretsDir(filepath.Join("/", allocdir.TaskSecrets)). 244 Build() 245 246 if e.cmd.SysProcAttr == nil { 247 e.cmd.SysProcAttr = &syscall.SysProcAttr{} 248 } 249 e.cmd.SysProcAttr.Chroot = e.taskDir 250 e.cmd.Dir = "/" 251 252 if err := allocDir.MountSpecialDirs(e.taskDir); err != nil { 253 return err 254 } 255 256 e.fsIsolationEnforced = true 257 return nil 258 } 259 260 // cleanTaskDir is an idempotent operation to clean the task directory and 261 // should be called when tearing down the task. 262 func (e *UniversalExecutor) removeChrootMounts() error { 263 // Prevent a race between Wait/ForceStop 264 e.resConCtx.cgLock.Lock() 265 defer e.resConCtx.cgLock.Unlock() 266 return e.ctx.AllocDir.UnmountAll() 267 } 268 269 // getAllPids returns the pids of all the processes spun up by the executor. We 270 // use the libcontainer apis to get the pids when the user is using cgroup 271 // isolation and we scan the entire process table if the user is not using any 272 // isolation 273 func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) { 274 if e.command.ResourceLimits { 275 manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths) 276 pids, err := manager.GetAllPids() 277 if err != nil { 278 return nil, err 279 } 280 np := make(map[int]*nomadPid, len(pids)) 281 for _, pid := range pids { 282 np[pid] = &nomadPid{ 283 pid: pid, 284 cpuStatsTotal: stats.NewCpuStats(), 285 cpuStatsSys: stats.NewCpuStats(), 286 cpuStatsUser: stats.NewCpuStats(), 287 } 288 } 289 return np, nil 290 } 291 allProcesses, err := ps.Processes() 292 if err != nil { 293 return nil, err 294 } 295 return e.scanPids(os.Getpid(), allProcesses) 296 } 297 298 // destroyCgroup kills all processes in the cgroup and removes the cgroup 299 // configuration from the host. This function is idempotent. 300 func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error { 301 mErrs := new(multierror.Error) 302 if groups == nil { 303 return fmt.Errorf("Can't destroy: cgroup configuration empty") 304 } 305 306 // Move the executor into the global cgroup so that the task specific 307 // cgroup can be destroyed. 308 nilGroup := &cgroupConfig.Cgroup{} 309 nilGroup.Path = "/" 310 nilGroup.Resources = groups.Resources 311 nilManager := getCgroupManager(nilGroup, nil) 312 err := nilManager.Apply(executorPid) 313 if err != nil && !strings.Contains(err.Error(), "no such process") { 314 return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err) 315 } 316 317 // Freeze the Cgroup so that it can not continue to fork/exec. 318 manager := getCgroupManager(groups, cgPaths) 319 err = manager.Freeze(cgroupConfig.Frozen) 320 if err != nil && !strings.Contains(err.Error(), "no such file or directory") { 321 return fmt.Errorf("failed to freeze cgroup: %v", err) 322 } 323 324 var procs []*os.Process 325 pids, err := manager.GetAllPids() 326 if err != nil { 327 multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err)) 328 329 // Unfreeze the cgroup. 330 err = manager.Freeze(cgroupConfig.Thawed) 331 if err != nil && !strings.Contains(err.Error(), "no such file or directory") { 332 multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err)) 333 } 334 return mErrs.ErrorOrNil() 335 } 336 337 // Kill the processes in the cgroup 338 for _, pid := range pids { 339 proc, err := os.FindProcess(pid) 340 if err != nil { 341 multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err)) 342 continue 343 } 344 345 procs = append(procs, proc) 346 if e := proc.Kill(); e != nil { 347 multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e)) 348 } 349 } 350 351 // Unfreeze the cgroug so we can wait. 352 err = manager.Freeze(cgroupConfig.Thawed) 353 if err != nil && !strings.Contains(err.Error(), "no such file or directory") { 354 multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err)) 355 } 356 357 // Wait on the killed processes to ensure they are cleaned up. 358 for _, proc := range procs { 359 // Don't capture the error because we expect this to fail for 360 // processes we didn't fork. 361 proc.Wait() 362 } 363 364 // Remove the cgroup. 365 if err := manager.Destroy(); err != nil { 366 multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err)) 367 } 368 return mErrs.ErrorOrNil() 369 } 370 371 // getCgroupManager returns the correct libcontainer cgroup manager. 372 func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) cgroups.Manager { 373 return &cgroupFs.Manager{Cgroups: groups, Paths: paths} 374 }