github.com/anuvu/nomad@v0.8.7-atom1/client/driver/executor/executor_linux.go

package executor

import (
	"fmt"
	"os"
	"os/user"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/hashicorp/go-multierror"
	"github.com/mitchellh/go-ps"
	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
	cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"

	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

var (
	// The statistics the executor exposes when using cgroups
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)

// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
	if e.command.FSIsolation {
		if err := e.configureChroot(); err != nil {
			return err
		}
	}

	if e.command.ResourceLimits || e.command.BasicProcessCgroup {
		if err := e.configureCgroups(e.ctx.Task.Resources); err != nil {
			return fmt.Errorf("error creating cgroups: %v", err)
		}
	}
	return nil
}

// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
	if !(e.command.ResourceLimits || e.command.BasicProcessCgroup) {
		return nil
	}

	// Entering the process in the cgroup
	manager := getCgroupManager(e.resConCtx.groups, nil)
	if err := manager.Apply(pid); err != nil {
		e.logger.Printf("[ERR] executor: error applying pid to cgroup: %v", err)
		return err
	}

	e.resConCtx.cgPaths = manager.GetPaths()

	// Don't enter all the cgroups since we will inherit resources limits. Only
	// use devices (required by libcontainer) and freezer. Freezer allows us to
	// capture all pids and stop any fork/execs from happening while we are
	// cleaning up.
	if !e.command.ResourceLimits {
		// Move the executor into the global cgroup so that the task specific
		// cgroup can be destroyed.
		nilGroup := &cgroupConfig.Cgroup{}
		nilGroup.Path = "/"
		nilGroup.Resources = e.resConCtx.groups.Resources
		nilManager := getCgroupManager(nilGroup, nil)
		err := nilManager.Apply(pid)
		if err != nil {
			return fmt.Errorf("failed to remove executor pid %d: %v", pid, err)
		}

		// Grab the freezer and devices cgroup paths. We do this from the old
		// manager after the executor pid has been applied since there is no
		// other way to determine what the proper cgroup paths would be.
		freezer := &cgroupFs.FreezerGroup{}
		devices := &cgroupFs.DevicesGroup{}
		freezerName, devicesName := freezer.Name(), devices.Name()
		newPath := map[string]string{
			freezerName: e.resConCtx.cgPaths[freezerName],
			devicesName: e.resConCtx.cgPaths[devicesName],
		}

		// Clear the cgroups paths so that everything is properly cleaned except
		// the groups we want our process to stay in. This will delete the
		// directories from disk.
		manager.Cgroups.Paths = nil
		delete(manager.Paths, freezerName)
		delete(manager.Paths, devicesName)
		if err := manager.Destroy(); err != nil {
			e.logger.Printf("[ERR] executor: failed to destroy original: %v", err)
			return err
		}

		// Update our context such that the new cgroup manager only is tracking
		// the paths we care about now.
		e.resConCtx.cgPaths = newPath
		e.resConCtx.groups.Paths = newPath

		// Apply just the freezer and devices now
		manager = getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
		if err := manager.Apply(pid); err != nil {
			e.logger.Printf("[ERR] executor: error applying pid to cgroup subset %v: %v", e.resConCtx.cgPaths, err)
			return err
		}
	}

	cgConfig := cgroupConfig.Config{Cgroups: e.resConCtx.groups}
	if err := manager.Set(&cgConfig); err != nil {
		e.logger.Printf("[ERR] executor: error setting cgroup config: %v", err)
		if er := DestroyCgroup(e.resConCtx.groups, e.resConCtx.cgPaths, os.Getpid()); er != nil {
			e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
		}
		return err
	}
	return nil
}

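// exampleFreezerDevicesSubset is an illustrative sketch, not part of the
// original source: it shows, in isolation, the subset trick applyLimits uses
// when ResourceLimits is off. Given the full set of per-subsystem cgroup paths
// returned by manager.GetPaths(), keep only the freezer and devices entries so
// the task stays trackable (and freezable for cleanup) without inheriting any
// resource limits. The function name and the plain paths-map signature are
// assumptions made for the example.
func exampleFreezerDevicesSubset(allPaths map[string]string) map[string]string {
	freezer := &cgroupFs.FreezerGroup{}
	devices := &cgroupFs.DevicesGroup{}
	return map[string]string{
		freezer.Name(): allPaths[freezer.Name()],
		devices.Name(): allPaths[devices.Name()],
	}
}
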
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
	e.resConCtx.groups = &cgroupConfig.Cgroup{}
	e.resConCtx.groups.Resources = &cgroupConfig.Resources{}
	cgroupName := uuid.Generate()
	e.resConCtx.groups.Path = filepath.Join("/nomad", cgroupName)

	// Allow access to /dev/
	e.resConCtx.groups.Resources.AllowAllDevices = true

	// Use a cgroup but don't apply limits
	if !e.command.ResourceLimits {
		return nil
	}

	if resources.MemoryMB > 0 {
		// Total amount of memory allowed to consume
		e.resConCtx.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
		// Disable swap to avoid issues on the machine
		var memSwappiness int64 = 0
		e.resConCtx.groups.Resources.MemorySwappiness = &memSwappiness
	}

	if resources.CPU < 2 {
		return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
	}

	// Set the relative CPU shares for this cgroup.
	e.resConCtx.groups.Resources.CpuShares = int64(resources.CPU)

	if resources.IOPS != 0 {
		// Validate it is in an acceptable range.
		if resources.IOPS < 10 || resources.IOPS > 1000 {
			return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
		}

		e.resConCtx.groups.Resources.BlkioWeight = uint16(resources.IOPS)
	}

	return nil
}

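// exampleCgroupFromResources is an illustrative sketch, not part of the
// original source: it spells out the same Resources-to-cgroup translation that
// configureCgroups performs above, for one hypothetical task asking for 256 MB
// of memory, 500 MHz of CPU, and an IOPS weight of 500. The function name and
// the concrete values are assumptions made for the example.
func exampleCgroupFromResources() *cgroupConfig.Cgroup {
	swappiness := int64(0) // disable swap, as configureCgroups does
	return &cgroupConfig.Cgroup{
		Path: filepath.Join("/nomad", uuid.Generate()),
		Resources: &cgroupConfig.Resources{
			AllowAllDevices:  true,              // allow access to /dev/
			Memory:           256 * 1024 * 1024, // MemoryMB expressed in bytes
			MemorySwappiness: &swappiness,
			CpuShares:        500, // CPU (MHz) used directly as relative shares
			BlkioWeight:      500, // IOPS mapped onto the blkio weight (10-1000)
		},
	}
}
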
// Stats reports the resource utilization of the cgroup. If there is no resource
// isolation we aggregate the resource utilization of all the pids launched by
// the executor.
func (e *UniversalExecutor) Stats() (*cstructs.TaskResourceUsage, error) {
	// If we don't use full resource limits, fall back to normal collection. It
	// is not enough to be in the Cgroup since you must be in the memory, cpu,
	// and cpuacct cgroup to gather the correct statistics.
	if !e.command.ResourceLimits {
		pidStats, err := e.pidStats()
		if err != nil {
			return nil, err
		}
		return e.aggregatedResourceUsage(pidStats), nil
	}
	ts := time.Now()
	manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
	stats, err := manager.GetStats()
	if err != nil {
		return nil, err
	}

	// Memory Related Stats
	swap := stats.MemoryStats.SwapUsage
	maxUsage := stats.MemoryStats.Usage.MaxUsage
	rss := stats.MemoryStats.Stats["rss"]
	cache := stats.MemoryStats.Stats["cache"]
	ms := &cstructs.MemoryStats{
		RSS:            rss,
		Cache:          cache,
		Swap:           swap.Usage,
		MaxUsage:       maxUsage,
		KernelUsage:    stats.MemoryStats.KernelUsage.Usage,
		KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
		Measured:       ExecutorCgroupMeasuredMemStats,
	}

	// CPU Related Stats
	totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
	userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
	kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)

	totalPercent := e.totalCpuStats.Percent(totalProcessCPUUsage)
	cs := &cstructs.CpuStats{
		SystemMode:       e.systemCpuStats.Percent(kernelModeTime),
		UserMode:         e.userCpuStats.Percent(userModeTime),
		Percent:          totalPercent,
		ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
		ThrottledTime:    stats.CpuStats.ThrottlingData.ThrottledTime,
		TotalTicks:       e.systemCpuStats.TicksConsumed(totalPercent),
		Measured:         ExecutorCgroupMeasuredCpuStats,
	}
	taskResUsage := cstructs.TaskResourceUsage{
		ResourceUsage: &cstructs.ResourceUsage{
			MemoryStats: ms,
			CpuStats:    cs,
		},
		Timestamp: ts.UTC().UnixNano(),
	}
	if pidStats, err := e.pidStats(); err == nil {
		taskResUsage.Pids = pidStats
	}
	return &taskResUsage, nil
}

// runAs takes a user id as a string, looks up the user, and sets the command
// to execute as that user.
func (e *UniversalExecutor) runAs(userid string) error {
	u, err := user.Lookup(userid)
	if err != nil {
		return fmt.Errorf("Failed to identify user %v: %v", userid, err)
	}

	// Get the groups the user is a part of
	gidStrings, err := u.GroupIds()
	if err != nil {
		return fmt.Errorf("Unable to lookup user's group membership: %v", err)
	}

	gids := make([]uint32, 0, len(gidStrings))
	for _, gidString := range gidStrings {
		gidInt, err := strconv.Atoi(gidString)
		if err != nil {
			return fmt.Errorf("Unable to convert user's group to int %s: %v", gidString, err)
		}

		gids = append(gids, uint32(gidInt))
	}

	// Convert the uid and gid
	uid, err := strconv.ParseUint(u.Uid, 10, 32)
	if err != nil {
		return fmt.Errorf("Unable to convert userid to uint32: %s", err)
	}
	gid, err := strconv.ParseUint(u.Gid, 10, 32)
	if err != nil {
		return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
	}

	// Set the command to run as that user and group.
	if e.cmd.SysProcAttr == nil {
		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
	}
	if e.cmd.SysProcAttr.Credential == nil {
		e.cmd.SysProcAttr.Credential = &syscall.Credential{}
	}
	e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
	e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
	e.cmd.SysProcAttr.Credential.Groups = gids

	e.logger.Printf("[DEBUG] executor: running as user:group %d:%d with group membership in %v", uid, gid, gids)

	return nil
}

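// exampleCredentialFor is an illustrative sketch, not part of the original
// source: it isolates the uid/gid conversion that runAs performs above,
// producing the syscall.Credential that would be attached to the command's
// SysProcAttr. Supplementary groups are omitted for brevity; the function name
// is an assumption made for the example.
func exampleCredentialFor(username string) (*syscall.Credential, error) {
	u, err := user.Lookup(username)
	if err != nil {
		return nil, fmt.Errorf("failed to identify user %q: %v", username, err)
	}
	uid, err := strconv.ParseUint(u.Uid, 10, 32)
	if err != nil {
		return nil, fmt.Errorf("unable to convert uid %q: %v", u.Uid, err)
	}
	gid, err := strconv.ParseUint(u.Gid, 10, 32)
	if err != nil {
		return nil, fmt.Errorf("unable to convert gid %q: %v", u.Gid, err)
	}
	return &syscall.Credential{Uid: uint32(uid), Gid: uint32(gid)}, nil
}
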
// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
	if e.cmd.SysProcAttr == nil {
		e.cmd.SysProcAttr = &syscall.SysProcAttr{}
	}
	e.cmd.SysProcAttr.Chroot = e.ctx.TaskDir
	e.cmd.Dir = "/"

	e.fsIsolationEnforced = true
	return nil
}

// getAllPids returns the pids of all the processes spun up by the executor. We
// use the libcontainer apis to get the pids when the user is using cgroup
// isolation and we scan the entire process table if the user is not using any
// isolation.
func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
	if e.command.ResourceLimits || e.command.BasicProcessCgroup {
		manager := getCgroupManager(e.resConCtx.groups, e.resConCtx.cgPaths)
		pids, err := manager.GetAllPids()
		if err != nil {
			return nil, err
		}
		np := make(map[int]*nomadPid, len(pids))
		for _, pid := range pids {
			np[pid] = &nomadPid{
				pid:           pid,
				cpuStatsTotal: stats.NewCpuStats(),
				cpuStatsSys:   stats.NewCpuStats(),
				cpuStatsUser:  stats.NewCpuStats(),
			}
		}
		return np, nil
	}
	allProcesses, err := ps.Processes()
	if err != nil {
		return nil, err
	}
	return e.scanPids(os.Getpid(), allProcesses)
}

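// exampleListChildren is an illustrative sketch, not part of the original
// source: it shows the process-table fallback that getAllPids relies on when
// no cgroup is available, using github.com/mitchellh/go-ps to walk the full
// process table and keep the direct children of a given pid. The real scanPids
// helper is defined elsewhere in this package and is more thorough; the
// function name here is an assumption made for the example.
func exampleListChildren(parent int) ([]int, error) {
	procs, err := ps.Processes()
	if err != nil {
		return nil, err
	}
	var children []int
	for _, p := range procs {
		if p.PPid() == parent {
			children = append(children, p.Pid())
		}
	}
	return children, nil
}
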
// DestroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host. This function is idempotent.
func DestroyCgroup(groups *cgroupConfig.Cgroup, cgPaths map[string]string, executorPid int) error {
	mErrs := new(multierror.Error)
	if groups == nil {
		return fmt.Errorf("Can't destroy: cgroup configuration empty")
	}

	// Move the executor into the global cgroup so that the task specific
	// cgroup can be destroyed.
	nilGroup := &cgroupConfig.Cgroup{}
	nilGroup.Path = "/"
	nilGroup.Resources = groups.Resources
	nilManager := getCgroupManager(nilGroup, nil)
	err := nilManager.Apply(executorPid)
	if err != nil && !strings.Contains(err.Error(), "no such process") {
		return fmt.Errorf("failed to remove executor pid %d: %v", executorPid, err)
	}

	// Freeze the Cgroup so that it can not continue to fork/exec.
	manager := getCgroupManager(groups, cgPaths)
	err = manager.Freeze(cgroupConfig.Frozen)
	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
		return fmt.Errorf("failed to freeze cgroup: %v", err)
	}

	var procs []*os.Process
	pids, err := manager.GetAllPids()
	if err != nil {
		multierror.Append(mErrs, fmt.Errorf("error getting pids: %v", err))

		// Unfreeze the cgroup.
		err = manager.Freeze(cgroupConfig.Thawed)
		if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
			multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
		}
		return mErrs.ErrorOrNil()
	}

	// Kill the processes in the cgroup
	for _, pid := range pids {
		proc, err := os.FindProcess(pid)
		if err != nil {
			multierror.Append(mErrs, fmt.Errorf("error finding process %v: %v", pid, err))
			continue
		}

		procs = append(procs, proc)
		if e := proc.Kill(); e != nil {
			multierror.Append(mErrs, fmt.Errorf("error killing process %v: %v", pid, e))
		}
	}

	// Unfreeze the cgroup so we can wait.
	err = manager.Freeze(cgroupConfig.Thawed)
	if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
		multierror.Append(mErrs, fmt.Errorf("failed to unfreeze cgroup: %v", err))
	}

	// Wait on the killed processes to ensure they are cleaned up.
	for _, proc := range procs {
		// Don't capture the error because we expect this to fail for
		// processes we didn't fork.
		proc.Wait()
	}

	// Clear the cgroups paths so that everything is properly cleaned
	manager.Cgroups.Paths = nil

	// Remove the cgroup.
	if err := manager.Destroy(); err != nil {
		multierror.Append(mErrs, fmt.Errorf("failed to delete the cgroup directories: %v", err))
	}
	return mErrs.ErrorOrNil()
}

// getCgroupManager returns the correct libcontainer cgroup manager.
func getCgroupManager(groups *cgroupConfig.Cgroup, paths map[string]string) *cgroupFs.Manager {
	return &cgroupFs.Manager{Cgroups: groups, Paths: paths}
}

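// exampleCgroupLifecycle is an illustrative sketch, not part of the original
// source: it ties the helpers above together by creating a task cgroup,
// entering a pid into it, and later tearing everything down with DestroyCgroup,
// which freezes the cgroup, kills and reaps every pid inside it, and removes
// the cgroup directories. The function name, the resource values, and the pid
// handling are assumptions made for the example.
func exampleCgroupLifecycle(taskPid int) error {
	groups := &cgroupConfig.Cgroup{
		Path: filepath.Join("/nomad", uuid.Generate()),
		Resources: &cgroupConfig.Resources{
			AllowAllDevices: true,
			CpuShares:       512,
		},
	}
	manager := getCgroupManager(groups, nil)
	if err := manager.Apply(taskPid); err != nil {
		return fmt.Errorf("failed to enter pid %d into cgroup: %v", taskPid, err)
	}

	// ... the task runs ...

	return DestroyCgroup(groups, manager.GetPaths(), os.Getpid())
}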