github.com/rkt/rkt@v1.30.1-0.20200224141603-171c416fac02/stage1/init/common/pod.go (about) 1 // Copyright 2014 The rkt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //+build linux 16 17 package common 18 19 import ( 20 "errors" 21 "fmt" 22 "io" 23 "io/ioutil" 24 "os" 25 "path" 26 "path/filepath" 27 "regexp" 28 "strconv" 29 "strings" 30 "syscall" 31 32 "github.com/rkt/rkt/pkg/acl" 33 stage1commontypes "github.com/rkt/rkt/stage1/common/types" 34 35 "github.com/appc/spec/schema" 36 "github.com/appc/spec/schema/types" 37 "github.com/coreos/go-systemd/unit" 38 "github.com/hashicorp/errwrap" 39 40 "github.com/rkt/rkt/common" 41 "github.com/rkt/rkt/pkg/fileutil" 42 "github.com/rkt/rkt/pkg/user" 43 ) 44 45 const ( 46 // FlavorFile names the file storing the pod's flavor 47 FlavorFile = "flavor" 48 ) 49 50 // execEscape uses Golang's string quoting for ", \, \n, and regex for special cases 51 func execEscape(i int, str string) string { 52 escapeMap := map[string]string{ 53 `'`: `\`, 54 } 55 56 if i > 0 { // These are escaped only after the first argument 57 escapeMap[`$`] = `$` 58 escapeMap[`%`] = `%` 59 } 60 61 escArg := fmt.Sprintf("%q", str) 62 for k := range escapeMap { 63 reStr := `([` + regexp.QuoteMeta(k) + `])` 64 re := regexp.MustCompile(reStr) 65 escArg = re.ReplaceAllStringFunc(escArg, func(s string) string { 66 escaped := escapeMap[s] + s 67 return escaped 68 }) 69 } 70 return escArg 71 } 72 73 // quoteExec returns an array of quoted strings appropriate for systemd execStart usage 74 func quoteExec(exec []string) string { 75 if len(exec) == 0 { 76 // existing callers always include at least the binary so this shouldn't occur. 77 panic("empty exec") 78 } 79 80 var qexec []string 81 for i, arg := range exec { 82 escArg := execEscape(i, arg) 83 qexec = append(qexec, escArg) 84 } 85 return strings.Join(qexec, " ") 86 } 87 88 func writeAppReaper(p *stage1commontypes.Pod, appName string, appRootDirectory string, binPath string) error { 89 opts := []*unit.UnitOption{ 90 unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)), 91 unit.NewUnitOption("Unit", "DefaultDependencies", "false"), 92 unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"), 93 unit.NewUnitOption("Unit", "Wants", "shutdown.service"), 94 unit.NewUnitOption("Unit", "After", "shutdown.service"), 95 unit.NewUnitOption("Unit", "Conflicts", "exit.target"), 96 unit.NewUnitOption("Unit", "Conflicts", "halt.target"), 97 unit.NewUnitOption("Unit", "Conflicts", "poweroff.target"), 98 unit.NewUnitOption("Service", "RemainAfterExit", "yes"), 99 unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf("/reaper.sh \"%s\" \"%s\" \"%s\"", appName, appRootDirectory, binPath)), 100 } 101 102 unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) 103 file, err := os.OpenFile(filepath.Join(unitsPath, fmt.Sprintf("reaper-%s.service", appName)), os.O_WRONLY|os.O_CREATE, 0644) 104 if err != nil { 105 return errwrap.Wrap(errors.New("failed to create service unit file"), err) 106 } 107 defer file.Close() 108 109 if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { 110 return errwrap.Wrap(errors.New("failed to write service unit file"), err) 111 } 112 113 return nil 114 } 115 116 // SetJournalPermissions sets ACLs and permissions so the rkt group can access 117 // the pod's logs 118 func SetJournalPermissions(p *stage1commontypes.Pod) error { 119 s1 := common.Stage1ImagePath(p.Root) 120 121 rktgid, err := common.LookupGid(common.RktGroup) 122 if err != nil { 123 return fmt.Errorf("group %q not found", common.RktGroup) 124 } 125 126 journalPath := filepath.Join(s1, "rootfs", "var", "log", "journal") 127 if err := os.MkdirAll(journalPath, os.FileMode(0755)); err != nil { 128 return errwrap.Wrap(errors.New("error creating journal dir"), err) 129 } 130 131 a, err := acl.InitACL() 132 if err != nil { 133 return err 134 } 135 defer a.Free() 136 137 if err := a.ParseACL(fmt.Sprintf("g:%d:r-x,m:r-x", rktgid)); err != nil { 138 return errwrap.Wrap(errors.New("error parsing ACL string"), err) 139 } 140 141 if err := a.AddBaseEntries(journalPath); err != nil { 142 return errwrap.Wrap(errors.New("error adding base ACL entries"), err) 143 } 144 145 if err := a.Valid(); err != nil { 146 return err 147 } 148 149 if err := a.SetFileACLDefault(journalPath); err != nil { 150 return errwrap.Wrap(fmt.Errorf("error setting default ACLs on %q", journalPath), err) 151 } 152 153 return nil 154 } 155 156 func generateGidArg(gid int, supplGid []int) string { 157 arg := []string{strconv.Itoa(gid)} 158 for _, sg := range supplGid { 159 arg = append(arg, strconv.Itoa(sg)) 160 } 161 return strings.Join(arg, ",") 162 } 163 164 // findHostPort returns the port number on the host that corresponds to an 165 // image manifest port identified by name 166 func findHostPort(pm schema.PodManifest, name types.ACName) uint { 167 var port uint 168 for _, p := range pm.Ports { 169 if p.Name == name { 170 port = p.HostPort 171 } 172 } 173 return port 174 } 175 176 // generateSysusers generates systemd sysusers files for a given app so that 177 // corresponding entries in /etc/passwd and /etc/group are created in stage1. 178 // This is needed to use the "User=" and "Group=" options in the systemd 179 // service files of apps. 180 // If there're several apps defining the same UIDs/GIDs, systemd will take care 181 // of only generating one /etc/{passwd,group} entry 182 func generateSysusers(p *stage1commontypes.Pod, ra *schema.RuntimeApp, uid_ int, gid_ int, uidRange *user.UidRange) error { 183 var toShift []string 184 185 app := ra.App 186 appName := ra.Name 187 188 sysusersDir := path.Join(common.Stage1RootfsPath(p.Root), "usr/lib/sysusers.d") 189 toShift = append(toShift, sysusersDir) 190 if err := os.MkdirAll(sysusersDir, 0755); err != nil { 191 return err 192 } 193 194 gids := append(app.SupplementaryGIDs, gid_) 195 196 // Create the Unix user and group 197 var sysusersConf []string 198 199 for _, g := range gids { 200 groupname := "gen" + strconv.Itoa(g) 201 sysusersConf = append(sysusersConf, fmt.Sprintf("g %s %d\n", groupname, g)) 202 } 203 204 username := "gen" + strconv.Itoa(uid_) 205 sysusersConf = append(sysusersConf, fmt.Sprintf("u %s %d \"%s\"\n", username, uid_, username)) 206 207 sysusersFile := path.Join(common.Stage1RootfsPath(p.Root), "usr/lib/sysusers.d", ServiceUnitName(appName)+".conf") 208 toShift = append(toShift, sysusersFile) 209 if err := ioutil.WriteFile(sysusersFile, []byte(strings.Join(sysusersConf, "\n")), 0640); err != nil { 210 return err 211 } 212 213 if err := user.ShiftFiles(toShift, uidRange); err != nil { 214 return err 215 } 216 217 return nil 218 } 219 220 // lookupPathInsideApp returns the path (relative to the app rootfs) of the 221 // given binary. It will look up on "paths" (also relative to the app rootfs) 222 // and evaluate possible symlinks to check if the resulting path is actually 223 // executable. 224 func lookupPathInsideApp(bin string, paths string, appRootfs string, workDir string) (string, error) { 225 pathsArr := filepath.SplitList(paths) 226 var appPathsArr []string 227 for _, p := range pathsArr { 228 if !filepath.IsAbs(p) { 229 p = filepath.Join(workDir, p) 230 } 231 appPathsArr = append(appPathsArr, filepath.Join(appRootfs, p)) 232 } 233 for _, path := range appPathsArr { 234 binPath := filepath.Join(path, bin) 235 stage2Path := strings.TrimPrefix(binPath, appRootfs) 236 binRealPath, err := EvaluateSymlinksInsideApp(appRootfs, stage2Path) 237 if err != nil { 238 return "", errwrap.Wrap(fmt.Errorf("could not evaluate path %v", stage2Path), err) 239 } 240 binRealPath = filepath.Join(appRootfs, binRealPath) 241 if fileutil.IsExecutable(binRealPath) { 242 // The real path is executable, return the path relative to the app 243 return stage2Path, nil 244 } 245 } 246 return "", fmt.Errorf("unable to find %q in %q", bin, paths) 247 } 248 249 // appSearchPaths returns a list of paths where we should search for 250 // non-absolute exec binaries 251 func appSearchPaths(p *stage1commontypes.Pod, workDir string, app types.App) []string { 252 appEnv := app.Environment 253 254 if imgPath, ok := appEnv.Get("PATH"); ok { 255 return strings.Split(imgPath, ":") 256 } 257 258 // emulate exec(3) behavior, first check working directory and then the 259 // list of directories returned by confstr(_CS_PATH). That's typically 260 // "/bin:/usr/bin" so let's use that. 261 return []string{workDir, "/bin", "/usr/bin"} 262 } 263 264 // FindBinPath takes a binary path and returns a the absolute path of the 265 // binary relative to the app rootfs. This can be passed to ExecStart on the 266 // app's systemd service file directly. 267 func FindBinPath(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (string, error) { 268 if len(ra.App.Exec) == 0 { 269 return "", errors.New("app has no executable") 270 } 271 272 bin := ra.App.Exec[0] 273 274 var binPath string 275 switch { 276 // absolute path, just use it 277 case filepath.IsAbs(bin): 278 binPath = bin 279 // non-absolute path containing a slash, look in the working dir 280 case strings.Contains(bin, "/"): 281 binPath = filepath.Join(ra.App.WorkingDirectory, bin) 282 // filename, search in the app's $PATH 283 default: 284 absRoot, err := filepath.Abs(p.Root) 285 if err != nil { 286 return "", errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) 287 } 288 appRootfs := common.AppRootfsPath(absRoot, ra.Name) 289 appPathDirs := appSearchPaths(p, ra.App.WorkingDirectory, *ra.App) 290 appPath := strings.Join(appPathDirs, ":") 291 292 binPath, err = lookupPathInsideApp(bin, appPath, appRootfs, ra.App.WorkingDirectory) 293 if err != nil { 294 return "", errwrap.Wrap(fmt.Errorf("error looking up %q", bin), err) 295 } 296 } 297 298 return binPath, nil 299 } 300 301 // generateDeviceAllows generates a DeviceAllow= line for an app. 302 // To make it work, the path needs to start with "/dev" but the device won't 303 // exist inside the container. So for a given mount, if the volume is a device 304 // node, we create a symlink to its target in "/rkt/volumes". Later, 305 // prepare-app will copy those to "/dev/.rkt/" so that's what we use in the 306 // DeviceAllow= line. 307 func generateDeviceAllows(root string, appName types.ACName, mountPoints []types.MountPoint, mounts []Mount, uidRange *user.UidRange) ([]string, error) { 308 var devAllow []string 309 310 rktVolumeLinksPath := filepath.Join(root, "rkt", "volumes") 311 if err := os.MkdirAll(rktVolumeLinksPath, 0600); err != nil { 312 return nil, err 313 } 314 if err := user.ShiftFiles([]string{rktVolumeLinksPath}, uidRange); err != nil { 315 return nil, err 316 } 317 318 for _, m := range mounts { 319 if m.Volume.Kind != "host" { 320 continue 321 } 322 if fileutil.IsDeviceNode(m.Volume.Source) { 323 mode := "r" 324 if !m.ReadOnly { 325 mode += "w" 326 } 327 328 tgt := filepath.Join(common.RelAppRootfsPath(appName), m.Mount.Path) 329 // the DeviceAllow= line needs the link path in /dev/.rkt/ 330 linkRel := filepath.Join("/dev/.rkt", m.Volume.Name.String()) 331 // the real link should be in /rkt/volumes for now 332 link := filepath.Join(rktVolumeLinksPath, m.Volume.Name.String()) 333 334 err := os.Symlink(tgt, link) 335 // if the link already exists, we don't need to do anything 336 if err != nil && !os.IsExist(err) { 337 return nil, err 338 } 339 340 devAllow = append(devAllow, linkRel+" "+mode) 341 } 342 } 343 344 return devAllow, nil 345 } 346 347 // supportsNotify returns true if in the image manifest appc.io/executor/supports-systemd-notify is set to true 348 func supportsNotify(p *stage1commontypes.Pod, appName string) bool { 349 appImg := p.Images[appName] 350 if appImg == nil { 351 return false 352 } 353 supportNotifyAnnotation, ok := appImg.Annotations.Get("appc.io/executor/supports-systemd-notify") 354 supportNotify, err := strconv.ParseBool(supportNotifyAnnotation) 355 if ok && supportNotify && err == nil { 356 return true 357 } 358 return false 359 } 360 361 // ParseUserGroup parses the User and Group fields of an App and returns its 362 // UID and GID. 363 // The User and Group fields accept several formats: 364 // 1. the hardcoded string "root" 365 // 2. a path 366 // 3. a number 367 // 4. a name in reference to /etc/{group,passwd} in the image 368 // See https://github.com/appc/spec/blob/master/spec/aci.md#image-manifest-schema 369 func ParseUserGroup(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (int, int, error) { 370 var uidResolver, gidResolver user.Resolver 371 var uid, gid int 372 var err error 373 374 root := common.AppRootfsPath(p.Root, ra.Name) 375 376 uidResolver, err = user.NumericIDs(ra.App.User) 377 if err != nil { 378 uidResolver, err = user.IDsFromStat(root, ra.App.User, &p.UidRange) 379 } 380 381 if err != nil { 382 uidResolver, err = user.IDsFromEtc(root, ra.App.User, "") 383 } 384 385 if err != nil { // give up 386 return -1, -1, errwrap.Wrap(fmt.Errorf("invalid user %q", ra.App.User), err) 387 } 388 389 if uid, _, err = uidResolver.IDs(); err != nil { 390 return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure user %q", ra.App.User), err) 391 } 392 393 gidResolver, err = user.NumericIDs(ra.App.Group) 394 if err != nil { 395 gidResolver, err = user.IDsFromStat(root, ra.App.Group, &p.UidRange) 396 } 397 398 if err != nil { 399 gidResolver, err = user.IDsFromEtc(root, "", ra.App.Group) 400 } 401 402 if err != nil { // give up 403 return -1, -1, errwrap.Wrap(fmt.Errorf("invalid group %q", ra.App.Group), err) 404 } 405 406 if _, gid, err = gidResolver.IDs(); err != nil { 407 // If we can't resolve the GID, it might be an image converted from 408 // docker. 409 // 410 // Docker uses the UID as GID if you only specify the "user". In that 411 // case, docker2aci sets the group name to the user name because the 412 // appc spec requires both user and group to be set. This will fail 413 // because that group name won't be found in /etc/group. Let's detect 414 // if the image was converted from docker and set the GID to the UID in 415 // that case. 416 // 417 // We only do this if the group in RuntimeApp is the same as the one in 418 // the image, otherwise we trust that the user knows what they're 419 // doing. 420 img := p.Images[ra.Name.String()] 421 if ConvertedFromDocker(img) && img.App.User == img.App.Group && ra.App.Group == img.App.Group { 422 gid = uid 423 } else { 424 return -1, -1, errwrap.Wrap(fmt.Errorf("failed to configure group %q", ra.App.Group), err) 425 } 426 } 427 428 return uid, gid, nil 429 } 430 431 // EvaluateSymlinksInsideApp tries to resolve symlinks within the path. 432 // It returns the actual path relative to the app rootfs for the given path. 433 // This is needed for absolute symlinks - we are in a different rootfs. 434 func EvaluateSymlinksInsideApp(appRootfs, path string) (string, error) { 435 chroot, err := newChroot(appRootfs) 436 if err != nil { 437 return "", errwrap.Wrapf(fmt.Sprintf("chroot to %q failed", appRootfs), err) 438 } 439 440 target, err := fileutil.EvalSymlinksAlways(path) 441 if err != nil { 442 return "", errwrap.Wrapf(fmt.Sprintf("evaluating symlinks of %q failed", path), err) 443 } 444 445 // EvalSymlinksAlways might return a relative path 446 abs, err := filepath.Abs(target) 447 if err != nil { 448 return "", errwrap.Wrapf(fmt.Sprintf("failed to get absolute representation of %q", target), err) 449 } 450 451 if err := chroot.escape(); err != nil { 452 return "", errwrap.Wrapf(fmt.Sprintf("escaping chroot %q failed", appRootfs), err) 453 } 454 455 return abs, nil 456 } 457 458 // appToNspawnArgs transforms the given app manifest, with the given associated 459 // app name, into a subset of applicable systemd-nspawn argument 460 func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) { 461 var args []string 462 appName := ra.Name 463 app := ra.App 464 465 sharedVolPath, err := common.CreateSharedVolumesPath(p.Root) 466 if err != nil { 467 return nil, err 468 } 469 470 vols := make(map[types.ACName]types.Volume) 471 for _, v := range p.Manifest.Volumes { 472 vols[v.Name] = v 473 } 474 475 imageManifest := p.Images[appName.String()] 476 mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) 477 if err != nil { 478 return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err) 479 } 480 for _, m := range mounts { 481 shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) 482 483 absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. 484 if err != nil { 485 return nil, errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) 486 } 487 488 appRootfs := common.AppRootfsPath(absRoot, appName) 489 490 // Evaluate symlinks within the app's rootfs. This is needed because symlinks 491 // within the container can be absolute, which will, of course, be wrong in our ns. 492 // Systemd also gets this wrong, see https://github.com/systemd/systemd/issues/2860 493 // When the above issue is fixed, we can pass the un-evaluated path to --bind instead. 494 mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path) 495 if err != nil { 496 return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) 497 } 498 mntAbsPath := filepath.Join(appRootfs, mntPath) 499 500 if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil { 501 return nil, err 502 } 503 504 opt := make([]string, 6) 505 506 if m.ReadOnly { 507 opt[0] = "--bind-ro=" 508 } else { 509 opt[0] = "--bind=" 510 } 511 512 opt[1] = m.Source(absRoot) 513 opt[2] = ":" 514 opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath) 515 opt[4] = ":" 516 517 // If Recursive is not set, default to recursive. 518 recursive := true 519 if m.Volume.Recursive != nil { 520 recursive = *m.Volume.Recursive 521 } 522 523 // rbind/norbind options exist since systemd-nspawn v226 524 if recursive { 525 opt[5] = "rbind" 526 } else { 527 opt[5] = "norbind" 528 } 529 args = append(args, strings.Join(opt, "")) 530 } 531 532 if !p.InsecureOptions.DisableCapabilities { 533 capabilitiesStr, err := getAppCapabilities(app.Isolators) 534 if err != nil { 535 return nil, err 536 } 537 capList := strings.Join(capabilitiesStr, ",") 538 args = append(args, "--capability="+capList) 539 } 540 541 return args, nil 542 } 543 544 // PodToNspawnArgs renders a prepared Pod as a systemd-nspawn 545 // argument list ready to be executed 546 func PodToNspawnArgs(p *stage1commontypes.Pod) ([]string, error) { 547 args := []string{ 548 "--uuid=" + p.UUID.String(), 549 "--machine=" + GetMachineID(p), 550 "--directory=" + common.Stage1RootfsPath(p.Root), 551 } 552 553 for i := range p.Manifest.Apps { 554 aa, err := appToNspawnArgs(p, &p.Manifest.Apps[i]) 555 if err != nil { 556 return nil, err 557 } 558 args = append(args, aa...) 559 } 560 561 if p.InsecureOptions.DisableCapabilities { 562 args = append(args, "--capability=all") 563 } 564 565 return args, nil 566 } 567 568 // GetFlavor populates a flavor string based on the flavor itself and respectively the systemd version 569 // If the systemd version couldn't be guessed, it will be set to 0. 570 func GetFlavor(p *stage1commontypes.Pod) (flavor string, systemdVersion int, err error) { 571 flavor, err = os.Readlink(filepath.Join(common.Stage1RootfsPath(p.Root), "flavor")) 572 if err != nil { 573 return "", -1, errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err) 574 } 575 576 if flavor == "host" { 577 // This flavor does not contain systemd, parse "systemctl --version" 578 systemctlBin, err := common.LookupPath("systemctl", os.Getenv("PATH")) 579 if err != nil { 580 return "", -1, err 581 } 582 583 systemdVersion, err := common.SystemdVersion(systemctlBin) 584 if err != nil { 585 return "", -1, errwrap.Wrap(errors.New("error finding systemctl version"), err) 586 } 587 588 return flavor, systemdVersion, nil 589 } 590 591 systemdVersionBytes, err := ioutil.ReadFile(filepath.Join(common.Stage1RootfsPath(p.Root), "systemd-version")) 592 if err != nil { 593 return "", -1, errwrap.Wrap(errors.New("unable to determine stage1's systemd version"), err) 594 } 595 systemdVersionString := strings.Trim(string(systemdVersionBytes), " \n") 596 597 // systemdVersionString is either a tag name or a branch name. If it's a 598 // tag name it's of the form "v229", remove the first character to get the 599 // number. 600 systemdVersion, err = strconv.Atoi(systemdVersionString[1:]) 601 if err != nil { 602 // If we get a syntax error, it means the parsing of the version string 603 // of the form "v229" failed, set it to 0 to indicate we couldn't guess 604 // it. 605 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrSyntax { 606 systemdVersion = 0 607 } else { 608 return "", -1, errwrap.Wrap(errors.New("error parsing stage1's systemd version"), err) 609 } 610 } 611 return flavor, systemdVersion, nil 612 } 613 614 // GetAppHashes returns a list of hashes of the apps in this pod 615 func GetAppHashes(p *stage1commontypes.Pod) []types.Hash { 616 var names []types.Hash 617 for _, a := range p.Manifest.Apps { 618 names = append(names, a.Image.ID) 619 } 620 621 return names 622 } 623 624 // GetMachineID returns the machine id string of the pod to be passed to 625 // systemd-nspawn 626 func GetMachineID(p *stage1commontypes.Pod) string { 627 return "rkt-" + p.UUID.String() 628 } 629 630 // getAppCapabilities computes the set of Linux capabilities that an app 631 // should have based on its isolators. Only the following capabalities matter: 632 // - os/linux/capabilities-retain-set 633 // - os/linux/capabilities-remove-set 634 // 635 // The resulting capabilities are generated following the rules from the spec: 636 // See: https://github.com/appc/spec/blob/master/spec/ace.md#linux-isolators 637 func getAppCapabilities(isolators types.Isolators) ([]string, error) { 638 var capsToRetain []string 639 var capsToRemove []string 640 641 // Default caps defined in 642 // https://github.com/appc/spec/blob/master/spec/ace.md#linux-isolators 643 appDefaultCapabilities := []string{ 644 "CAP_AUDIT_WRITE", 645 "CAP_CHOWN", 646 "CAP_DAC_OVERRIDE", 647 "CAP_FSETID", 648 "CAP_FOWNER", 649 "CAP_KILL", 650 "CAP_MKNOD", 651 "CAP_NET_RAW", 652 "CAP_NET_BIND_SERVICE", 653 "CAP_SETUID", 654 "CAP_SETGID", 655 "CAP_SETPCAP", 656 "CAP_SETFCAP", 657 "CAP_SYS_CHROOT", 658 } 659 660 // Iterate over the isolators defined in 661 // https://github.com/appc/spec/blob/master/spec/ace.md#linux-isolators 662 // Only read the capababilities isolators: 663 // - os/linux/capabilities-retain-set 664 // - os/linux/capabilities-remove-set 665 for _, isolator := range isolators { 666 if capSet, ok := isolator.Value().(types.LinuxCapabilitiesSet); ok { 667 switch isolator.Name { 668 case types.LinuxCapabilitiesRetainSetName: 669 capsToRetain = append(capsToRetain, parseLinuxCapabilitiesSet(capSet)...) 670 case types.LinuxCapabilitiesRevokeSetName: 671 capsToRemove = append(capsToRemove, parseLinuxCapabilitiesSet(capSet)...) 672 } 673 } 674 } 675 676 // appc/spec does not allow to have both the retain set and the remove 677 // set defined. 678 if len(capsToRetain) > 0 && len(capsToRemove) > 0 { 679 return nil, errors.New("cannot have both os/linux/capabilities-retain-set and os/linux/capabilities-remove-set") 680 } 681 682 // Neither the retain set or the remove set are defined 683 if len(capsToRetain) == 0 && len(capsToRemove) == 0 { 684 return appDefaultCapabilities, nil 685 } 686 687 if len(capsToRetain) > 0 { 688 return capsToRetain, nil 689 } 690 691 if len(capsToRemove) == 0 { 692 panic("len(capsToRetain) is negative. This cannot happen.") 693 } 694 695 caps := appDefaultCapabilities 696 for _, rc := range capsToRemove { 697 // backward loop to be safe against deletion 698 for i := len(caps) - 1; i >= 0; i-- { 699 if caps[i] == rc { 700 caps = append(caps[:i], caps[i+1:]...) 701 } 702 } 703 } 704 return caps, nil 705 } 706 707 // parseLinuxCapabilitySet parses a LinuxCapabilitiesSet into string slice 708 func parseLinuxCapabilitiesSet(capSet types.LinuxCapabilitiesSet) []string { 709 var capsStr []string 710 for _, cap := range capSet.Set() { 711 capsStr = append(capsStr, string(cap)) 712 } 713 return capsStr 714 } 715 716 func getAppNoNewPrivileges(isolators types.Isolators) bool { 717 for _, isolator := range isolators { 718 noNewPrivileges, ok := isolator.Value().(*types.LinuxNoNewPrivileges) 719 720 if ok && bool(*noNewPrivileges) { 721 return true 722 } 723 } 724 725 return false 726 } 727 728 // chroot is the struct that represents a chroot environment 729 type chroot struct { 730 wd string // the working directory in the outer root 731 root *os.File // the outer root directory 732 } 733 734 // newChroot creates a new chroot environment for the given path. 735 // Unless the caller calls Escape() all system operations will be invoked in that environment. 736 // It stores the working directory at the point it was invoked. 737 func newChroot(path string) (*chroot, error) { 738 var err error 739 var c chroot 740 741 c.wd, err = os.Getwd() 742 if err != nil { 743 return nil, errwrap.Wrapf("getwd before chroot failed", err) 744 } 745 746 c.root, err = os.Open("/") 747 if err != nil { 748 return nil, errwrap.Wrapf("error opening outer root", err) 749 } 750 751 if err := syscall.Chroot(path); err != nil { 752 return nil, errwrap.Wrapf("chroot to "+path+" failed", err) 753 } 754 755 if err := os.Chdir("/"); err != nil { 756 return nil, errwrap.Wrapf("chdir to \"/\" failed", err) 757 } 758 759 return &c, nil 760 } 761 762 // Escape escapes the chroot environment changing back to the original working directory where newChroot was invoked. 763 func (c *chroot) escape() error { 764 // change directory to outer root and close it 765 if err := syscall.Fchdir(int(c.root.Fd())); err != nil { 766 return errwrap.Wrapf("changing directory to outer root failed", err) 767 } 768 769 if err := c.root.Close(); err != nil { 770 return errwrap.Wrapf("closing outer root failed", err) 771 } 772 773 // chroot to current directory aka "." being the outer root 774 if err := syscall.Chroot("."); err != nil { 775 return errwrap.Wrapf("chroot to current directory failed", err) 776 } 777 778 // chdir into previous working directory 779 if err := os.Chdir(c.wd); err != nil { 780 return errwrap.Wrapf("chdir to working directory failed", err) 781 } 782 783 return nil 784 }