github.com/blixtra/rkt@v0.8.1-0.20160204105720-ab0d1add1a43/stage1/init/init.go (about) 1 // Copyright 2014 The rkt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //+build linux 16 17 package main 18 19 // this implements /init of stage1/nspawn+systemd 20 21 import ( 22 "errors" 23 "flag" 24 "fmt" 25 "io" 26 "io/ioutil" 27 "net" 28 "os" 29 "os/exec" 30 "path/filepath" 31 "runtime" 32 "strconv" 33 "strings" 34 "syscall" 35 36 "github.com/appc/goaci/proj2aci" 37 "github.com/appc/spec/schema/types" 38 "github.com/coreos/go-systemd/util" 39 "github.com/godbus/dbus" 40 "github.com/godbus/dbus/introspect" 41 "github.com/hashicorp/errwrap" 42 43 stage1common "github.com/coreos/rkt/stage1/common" 44 stage1commontypes "github.com/coreos/rkt/stage1/common/types" 45 stage1initcommon "github.com/coreos/rkt/stage1/init/common" 46 47 "github.com/coreos/rkt/common" 48 "github.com/coreos/rkt/common/cgroup" 49 "github.com/coreos/rkt/networking" 50 rktlog "github.com/coreos/rkt/pkg/log" 51 "github.com/coreos/rkt/pkg/sys" 52 "github.com/coreos/rkt/stage1/init/kvm" 53 ) 54 55 const ( 56 // Path to systemd-nspawn binary within the stage1 rootfs 57 nspawnBin = "/usr/bin/systemd-nspawn" 58 // Path to the interpreter within the stage1 rootfs 59 interpBin = "/usr/lib/ld-linux-x86-64.so.2" 60 // Path to the localtime file/symlink in host 61 localtimePath = "/etc/localtime" 62 ) 63 64 // mirrorLocalZoneInfo tries to reproduce the /etc/localtime target in stage1/ to satisfy systemd-nspawn 65 func mirrorLocalZoneInfo(root string) { 66 zif, err := os.Readlink(localtimePath) 67 if err != nil { 68 return 69 } 70 71 // On some systems /etc/localtime is a relative symlink, make it absolute 72 if !filepath.IsAbs(zif) { 73 zif = filepath.Join(filepath.Dir(localtimePath), zif) 74 zif = filepath.Clean(zif) 75 } 76 77 src, err := os.Open(zif) 78 if err != nil { 79 return 80 } 81 defer src.Close() 82 83 destp := filepath.Join(common.Stage1RootfsPath(root), zif) 84 85 if err = os.MkdirAll(filepath.Dir(destp), 0755); err != nil { 86 return 87 } 88 89 dest, err := os.OpenFile(destp, os.O_CREATE|os.O_WRONLY, 0644) 90 if err != nil { 91 return 92 } 93 defer dest.Close() 94 95 _, _ = io.Copy(dest, src) 96 } 97 98 var ( 99 debug bool 100 netList common.NetList 101 interactive bool 102 privateUsers string 103 mdsToken string 104 localhostIP net.IP 105 localConfig string 106 log *rktlog.Logger 107 diag *rktlog.Logger 108 ) 109 110 func init() { 111 flag.BoolVar(&debug, "debug", false, "Run in debug mode") 112 flag.Var(&netList, "net", "Setup networking") 113 flag.BoolVar(&interactive, "interactive", false, "The pod is interactive") 114 flag.StringVar(&privateUsers, "private-users", "", "Run within user namespace. Can be set to [=UIDBASE[:NUIDS]]") 115 flag.StringVar(&mdsToken, "mds-token", "", "MDS auth token") 116 flag.StringVar(&localConfig, "local-config", common.DefaultLocalConfigDir, "Local config path") 117 // this ensures that main runs only on main thread (thread group leader). 118 // since namespace ops (unshare, setns) are done for a single thread, we 119 // must ensure that the goroutine does not jump from OS thread to thread 120 runtime.LockOSThread() 121 122 localhostIP = net.ParseIP("127.0.0.1") 123 if localhostIP == nil { 124 panic("localhost IP failed to parse") 125 } 126 } 127 128 // machinedRegister checks if nspawn should register the pod to machined 129 func machinedRegister() bool { 130 // machined has a D-Bus interface following versioning guidelines, see: 131 // http://www.freedesktop.org/wiki/Software/systemd/machined/ 132 // Therefore we can just check if the D-Bus method we need exists and we 133 // don't need to check the signature. 134 var found int 135 136 conn, err := dbus.SystemBus() 137 if err != nil { 138 return false 139 } 140 node, err := introspect.Call(conn.Object("org.freedesktop.machine1", "/org/freedesktop/machine1")) 141 if err != nil { 142 return false 143 } 144 for _, iface := range node.Interfaces { 145 if iface.Name != "org.freedesktop.machine1.Manager" { 146 continue 147 } 148 // machined v215 supports methods "RegisterMachine" and "CreateMachine" called by nspawn v215. 149 // machined v216+ (since commit 5aa4bb) additionally supports methods "CreateMachineWithNetwork" 150 // and "RegisterMachineWithNetwork", called by nspawn v216+. 151 for _, method := range iface.Methods { 152 if method.Name == "CreateMachineWithNetwork" || method.Name == "RegisterMachineWithNetwork" { 153 found++ 154 } 155 } 156 break 157 } 158 return found == 2 159 } 160 161 func lookupPath(bin string, paths string) (string, error) { 162 pathsArr := filepath.SplitList(paths) 163 for _, path := range pathsArr { 164 binPath := filepath.Join(path, bin) 165 binAbsPath, err := filepath.Abs(binPath) 166 if err != nil { 167 return "", fmt.Errorf("unable to find absolute path for %s", binPath) 168 } 169 d, err := os.Stat(binAbsPath) 170 if err != nil { 171 continue 172 } 173 // Check the executable bit, inspired by os.exec.LookPath() 174 if m := d.Mode(); !m.IsDir() && m&0111 != 0 { 175 return binAbsPath, nil 176 } 177 } 178 return "", fmt.Errorf("unable to find %q in %q", bin, paths) 179 } 180 181 func installAssets() error { 182 systemctlBin, err := lookupPath("systemctl", os.Getenv("PATH")) 183 if err != nil { 184 return err 185 } 186 bashBin, err := lookupPath("bash", os.Getenv("PATH")) 187 if err != nil { 188 return err 189 } 190 // More paths could be added in that list if some Linux distributions install it in a different path 191 // Note that we look in /usr/lib/... first because of the merge: 192 // http://www.freedesktop.org/wiki/Software/systemd/TheCaseForTheUsrMerge/ 193 systemdShutdownBin, err := lookupPath("systemd-shutdown", "/usr/lib/systemd:/lib/systemd") 194 if err != nil { 195 return err 196 } 197 systemdBin, err := lookupPath("systemd", "/usr/lib/systemd:/lib/systemd") 198 if err != nil { 199 return err 200 } 201 systemdJournaldBin, err := lookupPath("systemd-journald", "/usr/lib/systemd:/lib/systemd") 202 if err != nil { 203 return err 204 } 205 206 systemdUnitsPath := "/lib/systemd/system" 207 assets := []string{ 208 proj2aci.GetAssetString("/usr/lib/systemd/systemd", systemdBin), 209 proj2aci.GetAssetString("/usr/bin/systemctl", systemctlBin), 210 proj2aci.GetAssetString("/usr/lib/systemd/systemd-journald", systemdJournaldBin), 211 proj2aci.GetAssetString("/usr/bin/bash", bashBin), 212 proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald.service", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald.service", systemdUnitsPath)), 213 proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald.socket", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald.socket", systemdUnitsPath)), 214 proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald-dev-log.socket", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald-dev-log.socket", systemdUnitsPath)), 215 proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald-audit.socket", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald-audit.socket", systemdUnitsPath)), 216 // systemd-shutdown has to be installed at the same path as on the host 217 // because it depends on systemd build flag -DSYSTEMD_SHUTDOWN_BINARY_PATH= 218 proj2aci.GetAssetString(systemdShutdownBin, systemdShutdownBin), 219 } 220 221 return proj2aci.PrepareAssets(assets, "./stage1/rootfs/", nil) 222 } 223 224 // getArgsEnv returns the nspawn or lkvm args and env according to the flavor used 225 func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networking.Networking) ([]string, []string, error) { 226 var args []string 227 env := os.Environ() 228 229 // We store the pod's flavor so we can later garbage collect it correctly 230 if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil { 231 return nil, nil, errwrap.Wrap(errors.New("failed to create flavor symlink"), err) 232 } 233 234 switch flavor { 235 case "kvm": 236 if privateUsers != "" { 237 return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1") 238 } 239 240 // kernel and lkvm are relative path, because init has /var/lib/rkt/..../uuid as its working directory 241 // TODO: move to path.go 242 kernelPath := filepath.Join(common.Stage1RootfsPath(p.Root), "bzImage") 243 lkvmPath := filepath.Join(common.Stage1RootfsPath(p.Root), "lkvm") 244 netDescriptions := kvm.GetNetworkDescriptions(n) 245 lkvmNetArgs, err := kvm.GetKVMNetArgs(netDescriptions) 246 if err != nil { 247 return nil, nil, err 248 } 249 250 cpu, mem := kvm.GetAppsResources(p.Manifest.Apps) 251 252 kernelParams := []string{ 253 "console=hvc0", 254 "init=/usr/lib/systemd/systemd", 255 "no_timer_check", 256 "noreplace-smp", 257 "systemd.default_standard_error=journal+console", 258 "systemd.default_standard_output=journal+console", 259 // "systemd.default_standard_output=tty", 260 "tsc=reliable", 261 "MACHINEID=" + p.UUID.String(), 262 } 263 264 if debug { 265 kernelParams = append(kernelParams, []string{ 266 "debug", 267 "systemd.log_level=debug", 268 "systemd.show_status=true", 269 // "systemd.confirm_spawn=true", 270 }...) 271 } else { 272 kernelParams = append(kernelParams, "quiet") 273 } 274 275 args = append(args, []string{ 276 "./" + lkvmPath, // relative path 277 "run", 278 "--name", "rkt-" + p.UUID.String(), 279 "--no-dhcp", // speed bootup 280 "--cpu", strconv.FormatInt(cpu, 10), 281 "--mem", strconv.FormatInt(mem, 10), 282 "--console=virtio", 283 "--kernel", kernelPath, 284 "--disk", "stage1/rootfs", // relative to run/pods/uuid dir this is a place where systemd resides 285 // MACHINEID will be available as environment variable 286 "--params", strings.Join(kernelParams, " "), 287 }..., 288 ) 289 args = append(args, lkvmNetArgs...) 290 291 if debug { 292 args = append(args, "--debug") 293 } 294 295 // host volume sharing with 9p 296 nsargs := stage1initcommon.VolumesToKvmDiskArgs(p.Manifest.Volumes) 297 args = append(args, nsargs...) 298 299 // lkvm requires $HOME to be defined, 300 // see https://github.com/coreos/rkt/issues/1393 301 if os.Getenv("HOME") == "" { 302 env = append(env, "HOME=/root") 303 } 304 305 return args, env, nil 306 307 case "coreos": 308 args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), interpBin)) 309 args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) 310 args = append(args, "--boot") // Launch systemd in the pod 311 312 if context := os.Getenv(common.EnvSELinuxContext); context != "" { 313 args = append(args, fmt.Sprintf("-Z%s", context)) 314 } 315 316 if machinedRegister() { 317 args = append(args, fmt.Sprintf("--register=true")) 318 } else { 319 args = append(args, fmt.Sprintf("--register=false")) 320 } 321 322 // use only dynamic libraries provided in the image 323 env = append(env, "LD_LIBRARY_PATH="+filepath.Join(common.Stage1RootfsPath(p.Root), "usr/lib")) 324 325 case "src": 326 args = append(args, filepath.Join(common.Stage1RootfsPath(p.Root), nspawnBin)) 327 args = append(args, "--boot") // Launch systemd in the pod 328 329 if context := os.Getenv(common.EnvSELinuxContext); context != "" { 330 args = append(args, fmt.Sprintf("-Z%s", context)) 331 } 332 333 if machinedRegister() { 334 args = append(args, fmt.Sprintf("--register=true")) 335 } else { 336 args = append(args, fmt.Sprintf("--register=false")) 337 } 338 339 case "host": 340 hostNspawnBin, err := lookupPath("systemd-nspawn", os.Getenv("PATH")) 341 if err != nil { 342 return nil, nil, err 343 } 344 345 // Check dynamically which version is installed on the host 346 // Support version >= 220 347 versionBytes, err := exec.Command(hostNspawnBin, "--version").CombinedOutput() 348 if err != nil { 349 return nil, nil, errwrap.Wrap(fmt.Errorf("unable to probe %s version", hostNspawnBin), err) 350 } 351 versionStr := strings.SplitN(string(versionBytes), "\n", 2)[0] 352 var version int 353 n, err := fmt.Sscanf(versionStr, "systemd %d", &version) 354 if err != nil { 355 return nil, nil, fmt.Errorf("cannot parse version: %q", versionStr) 356 } 357 if n != 1 || version < 220 { 358 return nil, nil, fmt.Errorf("rkt needs systemd-nspawn >= 220. %s version not supported: %v", hostNspawnBin, versionStr) 359 } 360 361 // Copy systemd, bash, etc. in stage1 at run-time 362 if err := installAssets(); err != nil { 363 return nil, nil, errwrap.Wrap(errors.New("cannot install assets from the host"), err) 364 } 365 366 args = append(args, hostNspawnBin) 367 args = append(args, "--boot") // Launch systemd in the pod 368 args = append(args, fmt.Sprintf("--register=true")) 369 370 if context := os.Getenv(common.EnvSELinuxContext); context != "" { 371 args = append(args, fmt.Sprintf("-Z%s", context)) 372 } 373 374 default: 375 return nil, nil, fmt.Errorf("unrecognized stage1 flavor: %q", flavor) 376 } 377 378 // link journal only if the host is running systemd 379 if util.IsRunningSystemd() { 380 // we write /etc/machine-id here because systemd-nspawn needs it to link 381 // the container's journal to the host 382 mPath := filepath.Join(common.Stage1RootfsPath(p.Root), "etc", "machine-id") 383 mID := strings.Replace(p.UUID.String(), "-", "", -1) 384 385 if err := ioutil.WriteFile(mPath, []byte(mID), 0644); err != nil { 386 log.FatalE("error writing /etc/machine-id", err) 387 } 388 389 args = append(args, "--link-journal=try-guest") 390 391 keepUnit, err := util.RunningFromSystemService() 392 if err != nil { 393 if err == util.ErrSoNotFound { 394 diag.Print("warning: libsystemd not found even though systemd is running. Cgroup limits set by the environment (e.g. a systemd service) won't be enforced.") 395 } else { 396 return nil, nil, errwrap.Wrap(errors.New("error determining if we're running from a system service"), err) 397 } 398 } 399 400 if keepUnit { 401 args = append(args, "--keep-unit") 402 } 403 } 404 405 if !debug { 406 args = append(args, "--quiet") // silence most nspawn output (log_warning is currently not covered by this) 407 env = append(env, "SYSTEMD_LOG_LEVEL=err") // silence log_warning too 408 } 409 410 env = append(env, "SYSTEMD_NSPAWN_CONTAINER_SERVICE=rkt") 411 412 if len(privateUsers) > 0 { 413 args = append(args, "--private-users="+privateUsers) 414 } 415 416 nsargs, err := stage1initcommon.PodToNspawnArgs(p) 417 if err != nil { 418 return nil, nil, errwrap.Wrap(errors.New("failed to generate nspawn args"), err) 419 } 420 args = append(args, nsargs...) 421 422 // Arguments to systemd 423 args = append(args, "--") 424 args = append(args, "--default-standard-output=tty") // redirect all service logs straight to tty 425 if !debug { 426 args = append(args, "--log-target=null") // silence systemd output inside pod 427 // TODO remove --log-level=warning when we update stage1 to systemd v222 428 args = append(args, "--log-level=warning") // limit log output (systemd-shutdown ignores --log-target) 429 args = append(args, "--show-status=0") // silence systemd initialization status output 430 } 431 432 return args, env, nil 433 } 434 435 func forwardedPorts(pod *stage1commontypes.Pod) ([]networking.ForwardedPort, error) { 436 var fps []networking.ForwardedPort 437 438 for _, ep := range pod.Manifest.Ports { 439 n := "" 440 fp := networking.ForwardedPort{} 441 442 for _, a := range pod.Manifest.Apps { 443 for _, p := range a.App.Ports { 444 if p.Name == ep.Name { 445 if n == "" { 446 fp.Protocol = p.Protocol 447 fp.HostPort = ep.HostPort 448 fp.PodPort = p.Port 449 n = a.Name.String() 450 } else { 451 return nil, fmt.Errorf("ambiguous exposed port in PodManifest: %q and %q both define port %q", n, a.Name, p.Name) 452 } 453 } 454 } 455 } 456 457 if n == "" { 458 return nil, fmt.Errorf("port name %q is not defined by any apps", ep.Name) 459 } 460 461 fps = append(fps, fp) 462 } 463 464 // TODO(eyakubovich): validate that there're no conflicts 465 466 return fps, nil 467 } 468 469 func stage1() int { 470 uuid, err := types.NewUUID(flag.Arg(0)) 471 if err != nil { 472 log.PrintE("UUID is missing or malformed", err) 473 return 1 474 } 475 476 root := "." 477 p, err := stage1commontypes.LoadPod(root, uuid) 478 if err != nil { 479 log.PrintE("failed to load pod", err) 480 return 1 481 } 482 483 // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed when invoking 484 // network plugins 485 lfd, err := common.GetRktLockFD() 486 if err != nil { 487 log.PrintE("failed to get rkt lock fd", err) 488 return 1 489 } 490 491 if err := sys.CloseOnExec(lfd, true); err != nil { 492 log.PrintE("failed to set FD_CLOEXEC on rkt lock", err) 493 return 1 494 } 495 496 mirrorLocalZoneInfo(p.Root) 497 498 flavor, _, err := stage1initcommon.GetFlavor(p) 499 if err != nil { 500 log.PrintE("failed to get stage1 flavor", err) 501 return 3 502 } 503 504 var n *networking.Networking 505 if netList.Contained() { 506 fps, err := forwardedPorts(p) 507 if err != nil { 508 log.Error(err) 509 return 6 510 } 511 512 n, err = networking.Setup(root, p.UUID, fps, netList, localConfig, flavor, debug) 513 if err != nil { 514 log.PrintE("failed to setup network", err) 515 return 6 516 } 517 518 if err = n.Save(); err != nil { 519 log.PrintE("failed to save networking state", err) 520 n.Teardown(flavor, debug) 521 return 6 522 } 523 524 if len(mdsToken) > 0 { 525 hostIP, err := n.GetDefaultHostIP() 526 if err != nil { 527 log.PrintE("failed to get default Host IP", err) 528 return 6 529 } 530 531 p.MetadataServiceURL = common.MetadataServicePublicURL(hostIP, mdsToken) 532 } 533 } else { 534 if flavor == "kvm" { 535 log.Print("flavor kvm requires private network configuration (try --net)") 536 return 6 537 } 538 if len(mdsToken) > 0 { 539 p.MetadataServiceURL = common.MetadataServicePublicURL(localhostIP, mdsToken) 540 } 541 } 542 543 if err = stage1initcommon.WriteDefaultTarget(p); err != nil { 544 log.PrintE("failed to write default.target", err) 545 return 2 546 } 547 548 if err = stage1initcommon.WritePrepareAppTemplate(p); err != nil { 549 log.PrintE("failed to write prepare-app service template", err) 550 return 2 551 } 552 553 if err := stage1initcommon.SetJournalPermissions(p); err != nil { 554 log.PrintE("warning: error setting journal ACLs, you'll need root to read the pod journal", err) 555 } 556 557 if flavor == "kvm" { 558 if err := KvmPodToSystemd(p, n); err != nil { 559 log.PrintE("failed to configure systemd for kvm", err) 560 return 2 561 } 562 } 563 564 if err = stage1initcommon.PodToSystemd(p, interactive, flavor, privateUsers); err != nil { 565 log.PrintE("failed to configure systemd", err) 566 return 2 567 } 568 569 args, env, err := getArgsEnv(p, flavor, debug, n) 570 if err != nil { 571 log.Error(err) 572 return 3 573 } 574 575 // create a separate mount namespace so the cgroup filesystems 576 // are unmounted when exiting the pod 577 if err := syscall.Unshare(syscall.CLONE_NEWNS); err != nil { 578 log.FatalE("error unsharing", err) 579 } 580 581 // we recursively make / a "shared and slave" so mount events from the 582 // new namespace don't propagate to the host namespace but mount events 583 // from the host propagate to the new namespace and are forwarded to 584 // its peer group 585 // See https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt 586 if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SLAVE, ""); err != nil { 587 log.FatalE("error making / a slave mount", err) 588 } 589 if err := syscall.Mount("", "/", "none", syscall.MS_REC|syscall.MS_SHARED, ""); err != nil { 590 log.FatalE("error making / a shared and slave mount", err) 591 } 592 593 enabledCgroups, err := cgroup.GetEnabledCgroups() 594 if err != nil { 595 log.FatalE("error getting cgroups", err) 596 return 5 597 } 598 599 // mount host cgroups in the rkt mount namespace 600 if err := mountHostCgroups(enabledCgroups); err != nil { 601 log.FatalE("couldn't mount the host cgroups", err) 602 return 5 603 } 604 605 var serviceNames []string 606 for _, app := range p.Manifest.Apps { 607 serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name)) 608 } 609 s1Root := common.Stage1RootfsPath(p.Root) 610 machineID := stage1initcommon.GetMachineID(p) 611 subcgroup, err := getContainerSubCgroup(machineID) 612 if err == nil { 613 if err := mountContainerCgroups(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { 614 log.PrintE("couldn't mount the container cgroups", err) 615 return 5 616 } 617 } else { 618 log.PrintE("continuing with per-app isolators disabled", err) 619 } 620 621 if err = stage1common.WritePpid(os.Getpid()); err != nil { 622 log.Error(err) 623 return 4 624 } 625 626 err = stage1common.WithClearedCloExec(lfd, func() error { 627 return syscall.Exec(args[0], args, env) 628 }) 629 if err != nil { 630 log.PrintE(fmt.Sprintf("failed to execute %q", args[0]), err) 631 return 7 632 } 633 634 return 0 635 } 636 637 func areHostCgroupsMounted(enabledCgroups map[int][]string) bool { 638 controllers := cgroup.GetControllerDirs(enabledCgroups) 639 for _, c := range controllers { 640 if !cgroup.IsControllerMounted(c) { 641 return false 642 } 643 } 644 645 return true 646 } 647 648 // mountHostCgroups mounts the host cgroup hierarchy as required by 649 // systemd-nspawn. We need this because some distributions don't have the 650 // "name=systemd" cgroup or don't mount the cgroup controllers in 651 // "/sys/fs/cgroup", and systemd-nspawn needs this. Since this is mounted 652 // inside the rkt mount namespace, it doesn't affect the host. 653 func mountHostCgroups(enabledCgroups map[int][]string) error { 654 systemdControllerPath := "/sys/fs/cgroup/systemd" 655 if !areHostCgroupsMounted(enabledCgroups) { 656 if err := cgroup.CreateCgroups("/", enabledCgroups); err != nil { 657 return errwrap.Wrap(errors.New("error creating host cgroups"), err) 658 } 659 } 660 661 if !cgroup.IsControllerMounted("systemd") { 662 if err := os.MkdirAll(systemdControllerPath, 0700); err != nil { 663 return err 664 } 665 if err := syscall.Mount("cgroup", systemdControllerPath, "cgroup", 0, "none,name=systemd"); err != nil { 666 return errwrap.Wrap(fmt.Errorf("error mounting name=systemd hierarchy on %q", systemdControllerPath), err) 667 } 668 } 669 670 return nil 671 } 672 673 // mountContainerCgroups mounts the cgroup controllers hierarchy in the container's 674 // namespace read-only, leaving the needed knobs in the subcgroup for each-app 675 // read-write so systemd inside stage1 can apply isolators to them 676 func mountContainerCgroups(s1Root string, enabledCgroups map[int][]string, subcgroup string, serviceNames []string) error { 677 if err := cgroup.CreateCgroups(s1Root, enabledCgroups); err != nil { 678 return errwrap.Wrap(errors.New("error creating container cgroups"), err) 679 } 680 if err := cgroup.RemountCgroupsRO(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { 681 return errwrap.Wrap(errors.New("error restricting container cgroups"), err) 682 } 683 684 return nil 685 } 686 687 func getContainerSubCgroup(machineID string) (string, error) { 688 var subcgroup string 689 fromUnit, err := util.RunningFromSystemService() 690 if err != nil { 691 return "", errwrap.Wrap(errors.New("could not determine if we're running from a unit file"), err) 692 } 693 if fromUnit { 694 slice, err := util.GetRunningSlice() 695 if err != nil { 696 return "", errwrap.Wrap(errors.New("could not get slice name"), err) 697 } 698 slicePath, err := common.SliceToPath(slice) 699 if err != nil { 700 return "", errwrap.Wrap(errors.New("could not convert slice name to path"), err) 701 } 702 unit, err := util.CurrentUnitName() 703 if err != nil { 704 return "", errwrap.Wrap(errors.New("could not get unit name"), err) 705 } 706 subcgroup = filepath.Join(slicePath, unit, "system.slice") 707 } else { 708 escapedmID := strings.Replace(machineID, "-", "\\x2d", -1) 709 machineDir := "machine-" + escapedmID + ".scope" 710 if machinedRegister() { 711 // we are not in the final cgroup yet: systemd-nspawn will move us 712 // to the correct cgroup later during registration so we can't 713 // look it up in /proc/self/cgroup 714 subcgroup = filepath.Join("machine.slice", machineDir, "system.slice") 715 } else { 716 // when registration is disabled the container will be directly 717 // under the current cgroup so we can look it up in /proc/self/cgroup 718 ownCgroupPath, err := cgroup.GetOwnCgroupPath("name=systemd") 719 if err != nil { 720 return "", errwrap.Wrap(errors.New("could not get own cgroup path"), err) 721 } 722 // systemd-nspawn won't work if we are in the root cgroup. In addition, 723 // we want all rkt instances to be in distinct cgroups. Create a 724 // subcgroup and add ourselves to it. 725 ownCgroupPath = filepath.Join(ownCgroupPath, machineDir) 726 if err := cgroup.JoinSubcgroup("systemd", ownCgroupPath); err != nil { 727 return "", errwrap.Wrap(fmt.Errorf("error joining %s subcgroup", ownCgroupPath), err) 728 } 729 subcgroup = filepath.Join(ownCgroupPath, "system.slice") 730 } 731 } 732 733 return subcgroup, nil 734 } 735 736 func main() { 737 flag.Parse() 738 739 stage1initcommon.InitDebug(debug) 740 741 log, diag, _ = rktlog.NewLogSet("stage1", debug) 742 if !debug { 743 diag.SetOutput(ioutil.Discard) 744 } 745 746 // move code into stage1() helper so deferred fns get run 747 os.Exit(stage1()) 748 }