github.com/coreos/rocket@v1.30.1-0.20200224141603-171c416fac02/stage0/run.go (about) 1 // Copyright 2014 The rkt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //+build linux 16 17 package stage0 18 19 // 20 // rkt is a reference implementation of the app container specification. 21 // 22 // Execution on rkt is divided into a number of stages, and the `rkt` 23 // binary implements the first stage (stage0) 24 // 25 26 import ( 27 "encoding/json" 28 "errors" 29 "fmt" 30 "io/ioutil" 31 "os" 32 "path" 33 "path/filepath" 34 "runtime" 35 "strconv" 36 "strings" 37 "syscall" 38 "time" 39 40 "github.com/appc/spec/schema" 41 "github.com/appc/spec/schema/types" 42 cnitypes "github.com/containernetworking/cni/pkg/types" 43 "github.com/hashicorp/errwrap" 44 "github.com/opencontainers/selinux/go-selinux/label" 45 "github.com/rkt/rkt/common" 46 "github.com/rkt/rkt/common/apps" 47 commonnet "github.com/rkt/rkt/common/networking" 48 "github.com/rkt/rkt/common/overlay" 49 "github.com/rkt/rkt/pkg/aci" 50 "github.com/rkt/rkt/pkg/fileutil" 51 "github.com/rkt/rkt/pkg/sys" 52 "github.com/rkt/rkt/pkg/tpm" 53 "github.com/rkt/rkt/pkg/user" 54 "github.com/rkt/rkt/store/imagestore" 55 "github.com/rkt/rkt/store/treestore" 56 "github.com/rkt/rkt/version" 57 ) 58 59 var debugEnabled bool 60 61 // PrepareConfig defines the configuration parameters required by Prepare 62 type PrepareConfig struct { 63 *CommonConfig 64 Apps *apps.Apps // apps to prepare 65 InheritEnv bool // inherit parent environment into apps 66 ExplicitEnv []string // always set these environment variables for all the apps 67 EnvFromFile []string // environment variables loaded from files, set for all the apps 68 Ports []types.ExposedPort // list of ports that rkt will expose on the host 69 UseOverlay bool // prepare pod with overlay fs 70 PodManifest string // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc. 71 PrivateUsers *user.UidRange // user namespaces 72 UserAnnotations types.UserAnnotations // user annotations for the pod. 73 UserLabels types.UserLabels // user labels for the pod. 74 } 75 76 // RunConfig defines the configuration parameters needed by Run 77 type RunConfig struct { 78 *CommonConfig 79 Net common.NetList // pod should have its own network stack 80 LockFd int // lock file descriptor 81 Interactive bool // whether the pod is interactive or not 82 MDSRegister bool // whether to register with metadata service or not 83 Apps schema.AppList // applications (prepare gets them via Apps) 84 LocalConfig string // Path to local configuration 85 Hostname string // hostname of the pod 86 RktGid int // group id of the 'rkt' group, -1 ere's no rkt group. 87 DNSConfMode DNSConfMode // dns configuration file mode - for stAage1 88 DNSConfig cnitypes.DNS // the DNS configuration (nameservers, search, options) 89 InsecureCapabilities bool // Do not restrict capabilities 90 InsecurePaths bool // Do not restrict access to files in sysfs or procfs 91 InsecureSeccomp bool // Do not add seccomp restrictions 92 UseOverlay bool // run pod with overlay fs 93 HostsEntries HostsEntries // The entries in /etc/hosts 94 IPCMode string // whether to stay in the host IPC namespace 95 } 96 97 // CommonConfig defines the configuration shared by both Run and Prepare 98 type CommonConfig struct { 99 DataDir string // The path to the data directory, e.g. /var/lib/rkt/pods 100 Store *imagestore.Store // store containing all of the configured application images 101 TreeStore *treestore.Store // store containing all of the configured application images 102 Stage1Image types.Hash // stage1 image containing usable /init and /enter entrypoints 103 UUID *types.UUID // UUID of the pod 104 RootHash string // hash of the root filesystem 105 ManifestData string // the pod manifest data 106 Debug bool // debug mode 107 MountLabel string // SELinux label to use for fs 108 ProcessLabel string // SELinux label to use 109 Mutable bool // whether this pod is mutable 110 Annotations map[types.ACIdentifier]string // pod-level annotations, for internal/experimental usage 111 } 112 113 // HostsEntries encapsulates the entries in an etc-hosts file: mapping from IP 114 // to arbitrary list of hostnames 115 type HostsEntries map[string][]string 116 117 // DNSConfMode indicates what the stage1 should do with dns config files 118 // The values and meanings are: 119 // 'host': bind-mount from host 120 // 'stage0': the stage0 has generated it 121 // 'none' : do not generate it 122 // 'default' : do whatever was the default 123 type DNSConfMode struct { 124 Resolv string // /etc/rkt-resolv.conf 125 Hosts string // /etc/rkt-hosts 126 } 127 128 func init() { 129 // this ensures that main runs only on main thread (thread group leader). 130 // since namespace ops (unshare, setns) are done for a single thread, we 131 // must ensure that the goroutine does not jump from OS thread to thread 132 runtime.LockOSThread() 133 } 134 135 // InitDebug enables debugging 136 func InitDebug() { 137 debugEnabled = true 138 log.SetDebug(true) 139 } 140 141 func debug(format string, i ...interface{}) { 142 if debugEnabled { 143 log.Printf(format, i...) 144 } 145 } 146 147 // mergeEnvs merges environment variables from env into the current appEnv 148 // if override is set to true, then variables with the same name will be set to the value in env 149 // env is expected to be in the os.Environ() key=value format 150 func mergeEnvs(appEnv *types.Environment, env []string, override bool) { 151 for _, ev := range env { 152 pair := strings.SplitN(ev, "=", 2) 153 if _, exists := appEnv.Get(pair[0]); override || !exists { 154 appEnv.Set(pair[0], pair[1]) 155 } 156 } 157 } 158 159 // deduplicateMPs removes Mounts with duplicated paths. If there's more than 160 // one Mount with the same path, it keeps the first one encountered. 161 func deduplicateMPs(mounts []schema.Mount) []schema.Mount { 162 var res []schema.Mount 163 seen := make(map[string]struct{}) 164 for _, m := range mounts { 165 cleanPath := path.Clean(m.Path) 166 if _, ok := seen[cleanPath]; !ok { 167 res = append(res, m) 168 seen[cleanPath] = struct{}{} 169 } 170 } 171 return res 172 } 173 174 // MergeMounts combines the global and per-app mount slices 175 func MergeMounts(mounts []schema.Mount, appMounts []schema.Mount) []schema.Mount { 176 ml := append(appMounts, mounts...) 177 return deduplicateMPs(ml) 178 } 179 180 // generatePodManifest creates the pod manifest from the command line input. 181 // It returns the pod manifest as []byte on success. 182 // This is invoked if no pod manifest is specified at the command line. 183 func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { 184 pm := schema.PodManifest{ 185 ACKind: "PodManifest", 186 Apps: make(schema.AppList, 0), 187 } 188 189 v, err := types.NewSemVer(version.Version) 190 if err != nil { 191 return nil, errwrap.Wrap(errors.New("error creating version"), err) 192 } 193 pm.ACVersion = *v 194 195 if err := cfg.Apps.Walk(func(app *apps.App) error { 196 img := app.ImageID 197 198 am, err := cfg.Store.GetImageManifest(img.String()) 199 if err != nil { 200 return errwrap.Wrap(errors.New("error getting the manifest"), err) 201 } 202 203 if app.Name == "" { 204 appName, err := common.ImageNameToAppName(am.Name) 205 if err != nil { 206 return errwrap.Wrap(errors.New("error converting image name to app name"), err) 207 } 208 app.Name = appName.String() 209 } 210 211 appName, err := types.NewACName(app.Name) 212 if err != nil { 213 return errwrap.Wrap(errors.New("invalid app name format"), err) 214 } 215 216 if _, err := prepareAppImage(cfg, *appName, img, dir, cfg.UseOverlay); err != nil { 217 return errwrap.Wrap(fmt.Errorf("error preparing image %s", img), err) 218 } 219 if pm.Apps.Get(*appName) != nil { 220 return fmt.Errorf("error: multiple apps with name %s", app.Name) 221 } 222 if am.App == nil && app.Exec == "" { 223 return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", img) 224 } 225 226 ra, err := generateRuntimeApp(app, am, cfg.Apps.Mounts) 227 if err != nil { 228 return err 229 } 230 231 // loading the environment from the lowest priority to highest 232 if cfg.InheritEnv { 233 // Inherit environment does not override app image environment 234 mergeEnvs(&ra.App.Environment, os.Environ(), false) 235 } 236 237 mergeEnvs(&ra.App.Environment, cfg.EnvFromFile, true) 238 mergeEnvs(&ra.App.Environment, cfg.ExplicitEnv, true) 239 240 pm.Apps = append(pm.Apps, ra) 241 242 return nil 243 }); err != nil { 244 return nil, err 245 } 246 247 // TODO(jonboulle): check that app mountpoint expectations are 248 // satisfied here, rather than waiting for stage1 249 pm.Volumes = cfg.Apps.Volumes 250 251 // Check to see if ports have any errors 252 pm.Ports = cfg.Ports 253 if _, err := commonnet.ForwardedPorts(&pm); err != nil { 254 return nil, err 255 } 256 257 pm.Annotations = append(pm.Annotations, types.Annotation{ 258 Name: "coreos.com/rkt/stage1/mutable", 259 Value: strconv.FormatBool(cfg.Mutable), 260 }) 261 262 pm.UserAnnotations = cfg.UserAnnotations 263 pm.UserLabels = cfg.UserLabels 264 265 // Add internal annotations for rkt experiments 266 for k, v := range cfg.Annotations { 267 if _, ok := pm.Annotations.Get(k.String()); ok { 268 continue 269 } 270 pm.Annotations.Set(k, v) 271 } 272 273 pmb, err := json.Marshal(pm) 274 if err != nil { 275 return nil, errwrap.Wrap(errors.New("error marshalling pod manifest"), err) 276 } 277 return pmb, nil 278 } 279 280 // prepareIsolators merges the CLI app parameters with the manifest's app 281 func prepareIsolators(setup *apps.App, app *types.App) error { 282 if memoryOverride := setup.MemoryLimit; memoryOverride != nil { 283 isolator := memoryOverride.AsIsolator() 284 app.Isolators = append(app.Isolators, isolator) 285 } 286 287 if cpuOverride := setup.CPULimit; cpuOverride != nil { 288 isolator := cpuOverride.AsIsolator() 289 app.Isolators = append(app.Isolators, isolator) 290 } 291 292 if cpuSharesOverride := setup.CPUShares; cpuSharesOverride != nil { 293 isolator := cpuSharesOverride.AsIsolator() 294 app.Isolators.ReplaceIsolatorsByName(isolator, []types.ACIdentifier{types.LinuxCPUSharesName}) 295 } 296 297 if oomAdjOverride := setup.OOMScoreAdj; oomAdjOverride != nil { 298 app.Isolators.ReplaceIsolatorsByName(oomAdjOverride.AsIsolator(), []types.ACIdentifier{types.LinuxOOMScoreAdjName}) 299 } 300 301 if setup.CapsRetain != nil && setup.CapsRemove != nil { 302 return fmt.Errorf("error: cannot use both --caps-retain and --caps-remove on the same image") 303 } 304 305 // Delete existing caps isolators if the user wants to override 306 // them with either --caps-retain or --caps-remove 307 if setup.CapsRetain != nil || setup.CapsRemove != nil { 308 for i := len(app.Isolators) - 1; i >= 0; i-- { 309 isolator := app.Isolators[i] 310 if _, ok := isolator.Value().(types.LinuxCapabilitiesSet); ok { 311 app.Isolators = append(app.Isolators[:i], 312 app.Isolators[i+1:]...) 313 } 314 } 315 } 316 317 if capsRetain := setup.CapsRetain; capsRetain != nil { 318 isolator, err := capsRetain.AsIsolator() 319 if err != nil { 320 return err 321 } 322 app.Isolators = append(app.Isolators, *isolator) 323 } else if capsRemove := setup.CapsRemove; capsRemove != nil { 324 isolator, err := capsRemove.AsIsolator() 325 if err != nil { 326 return err 327 } 328 app.Isolators = append(app.Isolators, *isolator) 329 } 330 331 // Override seccomp isolators via --seccomp CLI switch 332 if setup.SeccompFilter != "" { 333 var is *types.Isolator 334 mode, errno, set, err := setup.SeccompOverride() 335 if err != nil { 336 return err 337 } 338 switch mode { 339 case "retain": 340 lss, err := types.NewLinuxSeccompRetainSet(errno, set...) 341 if err != nil { 342 return err 343 } 344 if is, err = lss.AsIsolator(); err != nil { 345 return err 346 } 347 case "remove": 348 lss, err := types.NewLinuxSeccompRemoveSet(errno, set...) 349 if err != nil { 350 return err 351 } 352 if is, err = lss.AsIsolator(); err != nil { 353 return err 354 } 355 default: 356 return apps.ErrInvalidSeccompMode 357 } 358 app.Isolators.ReplaceIsolatorsByName(*is, []types.ACIdentifier{types.LinuxSeccompRemoveSetName, types.LinuxSeccompRetainSetName}) 359 } 360 return nil 361 } 362 363 // validatePodManifest reads the user-specified pod manifest, prepares the app images 364 // and validates the pod manifest. If the pod manifest passes validation, it returns 365 // the manifest as []byte. 366 // TODO(yifan): More validation in the future. 367 func validatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { 368 pmb, err := ioutil.ReadFile(cfg.PodManifest) 369 if err != nil { 370 return nil, errwrap.Wrap(errors.New("error reading pod manifest"), err) 371 } 372 var pm schema.PodManifest 373 if err := json.Unmarshal(pmb, &pm); err != nil { 374 return nil, errwrap.Wrap(errors.New("error unmarshaling pod manifest"), err) 375 } 376 377 appNames := make(map[types.ACName]struct{}) 378 for _, ra := range pm.Apps { 379 img := ra.Image 380 381 if img.ID.Empty() { 382 return nil, fmt.Errorf("no image ID for app %q", ra.Name) 383 } 384 am, err := cfg.Store.GetImageManifest(img.ID.String()) 385 if err != nil { 386 return nil, errwrap.Wrap(errors.New("error getting the image manifest from store"), err) 387 } 388 if _, err := prepareAppImage(cfg, ra.Name, img.ID, dir, cfg.UseOverlay); err != nil { 389 return nil, errwrap.Wrap(fmt.Errorf("error preparing image %s", img), err) 390 } 391 if _, ok := appNames[ra.Name]; ok { 392 return nil, fmt.Errorf("multiple apps with same name %s", ra.Name) 393 } 394 appNames[ra.Name] = struct{}{} 395 if ra.App == nil && am.App == nil { 396 return nil, fmt.Errorf("no app section in the pod manifest or the image manifest") 397 } 398 } 399 400 // Validate forwarded ports 401 if _, err := commonnet.ForwardedPorts(&pm); err != nil { 402 return nil, err 403 } 404 return pmb, nil 405 } 406 407 // Prepare sets up a pod based on the given config. 408 func Prepare(cfg PrepareConfig, dir string, uuid *types.UUID) error { 409 if err := os.MkdirAll(common.AppsInfoPath(dir), common.DefaultRegularDirPerm); err != nil { 410 return errwrap.Wrap(errors.New("error creating apps info directory"), err) 411 } 412 debug("Preparing stage1") 413 if err := prepareStage1Image(cfg, dir); err != nil { 414 return errwrap.Wrap(errors.New("error preparing stage1"), err) 415 } 416 417 var pmb []byte 418 var err error 419 if len(cfg.PodManifest) > 0 { 420 pmb, err = validatePodManifest(cfg, dir) 421 } else { 422 pmb, err = generatePodManifest(cfg, dir) 423 } 424 if err != nil { 425 return err 426 } 427 428 cfg.CommonConfig.ManifestData = string(pmb) 429 430 // create pod lock file for app add/rm operations. 431 f, err := os.OpenFile(common.PodManifestLockPath(dir), os.O_CREATE|os.O_RDWR, 0600) 432 if err != nil { 433 return err 434 } 435 f.Close() 436 437 debug("Writing pod manifest") 438 fn := common.PodManifestPath(dir) 439 if err := ioutil.WriteFile(fn, pmb, common.DefaultRegularFilePerm); err != nil { 440 return errwrap.Wrap(errors.New("error writing pod manifest"), err) 441 } 442 443 f, err = os.OpenFile(common.PodCreatedPath(dir), os.O_CREATE|os.O_RDWR, common.DefaultRegularFilePerm) 444 if err != nil { 445 return err 446 } 447 f.Close() 448 449 if cfg.UseOverlay { 450 // mark the pod as prepared with overlay 451 f, err := os.Create(filepath.Join(dir, common.OverlayPreparedFilename)) 452 if err != nil { 453 return errwrap.Wrap(errors.New("error writing overlay marker file"), err) 454 } 455 defer f.Close() 456 } 457 458 if cfg.PrivateUsers.Shift > 0 { 459 // mark the pod as prepared for user namespaces 460 uidrangeBytes := cfg.PrivateUsers.Serialize() 461 462 if err := ioutil.WriteFile(filepath.Join(dir, common.PrivateUsersPreparedFilename), uidrangeBytes, common.DefaultRegularFilePerm); err != nil { 463 return errwrap.Wrap(errors.New("error writing userns marker file"), err) 464 } 465 } 466 467 return nil 468 } 469 470 func preparedWithPrivateUsers(dir string) (string, error) { 471 bytes, err := ioutil.ReadFile(filepath.Join(dir, common.PrivateUsersPreparedFilename)) 472 if os.IsNotExist(err) { 473 return "", nil 474 } 475 if err != nil { 476 return "", err 477 } 478 479 return string(bytes), nil 480 } 481 482 func writeDnsConfig(cfg *RunConfig, rootfs string) { 483 writeResolvConf(cfg, rootfs) 484 writeEtcHosts(cfg, rootfs) 485 } 486 487 // writeResolvConf will generate <stage1>/etc/rkt-resolv.conf if needed 488 func writeResolvConf(cfg *RunConfig, rootfs string) { 489 if cfg.DNSConfMode.Resolv != "stage0" { 490 return 491 } 492 493 if err := os.Mkdir(filepath.Join(rootfs, "etc"), common.DefaultRegularDirPerm); err != nil { 494 if !os.IsExist(err) { 495 log.Fatalf("error creating dir %q: %v\n", "/etc", err) 496 } 497 } 498 resolvPath := filepath.Join(rootfs, "etc/rkt-resolv.conf") 499 f, err := os.Create(resolvPath) 500 if err != nil { 501 log.Fatalf("error writing etc/rkt-resolv.conf: %v\n", err) 502 } 503 defer f.Close() 504 505 _, err = f.WriteString(common.MakeResolvConf(cfg.DNSConfig, "Generated by rkt run")) 506 if err != nil { 507 log.Fatalf("error writing etc/rkt-resolv.conf: %v\n", err) 508 } 509 } 510 511 // writeEtcHosts writes the file /etc/rkt-hosts into the stage1 rootfs. 512 // This will read defaults from <rootfs>/etc/hosts-fallback if it exists. 513 // Therefore, this should be called after the stage1 is mounted 514 func writeEtcHosts(cfg *RunConfig, rootfs string) { 515 if cfg.DNSConfMode.Hosts != "stage0" { 516 return 517 } 518 519 // Read <stage1>/rootfs/etc/hosts-fallback to get some sane defaults 520 hostsTextb, err := ioutil.ReadFile(filepath.Join(rootfs, "etc/hosts-fallback")) 521 if err != nil { 522 // fallback-fallback :-) 523 hostsTextb = []byte("#created by rkt stage0\n127.0.0.1 localhost localhost.localdomain\n") 524 } 525 hostsText := string(hostsTextb) 526 527 hostsText += "\n\n# Added by rkt run --hosts-entry\n" 528 529 for ip, hostnames := range cfg.HostsEntries { 530 hostsText = fmt.Sprintf("%s%s %s\n", hostsText, ip, strings.Join(hostnames, " ")) 531 } 532 533 // Create /etc if it does not exist 534 etcPath := filepath.Join(rootfs, "etc") 535 if _, err := os.Stat(etcPath); err != nil && os.IsNotExist(err) { 536 err = os.Mkdir(etcPath, 0755) 537 if err != nil { 538 log.FatalE("failed to make stage1 etc directory", err) 539 } 540 } else if err != nil { 541 log.FatalE("Failed to stat stage1 etc", err) 542 } 543 544 hostsPath := filepath.Join(etcPath, "rkt-hosts") 545 err = ioutil.WriteFile(hostsPath, []byte(hostsText), 0644) 546 if err != nil { 547 log.FatalE("failed to write etc/rkt-hosts", err) 548 } 549 } 550 551 // Run mounts the right overlay filesystems and actually runs the prepared 552 // pod by exec()ing the stage1 init inside the pod filesystem. 553 func Run(cfg RunConfig, dir string, dataDir string) { 554 privateUsers, err := preparedWithPrivateUsers(dir) 555 if err != nil { 556 log.FatalE("error preparing private users", err) 557 } 558 559 debug("Setting up stage1") 560 if err := setupStage1Image(cfg, dir, cfg.UseOverlay); err != nil { 561 log.FatalE("error setting up stage1", err) 562 } 563 debug("Wrote filesystem to %s\n", dir) 564 565 for _, app := range cfg.Apps { 566 if err := setupAppImage(cfg, app.Name, app.Image.ID, dir, cfg.UseOverlay); err != nil { 567 log.FatalE("error setting up app image", err) 568 } 569 } 570 571 destRootfs := common.Stage1RootfsPath(dir) 572 573 writeDnsConfig(&cfg, destRootfs) 574 575 if err := os.Setenv(common.EnvLockFd, fmt.Sprintf("%v", cfg.LockFd)); err != nil { 576 log.FatalE("setting lock fd environment", err) 577 } 578 579 if err := os.Setenv(common.EnvSELinuxContext, fmt.Sprintf("%v", cfg.ProcessLabel)); err != nil { 580 log.FatalE("setting SELinux context environment", err) 581 } 582 583 if err := os.Setenv(common.EnvSELinuxMountContext, fmt.Sprintf("%v", cfg.MountLabel)); err != nil { 584 log.FatalE("setting SELinux mount context environment", err) 585 } 586 587 debug("Pivoting to filesystem %s", dir) 588 if err := os.Chdir(dir); err != nil { 589 log.FatalE("failed changing to dir", err) 590 } 591 592 ep, err := getStage1Entrypoint(dir, runEntrypoint) 593 if err != nil { 594 log.FatalE("error determining 'run' entrypoint", err) 595 } 596 args := []string{filepath.Join(destRootfs, ep)} 597 598 if cfg.Debug { 599 args = append(args, "--debug") 600 } 601 602 args = append(args, "--net="+cfg.Net.String()) 603 604 if cfg.Interactive { 605 args = append(args, "--interactive") 606 } 607 if len(privateUsers) > 0 { 608 args = append(args, "--private-users="+privateUsers) 609 } 610 if cfg.MDSRegister { 611 mdsToken, err := registerPod(".", cfg.UUID, cfg.Apps) 612 if err != nil { 613 log.FatalE("failed to register the pod", err) 614 } 615 616 args = append(args, "--mds-token="+mdsToken) 617 } 618 619 if cfg.LocalConfig != "" { 620 args = append(args, "--local-config="+cfg.LocalConfig) 621 } 622 623 s1v, err := getStage1InterfaceVersion(dir) 624 if err != nil { 625 log.FatalE("error determining stage1 interface version", err) 626 } 627 628 if cfg.Hostname != "" { 629 if interfaceVersionSupportsHostname(s1v) { 630 args = append(args, "--hostname="+cfg.Hostname) 631 } else { 632 log.Printf("warning: --hostname option is not supported by stage1") 633 } 634 } 635 636 if cfg.DNSConfMode.Hosts != "default" || cfg.DNSConfMode.Resolv != "default" { 637 if interfaceVersionSupportsDNSConfMode(s1v) { 638 args = append(args, fmt.Sprintf("--dns-conf-mode=resolv=%s,hosts=%s", cfg.DNSConfMode.Resolv, cfg.DNSConfMode.Hosts)) 639 } else { 640 log.Printf("warning: --dns-conf-mode option not supported by stage1") 641 } 642 } 643 644 if interfaceVersionSupportsInsecureOptions(s1v) { 645 if cfg.InsecureCapabilities { 646 args = append(args, "--disable-capabilities-restriction") 647 } 648 if cfg.InsecurePaths { 649 args = append(args, "--disable-paths") 650 } 651 if cfg.InsecureSeccomp { 652 args = append(args, "--disable-seccomp") 653 } 654 } 655 656 if cfg.Mutable { 657 mutable, err := supportsMutableEnvironment(dir) 658 659 switch { 660 case err != nil: 661 log.FatalE("error determining stage1 mutable support", err) 662 case !mutable: 663 log.Fatalln("stage1 does not support mutable pods") 664 } 665 666 args = append(args, "--mutable") 667 } 668 669 if cfg.IPCMode != "" { 670 if interfaceVersionSupportsIPCMode(s1v) { 671 args = append(args, "--ipc="+cfg.IPCMode) 672 } else { 673 log.Printf("warning: --ipc option is not supported by stage1") 674 } 675 } 676 677 args = append(args, cfg.UUID.String()) 678 679 // make sure the lock fd stays open across exec 680 if err := sys.CloseOnExec(cfg.LockFd, false); err != nil { 681 log.Fatalf("error clearing FD_CLOEXEC on lock fd") 682 } 683 684 tpmEvent := fmt.Sprintf("rkt: Rootfs: %s Manifest: %s Stage1 args: %s", cfg.CommonConfig.RootHash, cfg.CommonConfig.ManifestData, strings.Join(args, " ")) 685 // If there's no TPM available or there's a failure for some other 686 // reason, ignore it and continue anyway. Long term we'll want policy 687 // that enforces TPM behaviour, but we don't have any infrastructure 688 // around that yet. 689 _ = tpm.Extend(tpmEvent) 690 691 debug("Execing %s", args) 692 if err := syscall.Exec(args[0], args, os.Environ()); err != nil { 693 log.FatalE("error execing init", err) 694 } 695 } 696 697 // prepareAppImage renders and verifies the tree cache of the app image that 698 // corresponds to the given app name. 699 // When useOverlay is false, it attempts to render and expand the app image. 700 // It returns the tree store ID if overlay is being used. 701 func prepareAppImage(cfg PrepareConfig, appName types.ACName, img types.Hash, cdir string, useOverlay bool) (string, error) { 702 debug("Loading image %s", img.String()) 703 704 am, err := cfg.Store.GetImageManifest(img.String()) 705 if err != nil { 706 return "", errwrap.Wrap(errors.New("error getting the manifest"), err) 707 } 708 709 if _, hasOS := am.Labels.Get("os"); !hasOS { 710 return "", fmt.Errorf("missing os label in the image manifest") 711 } 712 713 if _, hasArch := am.Labels.Get("arch"); !hasArch { 714 return "", fmt.Errorf("missing arch label in the image manifest") 715 } 716 717 if err := types.IsValidOSArch(am.Labels.ToMap(), ValidOSArch); err != nil { 718 return "", err 719 } 720 721 appInfoDir := common.AppInfoPath(cdir, appName) 722 if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { 723 return "", errwrap.Wrap(errors.New("error creating apps info directory"), err) 724 } 725 726 var treeStoreID string 727 if useOverlay { 728 if cfg.PrivateUsers.Shift > 0 { 729 return "", fmt.Errorf("cannot use both overlay and user namespace: not implemented yet. (Try --no-overlay)") 730 } 731 732 treeStoreID, _, err = cfg.TreeStore.Render(img.String(), false) 733 if err != nil { 734 return "", errwrap.Wrap(errors.New("error rendering tree image"), err) 735 } 736 737 if err := ioutil.WriteFile(common.AppTreeStoreIDPath(cdir, appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { 738 return "", errwrap.Wrap(errors.New("error writing app treeStoreID"), err) 739 } 740 } else { 741 ad := common.AppPath(cdir, appName) 742 743 err := os.MkdirAll(ad, common.DefaultRegularDirPerm) 744 if err != nil { 745 return "", errwrap.Wrap(errors.New("error creating image directory"), err) 746 } 747 748 shiftedUid, shiftedGid, err := cfg.PrivateUsers.ShiftRange(uint32(os.Getuid()), uint32(os.Getgid())) 749 if err != nil { 750 return "", errwrap.Wrap(errors.New("error getting uid, gid"), err) 751 } 752 753 if err := os.Chown(ad, int(shiftedUid), int(shiftedGid)); err != nil { 754 return "", errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", appName), err) 755 } 756 757 if err := aci.RenderACIWithImageID(img, ad, cfg.Store, cfg.PrivateUsers); err != nil { 758 return "", errwrap.Wrap(errors.New("error rendering ACI"), err) 759 } 760 } 761 762 if err := writeManifest(*cfg.CommonConfig, img, appInfoDir); err != nil { 763 return "", errwrap.Wrap(errors.New("error writing manifest"), err) 764 } 765 766 return treeStoreID, nil 767 } 768 769 // setupAppImage mounts the overlay filesystem for the app image that 770 // corresponds to the given hash if useOverlay is true. 771 // It also creates an mtab file in the application's rootfs if one is not 772 // present. 773 func setupAppImage(cfg RunConfig, appName types.ACName, img types.Hash, cdir string, useOverlay bool) error { 774 ad := common.AppPath(cdir, appName) 775 if useOverlay { 776 err := os.MkdirAll(ad, common.DefaultRegularDirPerm) 777 if err != nil { 778 return errwrap.Wrap(errors.New("error creating image directory"), err) 779 } 780 treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cdir, appName)) 781 if err != nil { 782 return err 783 } 784 if err := copyAppManifest(cdir, appName, ad); err != nil { 785 return err 786 } 787 if err := overlayRender(cfg, string(treeStoreID), cdir, ad, appName.String()); err != nil { 788 return errwrap.Wrap(errors.New("error rendering overlay filesystem"), err) 789 } 790 } 791 return ensureMtabExists(filepath.Join(ad, "rootfs")) 792 } 793 794 // ensureMtabExists creates a symlink from /etc/mtab -> /proc/self/mounts if 795 // nothing exists at /etc/mtab. 796 // Various tools, such as mount from util-linux 2.25, expect the mtab file to 797 // be populated. 798 func ensureMtabExists(rootfs string) error { 799 stat, err := os.Stat(filepath.Join(rootfs, "etc")) 800 if os.IsNotExist(err) { 801 // If your image has no /etc you don't get /etc/mtab either 802 return nil 803 } 804 if err != nil { 805 return errwrap.Wrap(errors.New("error determining if /etc existed in the image"), err) 806 } 807 if !stat.IsDir() { 808 return nil 809 } 810 mtabPath := filepath.Join(rootfs, "etc", "mtab") 811 if _, err = os.Lstat(mtabPath); err == nil { 812 // If the image already has an mtab, don't replace it 813 return nil 814 } 815 if !os.IsNotExist(err) { 816 return errwrap.Wrap(errors.New("error determining if /etc/mtab exists in the image"), err) 817 } 818 819 target := "../proc/self/mounts" 820 err = os.Symlink(target, mtabPath) 821 if err != nil { 822 return errwrap.Wrap(errors.New("error creating mtab symlink"), err) 823 } 824 return nil 825 } 826 827 // prepareStage1Image renders and verifies tree cache of the given hash 828 // when using overlay. 829 // When useOverlay is false, it attempts to render and expand the stage1. 830 func prepareStage1Image(cfg PrepareConfig, cdir string) error { 831 s1 := common.Stage1ImagePath(cdir) 832 if err := os.MkdirAll(s1, common.DefaultRegularDirPerm); err != nil { 833 return errwrap.Wrap(errors.New("error creating stage1 directory"), err) 834 } 835 836 treeStoreID, _, err := cfg.TreeStore.Render(cfg.Stage1Image.String(), false) 837 if err != nil { 838 return errwrap.Wrap(errors.New("error rendering tree image"), err) 839 } 840 841 if err := writeManifest(*cfg.CommonConfig, cfg.Stage1Image, s1); err != nil { 842 return errwrap.Wrap(errors.New("error writing manifest"), err) 843 } 844 845 if !cfg.UseOverlay { 846 destRootfs := filepath.Join(s1, "rootfs") 847 cachedTreePath := cfg.TreeStore.GetRootFS(treeStoreID) 848 if err := fileutil.CopyTree(cachedTreePath, destRootfs, cfg.PrivateUsers); err != nil { 849 return errwrap.Wrap(errors.New("error rendering ACI"), err) 850 } 851 } 852 853 fn := path.Join(cdir, common.Stage1TreeStoreIDFilename) 854 if err := ioutil.WriteFile(fn, []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { 855 return errwrap.Wrap(errors.New("error writing stage1 treeStoreID"), err) 856 } 857 return nil 858 } 859 860 // setupStage1Image mounts the overlay filesystem for stage1. 861 // When useOverlay is false it is a noop 862 func setupStage1Image(cfg RunConfig, cdir string, useOverlay bool) error { 863 s1 := common.Stage1ImagePath(cdir) 864 if useOverlay { 865 treeStoreID, err := ioutil.ReadFile(filepath.Join(cdir, common.Stage1TreeStoreIDFilename)) 866 if err != nil { 867 return err 868 } 869 870 // pass an empty appName 871 if err := overlayRender(cfg, string(treeStoreID), cdir, s1, ""); err != nil { 872 return errwrap.Wrap(errors.New("error rendering overlay filesystem"), err) 873 } 874 875 // we will later read the status from the upper layer of the overlay fs 876 // force the status directory to be there by touching it 877 statusPath := filepath.Join(s1, "rootfs", "rkt", "status") 878 if err := os.Chtimes(statusPath, time.Now(), time.Now()); err != nil { 879 return errwrap.Wrap(errors.New("error touching status dir"), err) 880 } 881 } 882 883 return nil 884 } 885 886 // writeManifest takes an img ID and writes the corresponding manifest in dest 887 func writeManifest(cfg CommonConfig, img types.Hash, dest string) error { 888 mb, err := cfg.Store.GetImageManifestJSON(img.String()) 889 if err != nil { 890 return err 891 } 892 893 debug("Writing image manifest") 894 if err := ioutil.WriteFile(filepath.Join(dest, "manifest"), mb, common.DefaultRegularFilePerm); err != nil { 895 return errwrap.Wrap(errors.New("error writing image manifest"), err) 896 } 897 898 return nil 899 } 900 901 // copyAppManifest copies to saved image manifest for the given appName and 902 // writes it in the dest directory. 903 func copyAppManifest(cdir string, appName types.ACName, dest string) error { 904 appInfoDir := common.AppInfoPath(cdir, appName) 905 sourceFn := filepath.Join(appInfoDir, "manifest") 906 destFn := filepath.Join(dest, "manifest") 907 if err := fileutil.CopyRegularFile(sourceFn, destFn); err != nil { 908 return errwrap.Wrap(errors.New("error copying image manifest"), err) 909 } 910 return nil 911 } 912 913 // overlayRender renders the image that corresponds to the given hash using the 914 // overlay filesystem. It mounts an overlay filesystem from the cached tree of 915 // the image as rootfs. 916 func overlayRender(cfg RunConfig, treeStoreID string, cdir string, dest string, appName string) error { 917 cachedTreePath := cfg.TreeStore.GetRootFS(treeStoreID) 918 mc, err := prepareOverlay(cachedTreePath, treeStoreID, cdir, dest, appName, cfg.MountLabel, 919 cfg.RktGid, common.DefaultRegularDirPerm) 920 if err != nil { 921 return errwrap.Wrap(errors.New("problem preparing overlay directories"), err) 922 } 923 if err = overlay.Mount(mc); err != nil { 924 return errwrap.Wrap(errors.New("problem mounting overlay filesystem"), err) 925 } 926 927 return nil 928 } 929 930 // prepateOverlay sets up the needed directories, files and permissions for the 931 // overlay-rendered pods 932 func prepareOverlay(lower, treeStoreID, cdir, dest, appName, lbl string, 933 gid int, fm os.FileMode) (*overlay.MountCfg, error) { 934 fi, err := os.Stat(lower) 935 if err != nil { 936 return nil, err 937 } 938 imgMode := fi.Mode() 939 940 dst := path.Join(dest, "rootfs") 941 if err := os.MkdirAll(dst, imgMode); err != nil { 942 return nil, err 943 } 944 945 overlayDir := path.Join(cdir, "overlay") 946 if err := os.MkdirAll(overlayDir, fm); err != nil { 947 return nil, err 948 } 949 950 // Since the parent directory (rkt/pods/$STATE/$POD_UUID) has the 'S_ISGID' bit, here 951 // we need to explicitly turn the bit off when creating this overlay 952 // directory so that it won't inherit the bit. Otherwise the files 953 // created by users within the pod will inherit the 'S_ISGID' bit 954 // as well. 955 if err := os.Chmod(overlayDir, fm); err != nil { 956 return nil, err 957 } 958 959 imgDir := path.Join(overlayDir, treeStoreID) 960 if err := os.MkdirAll(imgDir, fm); err != nil { 961 return nil, err 962 } 963 // Also make 'rkt/pods/$STATE/$POD_UUID/overlay/$IMAGE_ID' to be readable by 'rkt' group 964 // As 'rkt' status will read the 'rkt/pods/$STATE/$POD_UUID/overlay/$IMAGE_ID/upper/rkt/status/$APP' 965 // to get exgid 966 if err := os.Chown(imgDir, -1, gid); err != nil { 967 return nil, err 968 } 969 970 upper := path.Join(imgDir, "upper", appName) 971 if err := os.MkdirAll(upper, imgMode); err != nil { 972 return nil, err 973 } 974 if err := label.SetFileLabel(upper, lbl); err != nil { 975 return nil, err 976 } 977 978 work := path.Join(imgDir, "work", appName) 979 if err := os.MkdirAll(work, fm); err != nil { 980 return nil, err 981 } 982 if err := label.SetFileLabel(work, lbl); err != nil { 983 return nil, err 984 } 985 986 return &overlay.MountCfg{lower, upper, work, dst, lbl}, nil 987 }