github.com/blixtra/rkt@v0.8.1-0.20160204105720-ab0d1add1a43/stage1/init/common/pod.go (about) 1 // Copyright 2014 The rkt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //+build linux 16 17 package common 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "io" 24 "io/ioutil" 25 "os" 26 "path" 27 "path/filepath" 28 "regexp" 29 "strconv" 30 "strings" 31 32 "github.com/coreos/rkt/pkg/acl" 33 stage1commontypes "github.com/coreos/rkt/stage1/common/types" 34 35 "github.com/appc/spec/schema" 36 "github.com/appc/spec/schema/types" 37 "github.com/coreos/go-systemd/unit" 38 "github.com/hashicorp/errwrap" 39 40 "github.com/coreos/rkt/common" 41 "github.com/coreos/rkt/common/cgroup" 42 "github.com/coreos/rkt/pkg/uid" 43 ) 44 45 const ( 46 // FlavorFile names the file storing the pod's flavor 47 FlavorFile = "flavor" 48 sharedVolPerm = os.FileMode(0755) 49 ) 50 51 var ( 52 defaultEnv = map[string]string{ 53 "PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 54 "SHELL": "/bin/sh", 55 "USER": "root", 56 "LOGNAME": "root", 57 "HOME": "/root", 58 } 59 ) 60 61 // execEscape uses Golang's string quoting for ", \, \n, and regex for special cases 62 func execEscape(i int, str string) string { 63 escapeMap := map[string]string{ 64 `'`: `\`, 65 } 66 67 if i > 0 { // These are escaped only after the first argument 68 escapeMap[`$`] = `$` 69 } 70 71 escArg := fmt.Sprintf("%q", str) 72 for k := range escapeMap { 73 reStr := `([` + regexp.QuoteMeta(k) + `])` 74 re := regexp.MustCompile(reStr) 75 escArg = re.ReplaceAllStringFunc(escArg, func(s string) string { 76 escaped := escapeMap[s] + s 77 return escaped 78 }) 79 } 80 return escArg 81 } 82 83 // quoteExec returns an array of quoted strings appropriate for systemd execStart usage 84 func quoteExec(exec []string) string { 85 if len(exec) == 0 { 86 // existing callers prefix {"/appexec", "/app/root", "/work/dir", "/env/file"} so this shouldn't occur. 87 panic("empty exec") 88 } 89 90 var qexec []string 91 for i, arg := range exec { 92 escArg := execEscape(i, arg) 93 qexec = append(qexec, escArg) 94 } 95 return strings.Join(qexec, " ") 96 } 97 98 // WriteDefaultTarget writes the default.target unit file 99 // which is responsible for bringing up the applications 100 func WriteDefaultTarget(p *stage1commontypes.Pod) error { 101 opts := []*unit.UnitOption{ 102 unit.NewUnitOption("Unit", "Description", "rkt apps target"), 103 unit.NewUnitOption("Unit", "DefaultDependencies", "false"), 104 } 105 106 for i := range p.Manifest.Apps { 107 ra := &p.Manifest.Apps[i] 108 serviceName := ServiceUnitName(ra.Name) 109 opts = append(opts, unit.NewUnitOption("Unit", "After", serviceName)) 110 opts = append(opts, unit.NewUnitOption("Unit", "Wants", serviceName)) 111 } 112 113 unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) 114 file, err := os.OpenFile(filepath.Join(unitsPath, "default.target"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) 115 if err != nil { 116 return err 117 } 118 defer file.Close() 119 120 if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { 121 return err 122 } 123 124 return nil 125 } 126 127 // WritePrepareAppTemplate writes service unit files for preparing the pod's applications 128 func WritePrepareAppTemplate(p *stage1commontypes.Pod) error { 129 opts := []*unit.UnitOption{ 130 unit.NewUnitOption("Unit", "Description", "Prepare minimum environment for chrooted applications"), 131 unit.NewUnitOption("Unit", "DefaultDependencies", "false"), 132 unit.NewUnitOption("Unit", "OnFailureJobMode", "fail"), 133 unit.NewUnitOption("Unit", "Requires", "systemd-journald.service"), 134 unit.NewUnitOption("Unit", "After", "systemd-journald.service"), 135 unit.NewUnitOption("Service", "Type", "oneshot"), 136 unit.NewUnitOption("Service", "Restart", "no"), 137 unit.NewUnitOption("Service", "ExecStart", "/prepare-app %I"), 138 unit.NewUnitOption("Service", "User", "0"), 139 unit.NewUnitOption("Service", "Group", "0"), 140 unit.NewUnitOption("Service", "CapabilityBoundingSet", "CAP_SYS_ADMIN CAP_DAC_OVERRIDE"), 141 } 142 143 unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) 144 file, err := os.OpenFile(filepath.Join(unitsPath, "prepare-app@.service"), os.O_WRONLY|os.O_CREATE, 0644) 145 if err != nil { 146 return errwrap.Wrap(errors.New("failed to create service unit file"), err) 147 } 148 defer file.Close() 149 150 if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { 151 return errwrap.Wrap(errors.New("failed to write service unit file"), err) 152 } 153 154 return nil 155 } 156 157 func writeAppReaper(p *stage1commontypes.Pod, appName string) error { 158 opts := []*unit.UnitOption{ 159 unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)), 160 unit.NewUnitOption("Unit", "DefaultDependencies", "false"), 161 unit.NewUnitOption("Unit", "StopWhenUnneeded", "yes"), 162 unit.NewUnitOption("Unit", "Wants", "shutdown.service"), 163 unit.NewUnitOption("Unit", "After", "shutdown.service"), 164 unit.NewUnitOption("Unit", "Conflicts", "exit.target"), 165 unit.NewUnitOption("Unit", "Conflicts", "halt.target"), 166 unit.NewUnitOption("Unit", "Conflicts", "poweroff.target"), 167 unit.NewUnitOption("Service", "RemainAfterExit", "yes"), 168 unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf("/reaper.sh %s", appName)), 169 } 170 171 unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) 172 file, err := os.OpenFile(filepath.Join(unitsPath, fmt.Sprintf("reaper-%s.service", appName)), os.O_WRONLY|os.O_CREATE, 0644) 173 if err != nil { 174 return errwrap.Wrap(errors.New("failed to create service unit file"), err) 175 } 176 defer file.Close() 177 178 if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { 179 return errwrap.Wrap(errors.New("failed to write service unit file"), err) 180 } 181 182 return nil 183 } 184 185 // SetJournalPermissions sets ACLs and permissions so the rkt group can access 186 // the pod's logs 187 func SetJournalPermissions(p *stage1commontypes.Pod) error { 188 s1 := common.Stage1ImagePath(p.Root) 189 190 rktgid, err := common.LookupGid(common.RktGroup) 191 if err != nil { 192 return fmt.Errorf("group %q not found", common.RktGroup) 193 } 194 195 journalPath := filepath.Join(s1, "rootfs", "var", "log", "journal") 196 if err := os.MkdirAll(journalPath, os.FileMode(0755)); err != nil { 197 return errwrap.Wrap(errors.New("error creating journal dir"), err) 198 } 199 200 a, err := acl.InitACL() 201 if err != nil { 202 return err 203 } 204 defer a.Free() 205 206 if err := a.ParseACL(fmt.Sprintf("g:%d:r-x,m:r-x", rktgid)); err != nil { 207 return errwrap.Wrap(errors.New("error parsing ACL string"), err) 208 } 209 210 if err := a.AddBaseEntries(journalPath); err != nil { 211 return errwrap.Wrap(errors.New("error adding base ACL entries"), err) 212 } 213 214 if err := a.Valid(); err != nil { 215 return err 216 } 217 218 if err := a.SetFileACLDefault(journalPath); err != nil { 219 return errwrap.Wrap(fmt.Errorf("error setting default ACLs on %q", journalPath), err) 220 } 221 222 return nil 223 } 224 225 func generateGidArg(gid int, supplGid []int) string { 226 arg := []string{strconv.Itoa(gid)} 227 for _, sg := range supplGid { 228 arg = append(arg, strconv.Itoa(sg)) 229 } 230 return strings.Join(arg, ",") 231 } 232 233 // appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units 234 func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { 235 app := ra.App 236 appName := ra.Name 237 image, ok := p.Images[appName.String()] 238 if !ok { 239 // This is impossible as we have updated the map in LoadPod(). 240 panic(fmt.Sprintf("No images for app %q", ra.Name.String())) 241 } 242 imgName := image.Name 243 244 if len(app.Exec) == 0 { 245 return fmt.Errorf(`image %q has an empty "exec" (try --exec=BINARY)`, imgName) 246 } 247 248 workDir := "/" 249 if app.WorkingDirectory != "" { 250 workDir = app.WorkingDirectory 251 } 252 253 env := app.Environment 254 255 env.Set("AC_APP_NAME", appName.String()) 256 if p.MetadataServiceURL != "" { 257 env.Set("AC_METADATA_URL", p.MetadataServiceURL) 258 } 259 260 if err := writeEnvFile(p, env, appName, privateUsers); err != nil { 261 return errwrap.Wrap(errors.New("unable to write environment file"), err) 262 } 263 264 // This is a partial implementation for app.User and app.Group: 265 // For now, only numeric ids (and the string "root") are supported. 266 var uid, gid int 267 var err error 268 if app.User == "root" { 269 uid = 0 270 } else { 271 uid, err = strconv.Atoi(app.User) 272 if err != nil { 273 return fmt.Errorf("non-numerical user id not supported yet") 274 } 275 } 276 if app.Group == "root" { 277 gid = 0 278 } else { 279 gid, err = strconv.Atoi(app.Group) 280 if err != nil { 281 return fmt.Errorf("non-numerical group id not supported yet") 282 } 283 } 284 285 execWrap := []string{"/appexec", common.RelAppRootfsPath(appName), workDir, RelEnvFilePath(appName), strconv.Itoa(uid), generateGidArg(gid, app.SupplementaryGIDs)} 286 execStart := quoteExec(append(execWrap, app.Exec...)) 287 opts := []*unit.UnitOption{ 288 unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v", appName, imgName)), 289 unit.NewUnitOption("Unit", "DefaultDependencies", "false"), 290 unit.NewUnitOption("Unit", "Wants", fmt.Sprintf("reaper-%s.service", appName)), 291 unit.NewUnitOption("Service", "Restart", "no"), 292 unit.NewUnitOption("Service", "ExecStart", execStart), 293 unit.NewUnitOption("Service", "User", "0"), 294 unit.NewUnitOption("Service", "Group", "0"), 295 } 296 297 if interactive { 298 opts = append(opts, unit.NewUnitOption("Service", "StandardInput", "tty")) 299 opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "tty")) 300 opts = append(opts, unit.NewUnitOption("Service", "StandardError", "tty")) 301 } else { 302 opts = append(opts, unit.NewUnitOption("Service", "StandardOutput", "journal+console")) 303 opts = append(opts, unit.NewUnitOption("Service", "StandardError", "journal+console")) 304 opts = append(opts, unit.NewUnitOption("Service", "SyslogIdentifier", filepath.Base(app.Exec[0]))) 305 } 306 307 // When an app fails, we shut down the pod 308 opts = append(opts, unit.NewUnitOption("Unit", "OnFailure", "halt.target")) 309 310 for _, eh := range app.EventHandlers { 311 var typ string 312 switch eh.Name { 313 case "pre-start": 314 typ = "ExecStartPre" 315 case "post-stop": 316 typ = "ExecStopPost" 317 default: 318 return fmt.Errorf("unrecognized eventHandler: %v", eh.Name) 319 } 320 exec := quoteExec(append(execWrap, eh.Exec...)) 321 opts = append(opts, unit.NewUnitOption("Service", typ, exec)) 322 } 323 324 // Some pre-start jobs take a long time, set the timeout to 0 325 opts = append(opts, unit.NewUnitOption("Service", "TimeoutStartSec", "0")) 326 327 var saPorts []types.Port 328 for _, p := range app.Ports { 329 if p.SocketActivated { 330 saPorts = append(saPorts, p) 331 } 332 } 333 334 for _, i := range app.Isolators { 335 switch v := i.Value().(type) { 336 case *types.ResourceMemory: 337 opts, err = cgroup.MaybeAddIsolator(opts, "memory", v.Limit()) 338 if err != nil { 339 return err 340 } 341 case *types.ResourceCPU: 342 opts, err = cgroup.MaybeAddIsolator(opts, "cpu", v.Limit()) 343 if err != nil { 344 return err 345 } 346 } 347 } 348 349 if len(saPorts) > 0 { 350 sockopts := []*unit.UnitOption{ 351 unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Application=%v Image=%v %s", appName, imgName, "socket-activated ports")), 352 unit.NewUnitOption("Unit", "DefaultDependencies", "false"), 353 unit.NewUnitOption("Socket", "BindIPv6Only", "both"), 354 unit.NewUnitOption("Socket", "Service", ServiceUnitName(appName)), 355 } 356 357 for _, sap := range saPorts { 358 var proto string 359 switch sap.Protocol { 360 case "tcp": 361 proto = "ListenStream" 362 case "udp": 363 proto = "ListenDatagram" 364 default: 365 return fmt.Errorf("unrecognized protocol: %v", sap.Protocol) 366 } 367 sockopts = append(sockopts, unit.NewUnitOption("Socket", proto, fmt.Sprintf("%v", sap.Port))) 368 } 369 370 file, err := os.OpenFile(SocketUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) 371 if err != nil { 372 return errwrap.Wrap(errors.New("failed to create socket file"), err) 373 } 374 defer file.Close() 375 376 if _, err = io.Copy(file, unit.Serialize(sockopts)); err != nil { 377 return errwrap.Wrap(errors.New("failed to write socket unit file"), err) 378 } 379 380 if err = os.Symlink(path.Join("..", SocketUnitName(appName)), SocketWantPath(p.Root, appName)); err != nil { 381 return errwrap.Wrap(errors.New("failed to link socket want"), err) 382 } 383 384 opts = append(opts, unit.NewUnitOption("Unit", "Requires", SocketUnitName(appName))) 385 } 386 387 opts = append(opts, unit.NewUnitOption("Unit", "Requires", InstantiatedPrepareAppUnitName(appName))) 388 opts = append(opts, unit.NewUnitOption("Unit", "After", InstantiatedPrepareAppUnitName(appName))) 389 390 file, err := os.OpenFile(ServiceUnitPath(p.Root, appName), os.O_WRONLY|os.O_CREATE, 0644) 391 if err != nil { 392 return errwrap.Wrap(errors.New("failed to create service unit file"), err) 393 } 394 defer file.Close() 395 396 if _, err = io.Copy(file, unit.Serialize(opts)); err != nil { 397 return errwrap.Wrap(errors.New("failed to write service unit file"), err) 398 } 399 400 if err = os.Symlink(path.Join("..", ServiceUnitName(appName)), ServiceWantPath(p.Root, appName)); err != nil { 401 return errwrap.Wrap(errors.New("failed to link service want"), err) 402 } 403 404 if flavor == "kvm" { 405 // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) 406 err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) 407 if err != nil { 408 return errwrap.Wrap(errors.New("failed to prepare mount units"), err) 409 } 410 411 } 412 413 if err = writeAppReaper(p, appName.String()); err != nil { 414 return errwrap.Wrap(fmt.Errorf("failed to write app %q reaper service", appName), err) 415 } 416 417 return nil 418 } 419 420 // writeEnvFile creates an environment file for given app name, the minimum 421 // required environment variables by the appc spec will be set to sensible 422 // defaults here if they're not provided by env. 423 func writeEnvFile(p *stage1commontypes.Pod, env types.Environment, appName types.ACName, privateUsers string) error { 424 ef := bytes.Buffer{} 425 426 for dk, dv := range defaultEnv { 427 if _, exists := env.Get(dk); !exists { 428 fmt.Fprintf(&ef, "%s=%s\000", dk, dv) 429 } 430 } 431 432 for _, e := range env { 433 fmt.Fprintf(&ef, "%s=%s\000", e.Name, e.Value) 434 } 435 436 uidRange := uid.NewBlankUidRange() 437 if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { 438 return err 439 } 440 441 envFilePath := EnvFilePath(p.Root, appName) 442 if err := ioutil.WriteFile(envFilePath, ef.Bytes(), 0644); err != nil { 443 return err 444 } 445 446 if uidRange.Shift != 0 && uidRange.Count != 0 { 447 if err := os.Chown(envFilePath, int(uidRange.Shift), int(uidRange.Shift)); err != nil { 448 return err 449 } 450 } 451 452 return nil 453 } 454 455 // PodToSystemd creates the appropriate systemd service unit files for 456 // all the constituent apps of the Pod 457 func PodToSystemd(p *stage1commontypes.Pod, interactive bool, flavor string, privateUsers string) error { 458 459 for i := range p.Manifest.Apps { 460 ra := &p.Manifest.Apps[i] 461 if err := appToSystemd(p, ra, interactive, flavor, privateUsers); err != nil { 462 return errwrap.Wrap(fmt.Errorf("failed to transform app %q into systemd service", ra.Name), err) 463 } 464 } 465 return nil 466 } 467 468 // appToNspawnArgs transforms the given app manifest, with the given associated 469 // app name, into a subset of applicable systemd-nspawn argument 470 func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) { 471 var args []string 472 appName := ra.Name 473 app := ra.App 474 475 sharedVolPath := common.SharedVolumesPath(p.Root) 476 if err := os.MkdirAll(sharedVolPath, sharedVolPerm); err != nil { 477 return nil, errwrap.Wrap(errors.New("could not create shared volumes directory"), err) 478 } 479 if err := os.Chmod(sharedVolPath, sharedVolPerm); err != nil { 480 return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", sharedVolPath), err) 481 } 482 483 vols := make(map[types.ACName]types.Volume) 484 for _, v := range p.Manifest.Volumes { 485 vols[v.Name] = v 486 } 487 488 mounts := GenerateMounts(ra, vols) 489 for _, m := range mounts { 490 vol := vols[m.Volume] 491 492 if vol.Kind == "empty" { 493 p := filepath.Join(sharedVolPath, vol.Name.String()) 494 if err := os.MkdirAll(p, sharedVolPerm); err != nil { 495 return nil, errwrap.Wrap(fmt.Errorf("could not create shared volume %q", vol.Name), err) 496 } 497 if err := os.Chown(p, *vol.UID, *vol.GID); err != nil { 498 return nil, errwrap.Wrap(fmt.Errorf("could not change owner of %q", p), err) 499 } 500 mod, err := strconv.ParseUint(*vol.Mode, 8, 32) 501 if err != nil { 502 return nil, errwrap.Wrap(fmt.Errorf("invalid mode %q for volume %q", *vol.Mode, vol.Name), err) 503 } 504 if err := os.Chmod(p, os.FileMode(mod)); err != nil { 505 return nil, errwrap.Wrap(fmt.Errorf("could not change permissions of %q", p), err) 506 } 507 } 508 509 opt := make([]string, 4) 510 511 if IsMountReadOnly(vol, app.MountPoints) { 512 opt[0] = "--bind-ro=" 513 } else { 514 opt[0] = "--bind=" 515 } 516 517 switch vol.Kind { 518 case "host": 519 opt[1] = vol.Source 520 case "empty": 521 absRoot, err := filepath.Abs(p.Root) 522 if err != nil { 523 return nil, errwrap.Wrap(errors.New("cannot get pod's root absolute path"), err) 524 } 525 opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String()) 526 default: 527 return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind) 528 } 529 opt[2] = ":" 530 opt[3] = filepath.Join(common.RelAppRootfsPath(appName), m.Path) 531 532 args = append(args, strings.Join(opt, "")) 533 } 534 535 for _, i := range app.Isolators { 536 switch v := i.Value().(type) { 537 case types.LinuxCapabilitiesSet: 538 var caps []string 539 // TODO: cleanup the API on LinuxCapabilitiesSet to give strings easily. 540 for _, c := range v.Set() { 541 caps = append(caps, string(c)) 542 } 543 if i.Name == types.LinuxCapabilitiesRetainSetName { 544 capList := strings.Join(caps, ",") 545 args = append(args, "--capability="+capList) 546 } 547 } 548 } 549 550 return args, nil 551 } 552 553 // PodToNspawnArgs renders a prepared Pod as a systemd-nspawn 554 // argument list ready to be executed 555 func PodToNspawnArgs(p *stage1commontypes.Pod) ([]string, error) { 556 args := []string{ 557 "--uuid=" + p.UUID.String(), 558 "--machine=" + GetMachineID(p), 559 "--directory=" + common.Stage1RootfsPath(p.Root), 560 } 561 562 for i := range p.Manifest.Apps { 563 aa, err := appToNspawnArgs(p, &p.Manifest.Apps[i]) 564 if err != nil { 565 return nil, err 566 } 567 args = append(args, aa...) 568 } 569 570 return args, nil 571 } 572 573 // GetFlavor populates a flavor string based on the flavor itself and respectively the systemd version 574 func GetFlavor(p *stage1commontypes.Pod) (flavor string, systemdVersion string, err error) { 575 flavor, err = os.Readlink(filepath.Join(common.Stage1RootfsPath(p.Root), "flavor")) 576 if err != nil { 577 return "", "", errwrap.Wrap(errors.New("unable to determine stage1 flavor"), err) 578 } 579 580 if flavor == "host" { 581 // This flavor does not contain systemd, so don't return systemdVersion 582 return flavor, "", nil 583 } 584 585 systemdVersionBytes, err := ioutil.ReadFile(filepath.Join(common.Stage1RootfsPath(p.Root), "systemd-version")) 586 if err != nil { 587 return "", "", errwrap.Wrap(errors.New("unable to determine stage1's systemd version"), err) 588 } 589 systemdVersion = strings.Trim(string(systemdVersionBytes), " \n") 590 return flavor, systemdVersion, nil 591 } 592 593 // GetAppHashes returns a list of hashes of the apps in this pod 594 func GetAppHashes(p *stage1commontypes.Pod) []types.Hash { 595 var names []types.Hash 596 for _, a := range p.Manifest.Apps { 597 names = append(names, a.Image.ID) 598 } 599 600 return names 601 } 602 603 // GetMachineID returns the machine id string of the pod to be passed to 604 // systemd-nspawn 605 func GetMachineID(p *stage1commontypes.Pod) string { 606 return "rkt-" + p.UUID.String() 607 }