github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/graphdriver/overlay2/overlay.go (about) 1 // +build linux 2 3 package overlay2 4 5 import ( 6 "bufio" 7 "errors" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "os/exec" 13 "path" 14 "path/filepath" 15 "strconv" 16 "strings" 17 "sync" 18 "syscall" 19 20 "github.com/Sirupsen/logrus" 21 22 "github.com/docker/docker/daemon/graphdriver" 23 "github.com/docker/docker/daemon/graphdriver/overlayutils" 24 "github.com/docker/docker/daemon/graphdriver/quota" 25 "github.com/docker/docker/pkg/archive" 26 "github.com/docker/docker/pkg/chrootarchive" 27 "github.com/docker/docker/pkg/directory" 28 "github.com/docker/docker/pkg/fsutils" 29 "github.com/docker/docker/pkg/idtools" 30 "github.com/docker/docker/pkg/locker" 31 "github.com/docker/docker/pkg/mount" 32 "github.com/docker/docker/pkg/parsers" 33 "github.com/docker/docker/pkg/parsers/kernel" 34 "github.com/docker/docker/pkg/system" 35 units "github.com/docker/go-units" 36 37 "github.com/opencontainers/selinux/go-selinux/label" 38 ) 39 40 var ( 41 // untar defines the untar method 42 untar = chrootarchive.UntarUncompressed 43 ) 44 45 // This backend uses the overlay union filesystem for containers 46 // with diff directories for each layer. 47 48 // This version of the overlay driver requires at least kernel 49 // 4.0.0 in order to support mounting multiple diff directories. 50 51 // Each container/image has at least a "diff" directory and "link" file. 52 // If there is also a "lower" file when there are diff layers 53 // below as well as "merged" and "work" directories. The "diff" directory 54 // has the upper layer of the overlay and is used to capture any 55 // changes to the layer. The "lower" file contains all the lower layer 56 // mounts separated by ":" and ordered from uppermost to lowermost 57 // layers. The overlay itself is mounted in the "merged" directory, 58 // and the "work" dir is needed for overlay to work. 59 60 // The "link" file for each layer contains a unique string for the layer. 61 // Under the "l" directory at the root there will be a symbolic link 62 // with that unique string pointing the "diff" directory for the layer. 63 // The symbolic links are used to reference lower layers in the "lower" 64 // file and on mount. The links are used to shorten the total length 65 // of a layer reference without requiring changes to the layer identifier 66 // or root directory. Mounts are always done relative to root and 67 // referencing the symbolic links in order to ensure the number of 68 // lower directories can fit in a single page for making the mount 69 // syscall. A hard upper limit of 128 lower layers is enforced to ensure 70 // that mounts do not fail due to length. 71 72 const ( 73 driverName = "overlay2" 74 linkDir = "l" 75 lowerFile = "lower" 76 maxDepth = 128 77 78 // idLength represents the number of random characters 79 // which can be used to create the unique link identifer 80 // for every layer. If this value is too long then the 81 // page size limit for the mount command may be exceeded. 82 // The idLength should be selected such that following equation 83 // is true (512 is a buffer for label metadata). 84 // ((idLength + len(linkDir) + 1) * maxDepth) <= (pageSize - 512) 85 idLength = 26 86 ) 87 88 type overlayOptions struct { 89 overrideKernelCheck bool 90 quota quota.Quota 91 } 92 93 // Driver contains information about the home directory and the list of active mounts that are created using this driver. 94 type Driver struct { 95 home string 96 uidMaps []idtools.IDMap 97 gidMaps []idtools.IDMap 98 ctr *graphdriver.RefCounter 99 quotaCtl *quota.Control 100 options overlayOptions 101 naiveDiff graphdriver.DiffDriver 102 supportsDType bool 103 locker *locker.Locker 104 } 105 106 var ( 107 backingFs = "<unknown>" 108 projectQuotaSupported = false 109 110 useNaiveDiffLock sync.Once 111 useNaiveDiffOnly bool 112 ) 113 114 func init() { 115 graphdriver.Register(driverName, Init) 116 } 117 118 // Init returns the a native diff driver for overlay filesystem. 119 // If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error. 120 // If an overlay filesystem is not supported over an existing filesystem then error graphdriver.ErrIncompatibleFS is returned. 121 func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) { 122 opts, err := parseOptions(options) 123 if err != nil { 124 return nil, err 125 } 126 127 if err := supportsOverlay(); err != nil { 128 return nil, graphdriver.ErrNotSupported 129 } 130 131 // require kernel 4.0.0 to ensure multiple lower dirs are supported 132 v, err := kernel.GetKernelVersion() 133 if err != nil { 134 return nil, err 135 } 136 if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 0, Minor: 0}) < 0 { 137 if !opts.overrideKernelCheck { 138 return nil, graphdriver.ErrNotSupported 139 } 140 logrus.Warn("Using pre-4.0.0 kernel for overlay2, mount failures may require kernel update") 141 } 142 143 fsMagic, err := graphdriver.GetFSMagic(home) 144 if err != nil { 145 return nil, err 146 } 147 if fsName, ok := graphdriver.FsNames[fsMagic]; ok { 148 backingFs = fsName 149 } 150 151 // check if they are running over btrfs, aufs, zfs, overlay, or ecryptfs 152 switch fsMagic { 153 case graphdriver.FsMagicAufs, graphdriver.FsMagicZfs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs: 154 logrus.Errorf("'overlay2' is not supported over %s", backingFs) 155 return nil, graphdriver.ErrIncompatibleFS 156 case graphdriver.FsMagicBtrfs: 157 // Support for OverlayFS on BTRFS was added in kernel 4.7 158 // See https://btrfs.wiki.kernel.org/index.php/Changelog 159 if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 7, Minor: 0}) < 0 { 160 if !opts.overrideKernelCheck { 161 logrus.Errorf("'overlay2' requires kernel 4.7 to use on %s", backingFs) 162 return nil, graphdriver.ErrIncompatibleFS 163 } 164 logrus.Warn("Using pre-4.7.0 kernel for overlay2 on btrfs, may require kernel update") 165 } 166 } 167 168 rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps) 169 if err != nil { 170 return nil, err 171 } 172 // Create the driver home dir 173 if err := idtools.MkdirAllAs(path.Join(home, linkDir), 0700, rootUID, rootGID); err != nil && !os.IsExist(err) { 174 return nil, err 175 } 176 177 if err := mount.MakePrivate(home); err != nil { 178 return nil, err 179 } 180 181 supportsDType, err := fsutils.SupportsDType(home) 182 if err != nil { 183 return nil, err 184 } 185 if !supportsDType { 186 // not a fatal error until v17.12 (#27443) 187 logrus.Warn(overlayutils.ErrDTypeNotSupported("overlay2", backingFs)) 188 } 189 190 d := &Driver{ 191 home: home, 192 uidMaps: uidMaps, 193 gidMaps: gidMaps, 194 ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)), 195 supportsDType: supportsDType, 196 locker: locker.New(), 197 } 198 199 d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps) 200 201 if backingFs == "xfs" { 202 // Try to enable project quota support over xfs. 203 if d.quotaCtl, err = quota.NewControl(home); err == nil { 204 projectQuotaSupported = true 205 } 206 } 207 208 logrus.Debugf("backingFs=%s, projectQuotaSupported=%v", backingFs, projectQuotaSupported) 209 210 return d, nil 211 } 212 213 func parseOptions(options []string) (*overlayOptions, error) { 214 o := &overlayOptions{} 215 for _, option := range options { 216 key, val, err := parsers.ParseKeyValueOpt(option) 217 if err != nil { 218 return nil, err 219 } 220 key = strings.ToLower(key) 221 switch key { 222 case "overlay2.override_kernel_check": 223 o.overrideKernelCheck, err = strconv.ParseBool(val) 224 if err != nil { 225 return nil, err 226 } 227 228 default: 229 return nil, fmt.Errorf("overlay2: Unknown option %s\n", key) 230 } 231 } 232 return o, nil 233 } 234 235 func supportsOverlay() error { 236 // We can try to modprobe overlay first before looking at 237 // proc/filesystems for when overlay is supported 238 exec.Command("modprobe", "overlay").Run() 239 240 f, err := os.Open("/proc/filesystems") 241 if err != nil { 242 return err 243 } 244 defer f.Close() 245 246 s := bufio.NewScanner(f) 247 for s.Scan() { 248 if s.Text() == "nodev\toverlay" { 249 return nil 250 } 251 } 252 logrus.Error("'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.") 253 return graphdriver.ErrNotSupported 254 } 255 256 func useNaiveDiff(home string) bool { 257 useNaiveDiffLock.Do(func() { 258 if err := hasOpaqueCopyUpBug(home); err != nil { 259 logrus.Warnf("Not using native diff for overlay2: %v", err) 260 useNaiveDiffOnly = true 261 } 262 }) 263 return useNaiveDiffOnly 264 } 265 266 func (d *Driver) String() string { 267 return driverName 268 } 269 270 // Status returns current driver information in a two dimensional string array. 271 // Output contains "Backing Filesystem" used in this implementation. 272 func (d *Driver) Status() [][2]string { 273 return [][2]string{ 274 {"Backing Filesystem", backingFs}, 275 {"Supports d_type", strconv.FormatBool(d.supportsDType)}, 276 {"Native Overlay Diff", strconv.FormatBool(!useNaiveDiff(d.home))}, 277 } 278 } 279 280 // GetMetadata returns meta data about the overlay driver such as 281 // LowerDir, UpperDir, WorkDir and MergeDir used to store data. 282 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 283 dir := d.dir(id) 284 if _, err := os.Stat(dir); err != nil { 285 return nil, err 286 } 287 288 metadata := map[string]string{ 289 "WorkDir": path.Join(dir, "work"), 290 "MergedDir": path.Join(dir, "merged"), 291 "UpperDir": path.Join(dir, "diff"), 292 } 293 294 lowerDirs, err := d.getLowerDirs(id) 295 if err != nil { 296 return nil, err 297 } 298 if len(lowerDirs) > 0 { 299 metadata["LowerDir"] = strings.Join(lowerDirs, ":") 300 } 301 302 return metadata, nil 303 } 304 305 // Cleanup any state created by overlay which should be cleaned when daemon 306 // is being shutdown. For now, we just have to unmount the bind mounted 307 // we had created. 308 func (d *Driver) Cleanup() error { 309 return mount.Unmount(d.home) 310 } 311 312 // CreateReadWrite creates a layer that is writable for use as a container 313 // file system. 314 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 315 return d.Create(id, parent, opts) 316 } 317 318 // Create is used to create the upper, lower, and merge directories required for overlay fs for a given id. 319 // The parent filesystem is used to configure these directories for the overlay. 320 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) (retErr error) { 321 322 if opts != nil && len(opts.StorageOpt) != 0 && !projectQuotaSupported { 323 return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option") 324 } 325 326 dir := d.dir(id) 327 328 rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) 329 if err != nil { 330 return err 331 } 332 if err := idtools.MkdirAllAs(path.Dir(dir), 0700, rootUID, rootGID); err != nil { 333 return err 334 } 335 if err := idtools.MkdirAs(dir, 0700, rootUID, rootGID); err != nil { 336 return err 337 } 338 339 defer func() { 340 // Clean up on failure 341 if retErr != nil { 342 os.RemoveAll(dir) 343 } 344 }() 345 346 if opts != nil && len(opts.StorageOpt) > 0 { 347 driver := &Driver{} 348 if err := d.parseStorageOpt(opts.StorageOpt, driver); err != nil { 349 return err 350 } 351 352 if driver.options.quota.Size > 0 { 353 // Set container disk quota limit 354 if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil { 355 return err 356 } 357 } 358 } 359 360 if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil { 361 return err 362 } 363 364 lid := generateID(idLength) 365 if err := os.Symlink(path.Join("..", id, "diff"), path.Join(d.home, linkDir, lid)); err != nil { 366 return err 367 } 368 369 // Write link id to link file 370 if err := ioutil.WriteFile(path.Join(dir, "link"), []byte(lid), 0644); err != nil { 371 return err 372 } 373 374 // if no parent directory, done 375 if parent == "" { 376 return nil 377 } 378 379 if err := idtools.MkdirAs(path.Join(dir, "work"), 0700, rootUID, rootGID); err != nil { 380 return err 381 } 382 if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil { 383 return err 384 } 385 386 lower, err := d.getLower(parent) 387 if err != nil { 388 return err 389 } 390 if lower != "" { 391 if err := ioutil.WriteFile(path.Join(dir, lowerFile), []byte(lower), 0666); err != nil { 392 return err 393 } 394 } 395 396 return nil 397 } 398 399 // Parse overlay storage options 400 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 401 // Read size to set the disk project quota per container 402 for key, val := range storageOpt { 403 key := strings.ToLower(key) 404 switch key { 405 case "size": 406 size, err := units.RAMInBytes(val) 407 if err != nil { 408 return err 409 } 410 driver.options.quota.Size = uint64(size) 411 default: 412 return fmt.Errorf("Unknown option %s", key) 413 } 414 } 415 416 return nil 417 } 418 419 func (d *Driver) getLower(parent string) (string, error) { 420 parentDir := d.dir(parent) 421 422 // Ensure parent exists 423 if _, err := os.Lstat(parentDir); err != nil { 424 return "", err 425 } 426 427 // Read Parent link fileA 428 parentLink, err := ioutil.ReadFile(path.Join(parentDir, "link")) 429 if err != nil { 430 return "", err 431 } 432 lowers := []string{path.Join(linkDir, string(parentLink))} 433 434 parentLower, err := ioutil.ReadFile(path.Join(parentDir, lowerFile)) 435 if err == nil { 436 parentLowers := strings.Split(string(parentLower), ":") 437 lowers = append(lowers, parentLowers...) 438 } 439 if len(lowers) > maxDepth { 440 return "", errors.New("max depth exceeded") 441 } 442 return strings.Join(lowers, ":"), nil 443 } 444 445 func (d *Driver) dir(id string) string { 446 return path.Join(d.home, id) 447 } 448 449 func (d *Driver) getLowerDirs(id string) ([]string, error) { 450 var lowersArray []string 451 lowers, err := ioutil.ReadFile(path.Join(d.dir(id), lowerFile)) 452 if err == nil { 453 for _, s := range strings.Split(string(lowers), ":") { 454 lp, err := os.Readlink(path.Join(d.home, s)) 455 if err != nil { 456 return nil, err 457 } 458 lowersArray = append(lowersArray, path.Clean(path.Join(d.home, linkDir, lp))) 459 } 460 } else if !os.IsNotExist(err) { 461 return nil, err 462 } 463 return lowersArray, nil 464 } 465 466 // Remove cleans the directories that are created for this id. 467 func (d *Driver) Remove(id string) error { 468 d.locker.Lock(id) 469 defer d.locker.Unlock(id) 470 dir := d.dir(id) 471 lid, err := ioutil.ReadFile(path.Join(dir, "link")) 472 if err == nil { 473 if err := os.RemoveAll(path.Join(d.home, linkDir, string(lid))); err != nil { 474 logrus.Debugf("Failed to remove link: %v", err) 475 } 476 } 477 478 if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) { 479 return err 480 } 481 return nil 482 } 483 484 // Get creates and mounts the required file system for the given id and returns the mount path. 485 func (d *Driver) Get(id string, mountLabel string) (s string, err error) { 486 d.locker.Lock(id) 487 defer d.locker.Unlock(id) 488 dir := d.dir(id) 489 if _, err := os.Stat(dir); err != nil { 490 return "", err 491 } 492 493 diffDir := path.Join(dir, "diff") 494 lowers, err := ioutil.ReadFile(path.Join(dir, lowerFile)) 495 if err != nil { 496 // If no lower, just return diff directory 497 if os.IsNotExist(err) { 498 return diffDir, nil 499 } 500 return "", err 501 } 502 503 mergedDir := path.Join(dir, "merged") 504 if count := d.ctr.Increment(mergedDir); count > 1 { 505 return mergedDir, nil 506 } 507 defer func() { 508 if err != nil { 509 if c := d.ctr.Decrement(mergedDir); c <= 0 { 510 syscall.Unmount(mergedDir, 0) 511 } 512 } 513 }() 514 515 workDir := path.Join(dir, "work") 516 splitLowers := strings.Split(string(lowers), ":") 517 absLowers := make([]string, len(splitLowers)) 518 for i, s := range splitLowers { 519 absLowers[i] = path.Join(d.home, s) 520 } 521 opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), path.Join(dir, "diff"), path.Join(dir, "work")) 522 mountData := label.FormatMountLabel(opts, mountLabel) 523 mount := syscall.Mount 524 mountTarget := mergedDir 525 526 pageSize := syscall.Getpagesize() 527 528 // Go can return a larger page size than supported by the system 529 // as of go 1.7. This will be fixed in 1.8 and this block can be 530 // removed when building with 1.8. 531 // See https://github.com/golang/go/commit/1b9499b06989d2831e5b156161d6c07642926ee1 532 // See https://github.com/docker/docker/issues/27384 533 if pageSize > 4096 { 534 pageSize = 4096 535 } 536 537 // Use relative paths and mountFrom when the mount data has exceeded 538 // the page size. The mount syscall fails if the mount data cannot 539 // fit within a page and relative links make the mount data much 540 // smaller at the expense of requiring a fork exec to chroot. 541 if len(mountData) > pageSize { 542 opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", string(lowers), path.Join(id, "diff"), path.Join(id, "work")) 543 mountData = label.FormatMountLabel(opts, mountLabel) 544 if len(mountData) > pageSize { 545 return "", fmt.Errorf("cannot mount layer, mount label too large %d", len(mountData)) 546 } 547 548 mount = func(source string, target string, mType string, flags uintptr, label string) error { 549 return mountFrom(d.home, source, target, mType, flags, label) 550 } 551 mountTarget = path.Join(id, "merged") 552 } 553 554 if err := mount("overlay", mountTarget, "overlay", 0, mountData); err != nil { 555 return "", fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err) 556 } 557 558 // chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a 559 // user namespace requires this to move a directory from lower to upper. 560 rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) 561 if err != nil { 562 return "", err 563 } 564 565 if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil { 566 return "", err 567 } 568 569 return mergedDir, nil 570 } 571 572 // Put unmounts the mount path created for the give id. 573 func (d *Driver) Put(id string) error { 574 d.locker.Lock(id) 575 defer d.locker.Unlock(id) 576 dir := d.dir(id) 577 _, err := ioutil.ReadFile(path.Join(dir, lowerFile)) 578 if err != nil { 579 // If no lower, no mount happened and just return directly 580 if os.IsNotExist(err) { 581 return nil 582 } 583 return err 584 } 585 586 mountpoint := path.Join(dir, "merged") 587 if count := d.ctr.Decrement(mountpoint); count > 0 { 588 return nil 589 } 590 if err := syscall.Unmount(mountpoint, 0); err != nil { 591 logrus.Debugf("Failed to unmount %s overlay: %s - %v", id, mountpoint, err) 592 } 593 return nil 594 } 595 596 // Exists checks to see if the id is already mounted. 597 func (d *Driver) Exists(id string) bool { 598 _, err := os.Stat(d.dir(id)) 599 return err == nil 600 } 601 602 // isParent returns if the passed in parent is the direct parent of the passed in layer 603 func (d *Driver) isParent(id, parent string) bool { 604 lowers, err := d.getLowerDirs(id) 605 if err != nil { 606 return false 607 } 608 if parent == "" && len(lowers) > 0 { 609 return false 610 } 611 612 parentDir := d.dir(parent) 613 var ld string 614 if len(lowers) > 0 { 615 ld = filepath.Dir(lowers[0]) 616 } 617 if ld == "" && parent == "" { 618 return true 619 } 620 return ld == parentDir 621 } 622 623 // ApplyDiff applies the new layer into a root 624 func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64, err error) { 625 if !d.isParent(id, parent) { 626 return d.naiveDiff.ApplyDiff(id, parent, diff) 627 } 628 629 applyDir := d.getDiffPath(id) 630 631 logrus.Debugf("Applying tar in %s", applyDir) 632 // Overlay doesn't need the parent id to apply the diff 633 if err := untar(diff, applyDir, &archive.TarOptions{ 634 UIDMaps: d.uidMaps, 635 GIDMaps: d.gidMaps, 636 WhiteoutFormat: archive.OverlayWhiteoutFormat, 637 }); err != nil { 638 return 0, err 639 } 640 641 return directory.Size(applyDir) 642 } 643 644 func (d *Driver) getDiffPath(id string) string { 645 dir := d.dir(id) 646 647 return path.Join(dir, "diff") 648 } 649 650 // DiffSize calculates the changes between the specified id 651 // and its parent and returns the size in bytes of the changes 652 // relative to its base filesystem directory. 653 func (d *Driver) DiffSize(id, parent string) (size int64, err error) { 654 if useNaiveDiff(d.home) || !d.isParent(id, parent) { 655 return d.naiveDiff.DiffSize(id, parent) 656 } 657 return directory.Size(d.getDiffPath(id)) 658 } 659 660 // Diff produces an archive of the changes between the specified 661 // layer and its parent layer which may be "". 662 func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) { 663 if useNaiveDiff(d.home) || !d.isParent(id, parent) { 664 return d.naiveDiff.Diff(id, parent) 665 } 666 667 diffPath := d.getDiffPath(id) 668 logrus.Debugf("Tar with options on %s", diffPath) 669 return archive.TarWithOptions(diffPath, &archive.TarOptions{ 670 Compression: archive.Uncompressed, 671 UIDMaps: d.uidMaps, 672 GIDMaps: d.gidMaps, 673 WhiteoutFormat: archive.OverlayWhiteoutFormat, 674 }) 675 } 676 677 // Changes produces a list of changes between the specified layer 678 // and its parent layer. If parent is "", then all changes will be ADD changes. 679 func (d *Driver) Changes(id, parent string) ([]archive.Change, error) { 680 if useNaiveDiff(d.home) || !d.isParent(id, parent) { 681 return d.naiveDiff.Changes(id, parent) 682 } 683 // Overlay doesn't have snapshots, so we need to get changes from all parent 684 // layers. 685 diffPath := d.getDiffPath(id) 686 layers, err := d.getLowerDirs(id) 687 if err != nil { 688 return nil, err 689 } 690 691 return archive.OverlayChanges(layers, diffPath) 692 }