github.com/lacework-dev/go-moby@v20.10.12+incompatible/daemon/graphdriver/btrfs/btrfs.go (about) 1 // +build linux 2 3 package btrfs // import "github.com/docker/docker/daemon/graphdriver/btrfs" 4 5 /* 6 #include <stdlib.h> 7 #include <dirent.h> 8 #include <btrfs/ioctl.h> 9 #include <btrfs/ctree.h> 10 11 static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) { 12 snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value); 13 } 14 */ 15 import "C" 16 17 import ( 18 "fmt" 19 "io/ioutil" 20 "math" 21 "os" 22 "path" 23 "path/filepath" 24 "strconv" 25 "strings" 26 "sync" 27 "unsafe" 28 29 "github.com/docker/docker/daemon/graphdriver" 30 "github.com/docker/docker/pkg/containerfs" 31 "github.com/docker/docker/pkg/idtools" 32 "github.com/docker/docker/pkg/parsers" 33 "github.com/docker/docker/pkg/system" 34 units "github.com/docker/go-units" 35 "github.com/moby/sys/mount" 36 "github.com/opencontainers/selinux/go-selinux/label" 37 "github.com/pkg/errors" 38 "github.com/sirupsen/logrus" 39 "golang.org/x/sys/unix" 40 ) 41 42 func init() { 43 graphdriver.Register("btrfs", Init) 44 } 45 46 type btrfsOptions struct { 47 minSpace uint64 48 size uint64 49 } 50 51 // Init returns a new BTRFS driver. 52 // An error is returned if BTRFS is not supported. 53 func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) { 54 55 // Perform feature detection on /var/lib/docker/btrfs if it's an existing directory. 56 // This covers situations where /var/lib/docker/btrfs is a mount, and on a different 57 // filesystem than /var/lib/docker. 58 // If the path does not exist, fall back to using /var/lib/docker for feature detection. 59 testdir := home 60 if _, err := os.Stat(testdir); os.IsNotExist(err) { 61 testdir = filepath.Dir(testdir) 62 } 63 64 fsMagic, err := graphdriver.GetFSMagic(testdir) 65 if err != nil { 66 return nil, err 67 } 68 69 if fsMagic != graphdriver.FsMagicBtrfs { 70 return nil, graphdriver.ErrPrerequisites 71 } 72 73 remappedRoot := idtools.NewIDMappingsFromMaps(uidMaps, gidMaps) 74 currentID := idtools.CurrentIdentity() 75 dirID := idtools.Identity{ 76 UID: currentID.UID, 77 GID: remappedRoot.RootPair().GID, 78 } 79 80 if err := idtools.MkdirAllAndChown(home, 0710, dirID); err != nil { 81 return nil, err 82 } 83 84 opt, userDiskQuota, err := parseOptions(options) 85 if err != nil { 86 return nil, err 87 } 88 89 // For some reason shared mount propagation between a container 90 // and the host does not work for btrfs, and a remedy is to bind 91 // mount graphdriver home to itself (even without changing the 92 // propagation mode). 93 err = mount.MakeMount(home) 94 if err != nil { 95 return nil, errors.Wrapf(err, "failed to make %s a mount", home) 96 } 97 98 driver := &Driver{ 99 home: home, 100 uidMaps: uidMaps, 101 gidMaps: gidMaps, 102 options: opt, 103 } 104 105 if userDiskQuota { 106 if err := driver.subvolEnableQuota(); err != nil { 107 return nil, err 108 } 109 } 110 111 return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil 112 } 113 114 func parseOptions(opt []string) (btrfsOptions, bool, error) { 115 var options btrfsOptions 116 userDiskQuota := false 117 for _, option := range opt { 118 key, val, err := parsers.ParseKeyValueOpt(option) 119 if err != nil { 120 return options, userDiskQuota, err 121 } 122 key = strings.ToLower(key) 123 switch key { 124 case "btrfs.min_space": 125 minSpace, err := units.RAMInBytes(val) 126 if err != nil { 127 return options, userDiskQuota, err 128 } 129 userDiskQuota = true 130 options.minSpace = uint64(minSpace) 131 default: 132 return options, userDiskQuota, fmt.Errorf("Unknown option %s", key) 133 } 134 } 135 return options, userDiskQuota, nil 136 } 137 138 // Driver contains information about the filesystem mounted. 139 type Driver struct { 140 // root of the file system 141 home string 142 uidMaps []idtools.IDMap 143 gidMaps []idtools.IDMap 144 options btrfsOptions 145 quotaEnabled bool 146 once sync.Once 147 } 148 149 // String prints the name of the driver (btrfs). 150 func (d *Driver) String() string { 151 return "btrfs" 152 } 153 154 // Status returns current driver information in a two dimensional string array. 155 // Output contains "Build Version" and "Library Version" of the btrfs libraries used. 156 // Version information can be used to check compatibility with your kernel. 157 func (d *Driver) Status() [][2]string { 158 status := [][2]string{} 159 if bv := btrfsBuildVersion(); bv != "-" { 160 status = append(status, [2]string{"Build Version", bv}) 161 } 162 if lv := btrfsLibVersion(); lv != -1 { 163 status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)}) 164 } 165 return status 166 } 167 168 // GetMetadata returns empty metadata for this driver. 169 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 170 return nil, nil 171 } 172 173 // Cleanup unmounts the home directory. 174 func (d *Driver) Cleanup() error { 175 err := d.subvolDisableQuota() 176 umountErr := mount.Unmount(d.home) 177 178 // in case we have two errors, prefer the one from disableQuota() 179 if err != nil { 180 return err 181 } 182 183 if umountErr != nil { 184 return umountErr 185 } 186 187 return nil 188 } 189 190 func free(p *C.char) { 191 C.free(unsafe.Pointer(p)) 192 } 193 194 func openDir(path string) (*C.DIR, error) { 195 Cpath := C.CString(path) 196 defer free(Cpath) 197 198 dir := C.opendir(Cpath) 199 if dir == nil { 200 return nil, fmt.Errorf("Can't open dir") 201 } 202 return dir, nil 203 } 204 205 func closeDir(dir *C.DIR) { 206 if dir != nil { 207 C.closedir(dir) 208 } 209 } 210 211 func getDirFd(dir *C.DIR) uintptr { 212 return uintptr(C.dirfd(dir)) 213 } 214 215 func subvolCreate(path, name string) error { 216 dir, err := openDir(path) 217 if err != nil { 218 return err 219 } 220 defer closeDir(dir) 221 222 var args C.struct_btrfs_ioctl_vol_args 223 for i, c := range []byte(name) { 224 args.name[i] = C.char(c) 225 } 226 227 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE, 228 uintptr(unsafe.Pointer(&args))) 229 if errno != 0 { 230 return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error()) 231 } 232 return nil 233 } 234 235 func subvolSnapshot(src, dest, name string) error { 236 srcDir, err := openDir(src) 237 if err != nil { 238 return err 239 } 240 defer closeDir(srcDir) 241 242 destDir, err := openDir(dest) 243 if err != nil { 244 return err 245 } 246 defer closeDir(destDir) 247 248 var args C.struct_btrfs_ioctl_vol_args_v2 249 args.fd = C.__s64(getDirFd(srcDir)) 250 251 var cs = C.CString(name) 252 C.set_name_btrfs_ioctl_vol_args_v2(&args, cs) 253 C.free(unsafe.Pointer(cs)) 254 255 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2, 256 uintptr(unsafe.Pointer(&args))) 257 if errno != 0 { 258 return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error()) 259 } 260 return nil 261 } 262 263 func isSubvolume(p string) (bool, error) { 264 var bufStat unix.Stat_t 265 if err := unix.Lstat(p, &bufStat); err != nil { 266 return false, err 267 } 268 269 // return true if it is a btrfs subvolume 270 return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil 271 } 272 273 func subvolDelete(dirpath, name string, quotaEnabled bool) error { 274 dir, err := openDir(dirpath) 275 if err != nil { 276 return err 277 } 278 defer closeDir(dir) 279 fullPath := path.Join(dirpath, name) 280 281 var args C.struct_btrfs_ioctl_vol_args 282 283 // walk the btrfs subvolumes 284 walkSubvolumes := func(p string, f os.FileInfo, err error) error { 285 if err != nil { 286 if os.IsNotExist(err) && p != fullPath { 287 // missing most likely because the path was a subvolume that got removed in the previous iteration 288 // since it's gone anyway, we don't care 289 return nil 290 } 291 return fmt.Errorf("error walking subvolumes: %v", err) 292 } 293 // we want to check children only so skip itself 294 // it will be removed after the filepath walk anyways 295 if f.IsDir() && p != fullPath { 296 sv, err := isSubvolume(p) 297 if err != nil { 298 return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err) 299 } 300 if sv { 301 if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil { 302 return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err) 303 } 304 } 305 } 306 return nil 307 } 308 if err := filepath.Walk(path.Join(dirpath, name), walkSubvolumes); err != nil { 309 return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err) 310 } 311 312 if quotaEnabled { 313 if qgroupid, err := subvolLookupQgroup(fullPath); err == nil { 314 var args C.struct_btrfs_ioctl_qgroup_create_args 315 args.qgroupid = C.__u64(qgroupid) 316 317 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE, 318 uintptr(unsafe.Pointer(&args))) 319 if errno != 0 { 320 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error()) 321 } 322 } else { 323 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error()) 324 } 325 } 326 327 // all subvolumes have been removed 328 // now remove the one originally passed in 329 for i, c := range []byte(name) { 330 args.name[i] = C.char(c) 331 } 332 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY, 333 uintptr(unsafe.Pointer(&args))) 334 if errno != 0 { 335 return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error()) 336 } 337 return nil 338 } 339 340 func (d *Driver) updateQuotaStatus() { 341 d.once.Do(func() { 342 if !d.quotaEnabled { 343 // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed 344 if err := subvolQgroupStatus(d.home); err != nil { 345 // quota is still not enabled 346 return 347 } 348 d.quotaEnabled = true 349 } 350 }) 351 } 352 353 func (d *Driver) subvolEnableQuota() error { 354 d.updateQuotaStatus() 355 356 if d.quotaEnabled { 357 return nil 358 } 359 360 dir, err := openDir(d.home) 361 if err != nil { 362 return err 363 } 364 defer closeDir(dir) 365 366 var args C.struct_btrfs_ioctl_quota_ctl_args 367 args.cmd = C.BTRFS_QUOTA_CTL_ENABLE 368 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 369 uintptr(unsafe.Pointer(&args))) 370 if errno != 0 { 371 return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) 372 } 373 374 d.quotaEnabled = true 375 376 return nil 377 } 378 379 func (d *Driver) subvolDisableQuota() error { 380 d.updateQuotaStatus() 381 382 if !d.quotaEnabled { 383 return nil 384 } 385 386 dir, err := openDir(d.home) 387 if err != nil { 388 return err 389 } 390 defer closeDir(dir) 391 392 var args C.struct_btrfs_ioctl_quota_ctl_args 393 args.cmd = C.BTRFS_QUOTA_CTL_DISABLE 394 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 395 uintptr(unsafe.Pointer(&args))) 396 if errno != 0 { 397 return fmt.Errorf("Failed to disable btrfs quota for %s: %v", dir, errno.Error()) 398 } 399 400 d.quotaEnabled = false 401 402 return nil 403 } 404 405 func (d *Driver) subvolRescanQuota() error { 406 d.updateQuotaStatus() 407 408 if !d.quotaEnabled { 409 return nil 410 } 411 412 dir, err := openDir(d.home) 413 if err != nil { 414 return err 415 } 416 defer closeDir(dir) 417 418 var args C.struct_btrfs_ioctl_quota_rescan_args 419 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, 420 uintptr(unsafe.Pointer(&args))) 421 if errno != 0 { 422 return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) 423 } 424 425 return nil 426 } 427 428 func subvolLimitQgroup(path string, size uint64) error { 429 dir, err := openDir(path) 430 if err != nil { 431 return err 432 } 433 defer closeDir(dir) 434 435 var args C.struct_btrfs_ioctl_qgroup_limit_args 436 args.lim.max_referenced = C.__u64(size) 437 args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER 438 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, 439 uintptr(unsafe.Pointer(&args))) 440 if errno != 0 { 441 return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) 442 } 443 444 return nil 445 } 446 447 // subvolQgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path 448 // with search key of BTRFS_QGROUP_STATUS_KEY. 449 // In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY. 450 // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 451 func subvolQgroupStatus(path string) error { 452 dir, err := openDir(path) 453 if err != nil { 454 return err 455 } 456 defer closeDir(dir) 457 458 var args C.struct_btrfs_ioctl_search_args 459 args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID 460 args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY 461 args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY 462 args.key.max_objectid = C.__u64(math.MaxUint64) 463 args.key.max_offset = C.__u64(math.MaxUint64) 464 args.key.max_transid = C.__u64(math.MaxUint64) 465 args.key.nr_items = 4096 466 467 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH, 468 uintptr(unsafe.Pointer(&args))) 469 if errno != 0 { 470 return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error()) 471 } 472 sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf)) 473 if sh._type != C.BTRFS_QGROUP_STATUS_KEY { 474 return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type) 475 } 476 return nil 477 } 478 479 func subvolLookupQgroup(path string) (uint64, error) { 480 dir, err := openDir(path) 481 if err != nil { 482 return 0, err 483 } 484 defer closeDir(dir) 485 486 var args C.struct_btrfs_ioctl_ino_lookup_args 487 args.objectid = C.BTRFS_FIRST_FREE_OBJECTID 488 489 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP, 490 uintptr(unsafe.Pointer(&args))) 491 if errno != 0 { 492 return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error()) 493 } 494 if args.treeid == 0 { 495 return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir) 496 } 497 498 return uint64(args.treeid), nil 499 } 500 501 func (d *Driver) subvolumesDir() string { 502 return path.Join(d.home, "subvolumes") 503 } 504 505 func (d *Driver) subvolumesDirID(id string) string { 506 return path.Join(d.subvolumesDir(), id) 507 } 508 509 func (d *Driver) quotasDir() string { 510 return path.Join(d.home, "quotas") 511 } 512 513 func (d *Driver) quotasDirID(id string) string { 514 return path.Join(d.quotasDir(), id) 515 } 516 517 // CreateReadWrite creates a layer that is writable for use as a container 518 // file system. 519 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 520 return d.Create(id, parent, opts) 521 } 522 523 // Create the filesystem with given id. 524 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error { 525 quotas := path.Join(d.home, "quotas") 526 subvolumes := path.Join(d.home, "subvolumes") 527 rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) 528 if err != nil { 529 return err 530 } 531 532 currentID := idtools.CurrentIdentity() 533 dirID := idtools.Identity{ 534 UID: currentID.UID, 535 GID: rootGID, 536 } 537 538 if err := idtools.MkdirAllAndChown(subvolumes, 0710, dirID); err != nil { 539 return err 540 } 541 if parent == "" { 542 if err := subvolCreate(subvolumes, id); err != nil { 543 return err 544 } 545 } else { 546 parentDir := d.subvolumesDirID(parent) 547 st, err := os.Stat(parentDir) 548 if err != nil { 549 return err 550 } 551 if !st.IsDir() { 552 return fmt.Errorf("%s: not a directory", parentDir) 553 } 554 if err := subvolSnapshot(parentDir, subvolumes, id); err != nil { 555 return err 556 } 557 } 558 559 var storageOpt map[string]string 560 if opts != nil { 561 storageOpt = opts.StorageOpt 562 } 563 564 if _, ok := storageOpt["size"]; ok { 565 driver := &Driver{} 566 if err := d.parseStorageOpt(storageOpt, driver); err != nil { 567 return err 568 } 569 570 if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { 571 return err 572 } 573 if err := idtools.MkdirAllAndChown(quotas, 0700, idtools.CurrentIdentity()); err != nil { 574 return err 575 } 576 if err := ioutil.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil { 577 return err 578 } 579 } 580 581 // if we have a remapped root (user namespaces enabled), change the created snapshot 582 // dir ownership to match 583 if rootUID != 0 || rootGID != 0 { 584 if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil { 585 return err 586 } 587 } 588 589 mountLabel := "" 590 if opts != nil { 591 mountLabel = opts.MountLabel 592 } 593 594 return label.Relabel(path.Join(subvolumes, id), mountLabel, false) 595 } 596 597 // Parse btrfs storage options 598 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 599 // Read size to change the subvolume disk quota per container 600 for key, val := range storageOpt { 601 key := strings.ToLower(key) 602 switch key { 603 case "size": 604 size, err := units.RAMInBytes(val) 605 if err != nil { 606 return err 607 } 608 driver.options.size = uint64(size) 609 default: 610 return fmt.Errorf("Unknown option %s", key) 611 } 612 } 613 614 return nil 615 } 616 617 // Set btrfs storage size 618 func (d *Driver) setStorageSize(dir string, driver *Driver) error { 619 if driver.options.size == 0 { 620 return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) 621 } 622 if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { 623 return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) 624 } 625 if err := d.subvolEnableQuota(); err != nil { 626 return err 627 } 628 return subvolLimitQgroup(dir, driver.options.size) 629 } 630 631 // Remove the filesystem with given id. 632 func (d *Driver) Remove(id string) error { 633 dir := d.subvolumesDirID(id) 634 if _, err := os.Stat(dir); err != nil { 635 return err 636 } 637 quotasDir := d.quotasDirID(id) 638 if _, err := os.Stat(quotasDir); err == nil { 639 if err := os.Remove(quotasDir); err != nil { 640 return err 641 } 642 } else if !os.IsNotExist(err) { 643 return err 644 } 645 646 // Call updateQuotaStatus() to invoke status update 647 d.updateQuotaStatus() 648 649 if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil { 650 if d.quotaEnabled { 651 return err 652 } 653 // If quota is not enabled, fallback to rmdir syscall to delete subvolumes. 654 // This would allow unprivileged user to delete their owned subvolumes 655 // in kernel >= 4.18 without user_subvol_rm_allowed mount option. 656 // 657 // From https://github.com/containers/storage/pull/508/commits/831e32b6bdcb530acc4c1cb9059d3c6dba14208c 658 } 659 if err := system.EnsureRemoveAll(dir); err != nil { 660 return err 661 } 662 return d.subvolRescanQuota() 663 } 664 665 // Get the requested filesystem id. 666 func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) { 667 dir := d.subvolumesDirID(id) 668 st, err := os.Stat(dir) 669 if err != nil { 670 return nil, err 671 } 672 673 if !st.IsDir() { 674 return nil, fmt.Errorf("%s: not a directory", dir) 675 } 676 677 if quota, err := ioutil.ReadFile(d.quotasDirID(id)); err == nil { 678 if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { 679 if err := d.subvolEnableQuota(); err != nil { 680 return nil, err 681 } 682 if err := subvolLimitQgroup(dir, size); err != nil { 683 return nil, err 684 } 685 } 686 } 687 688 return containerfs.NewLocalContainerFS(dir), nil 689 } 690 691 // Put is not implemented for BTRFS as there is no cleanup required for the id. 692 func (d *Driver) Put(id string) error { 693 // Get() creates no runtime resources (like e.g. mounts) 694 // so this doesn't need to do anything. 695 return nil 696 } 697 698 // Exists checks if the id exists in the filesystem. 699 func (d *Driver) Exists(id string) bool { 700 dir := d.subvolumesDirID(id) 701 _, err := os.Stat(dir) 702 return err == nil 703 }