github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/daemon/graphdriver/btrfs/btrfs.go (about) 1 //go:build linux 2 // +build linux 3 4 package btrfs // import "github.com/docker/docker/daemon/graphdriver/btrfs" 5 6 /* 7 #include <stdlib.h> 8 #include <dirent.h> 9 #include <btrfs/ioctl.h> 10 #include <btrfs/ctree.h> 11 12 static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) { 13 snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value); 14 } 15 */ 16 import "C" 17 18 import ( 19 "fmt" 20 "math" 21 "os" 22 "path" 23 "path/filepath" 24 "strconv" 25 "strings" 26 "sync" 27 "unsafe" 28 29 "github.com/containerd/containerd/pkg/userns" 30 "github.com/docker/docker/daemon/graphdriver" 31 "github.com/docker/docker/pkg/containerfs" 32 "github.com/docker/docker/pkg/idtools" 33 "github.com/docker/docker/pkg/parsers" 34 units "github.com/docker/go-units" 35 "github.com/moby/sys/mount" 36 "github.com/opencontainers/selinux/go-selinux/label" 37 "github.com/pkg/errors" 38 "github.com/sirupsen/logrus" 39 "golang.org/x/sys/unix" 40 ) 41 42 func init() { 43 graphdriver.Register("btrfs", Init) 44 } 45 46 type btrfsOptions struct { 47 minSpace uint64 48 size uint64 49 } 50 51 // Init returns a new BTRFS driver. 52 // An error is returned if BTRFS is not supported. 53 func Init(home string, options []string, idMap idtools.IdentityMapping) (graphdriver.Driver, error) { 54 // Perform feature detection on /var/lib/docker/btrfs if it's an existing directory. 55 // This covers situations where /var/lib/docker/btrfs is a mount, and on a different 56 // filesystem than /var/lib/docker. 57 // If the path does not exist, fall back to using /var/lib/docker for feature detection. 58 testdir := home 59 if _, err := os.Stat(testdir); os.IsNotExist(err) { 60 testdir = filepath.Dir(testdir) 61 } 62 63 fsMagic, err := graphdriver.GetFSMagic(testdir) 64 if err != nil { 65 return nil, err 66 } 67 68 if fsMagic != graphdriver.FsMagicBtrfs { 69 return nil, graphdriver.ErrPrerequisites 70 } 71 72 currentID := idtools.CurrentIdentity() 73 dirID := idtools.Identity{ 74 UID: currentID.UID, 75 GID: idMap.RootPair().GID, 76 } 77 78 if err := idtools.MkdirAllAndChown(home, 0710, dirID); err != nil { 79 return nil, err 80 } 81 82 opt, userDiskQuota, err := parseOptions(options) 83 if err != nil { 84 return nil, err 85 } 86 87 // For some reason shared mount propagation between a container 88 // and the host does not work for btrfs, and a remedy is to bind 89 // mount graphdriver home to itself (even without changing the 90 // propagation mode). 91 err = mount.MakeMount(home) 92 if err != nil { 93 return nil, errors.Wrapf(err, "failed to make %s a mount", home) 94 } 95 96 driver := &Driver{ 97 home: home, 98 idMap: idMap, 99 options: opt, 100 } 101 102 if userDiskQuota { 103 if err := driver.enableQuota(); err != nil { 104 return nil, err 105 } 106 } 107 108 return graphdriver.NewNaiveDiffDriver(driver, driver.idMap), nil 109 } 110 111 func parseOptions(opt []string) (btrfsOptions, bool, error) { 112 var options btrfsOptions 113 userDiskQuota := false 114 for _, option := range opt { 115 key, val, err := parsers.ParseKeyValueOpt(option) 116 if err != nil { 117 return options, userDiskQuota, err 118 } 119 key = strings.ToLower(key) 120 switch key { 121 case "btrfs.min_space": 122 minSpace, err := units.RAMInBytes(val) 123 if err != nil { 124 return options, userDiskQuota, err 125 } 126 userDiskQuota = true 127 options.minSpace = uint64(minSpace) 128 default: 129 return options, userDiskQuota, fmt.Errorf("Unknown option %s", key) 130 } 131 } 132 return options, userDiskQuota, nil 133 } 134 135 // Driver contains information about the filesystem mounted. 136 type Driver struct { 137 // root of the file system 138 home string 139 idMap idtools.IdentityMapping 140 options btrfsOptions 141 quotaEnabled bool 142 once sync.Once 143 } 144 145 // String prints the name of the driver (btrfs). 146 func (d *Driver) String() string { 147 return "btrfs" 148 } 149 150 // Status returns current driver information in a two dimensional string array. 151 // Output contains "Build Version" and "Library Version" of the btrfs libraries used. 152 // Version information can be used to check compatibility with your kernel. 153 func (d *Driver) Status() [][2]string { 154 status := [][2]string{} 155 if bv := btrfsBuildVersion(); bv != "-" { 156 status = append(status, [2]string{"Build Version", bv}) 157 } 158 if lv := btrfsLibVersion(); lv != -1 { 159 status = append(status, [2]string{"Library Version", strconv.Itoa(lv)}) 160 } 161 return status 162 } 163 164 // GetMetadata returns empty metadata for this driver. 165 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 166 return nil, nil 167 } 168 169 // Cleanup unmounts the home directory. 170 func (d *Driver) Cleanup() error { 171 if err := mount.Unmount(d.home); err != nil { 172 return err 173 } 174 175 return nil 176 } 177 178 func free(p *C.char) { 179 C.free(unsafe.Pointer(p)) 180 } 181 182 func openDir(path string) (*C.DIR, error) { 183 Cpath := C.CString(path) 184 defer free(Cpath) 185 186 dir := C.opendir(Cpath) 187 if dir == nil { 188 return nil, fmt.Errorf("Can't open dir") 189 } 190 return dir, nil 191 } 192 193 func closeDir(dir *C.DIR) { 194 if dir != nil { 195 C.closedir(dir) 196 } 197 } 198 199 func getDirFd(dir *C.DIR) uintptr { 200 return uintptr(C.dirfd(dir)) 201 } 202 203 func subvolCreate(path, name string) error { 204 dir, err := openDir(path) 205 if err != nil { 206 return err 207 } 208 defer closeDir(dir) 209 210 var args C.struct_btrfs_ioctl_vol_args 211 for i, c := range []byte(name) { 212 args.name[i] = C.char(c) 213 } 214 215 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE, 216 uintptr(unsafe.Pointer(&args))) 217 if errno != 0 { 218 return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error()) 219 } 220 return nil 221 } 222 223 func subvolSnapshot(src, dest, name string) error { 224 srcDir, err := openDir(src) 225 if err != nil { 226 return err 227 } 228 defer closeDir(srcDir) 229 230 destDir, err := openDir(dest) 231 if err != nil { 232 return err 233 } 234 defer closeDir(destDir) 235 236 var args C.struct_btrfs_ioctl_vol_args_v2 237 args.fd = C.__s64(getDirFd(srcDir)) 238 239 var cs = C.CString(name) 240 C.set_name_btrfs_ioctl_vol_args_v2(&args, cs) 241 C.free(unsafe.Pointer(cs)) 242 243 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2, 244 uintptr(unsafe.Pointer(&args))) 245 if errno != 0 { 246 return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error()) 247 } 248 return nil 249 } 250 251 func isSubvolume(p string) (bool, error) { 252 var bufStat unix.Stat_t 253 if err := unix.Lstat(p, &bufStat); err != nil { 254 return false, err 255 } 256 257 // return true if it is a btrfs subvolume 258 return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil 259 } 260 261 func subvolDelete(dirpath, name string, quotaEnabled bool) error { 262 dir, err := openDir(dirpath) 263 if err != nil { 264 return err 265 } 266 defer closeDir(dir) 267 fullPath := path.Join(dirpath, name) 268 269 var args C.struct_btrfs_ioctl_vol_args 270 271 // walk the btrfs subvolumes 272 walkSubVolumes := func(p string, f os.DirEntry, err error) error { 273 if err != nil { 274 if os.IsNotExist(err) && p != fullPath { 275 // missing most likely because the path was a subvolume that got removed in the previous iteration 276 // since it's gone anyway, we don't care 277 return nil 278 } 279 return fmt.Errorf("error walking subvolumes: %v", err) 280 } 281 // we want to check children only so skip itself 282 // it will be removed after the filepath walk anyways 283 if f.IsDir() && p != fullPath { 284 sv, err := isSubvolume(p) 285 if err != nil { 286 return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err) 287 } 288 if sv { 289 if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil { 290 return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err) 291 } 292 } 293 } 294 return nil 295 } 296 if err := filepath.WalkDir(path.Join(dirpath, name), walkSubVolumes); err != nil { 297 return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err) 298 } 299 300 if quotaEnabled { 301 if qgroupid, err := subvolLookupQgroup(fullPath); err == nil { 302 var args C.struct_btrfs_ioctl_qgroup_create_args 303 args.qgroupid = C.__u64(qgroupid) 304 305 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE, 306 uintptr(unsafe.Pointer(&args))) 307 if errno != 0 { 308 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error()) 309 } 310 } else { 311 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error()) 312 } 313 } 314 315 // all subvolumes have been removed 316 // now remove the one originally passed in 317 for i, c := range []byte(name) { 318 args.name[i] = C.char(c) 319 } 320 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY, 321 uintptr(unsafe.Pointer(&args))) 322 if errno != 0 { 323 return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error()) 324 } 325 return nil 326 } 327 328 func (d *Driver) updateQuotaStatus() { 329 d.once.Do(func() { 330 if !d.quotaEnabled { 331 // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed 332 if err := qgroupStatus(d.home); err != nil { 333 // quota is still not enabled 334 return 335 } 336 d.quotaEnabled = true 337 } 338 }) 339 } 340 341 func (d *Driver) enableQuota() error { 342 d.updateQuotaStatus() 343 344 if d.quotaEnabled { 345 return nil 346 } 347 348 dir, err := openDir(d.home) 349 if err != nil { 350 return err 351 } 352 defer closeDir(dir) 353 354 var args C.struct_btrfs_ioctl_quota_ctl_args 355 args.cmd = C.BTRFS_QUOTA_CTL_ENABLE 356 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 357 uintptr(unsafe.Pointer(&args))) 358 if errno != 0 { 359 return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) 360 } 361 362 d.quotaEnabled = true 363 364 return nil 365 } 366 367 func (d *Driver) subvolRescanQuota() error { 368 d.updateQuotaStatus() 369 370 if !d.quotaEnabled { 371 return nil 372 } 373 374 dir, err := openDir(d.home) 375 if err != nil { 376 return err 377 } 378 defer closeDir(dir) 379 380 var args C.struct_btrfs_ioctl_quota_rescan_args 381 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, 382 uintptr(unsafe.Pointer(&args))) 383 if errno != 0 { 384 return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) 385 } 386 387 return nil 388 } 389 390 func subvolLimitQgroup(path string, size uint64) error { 391 dir, err := openDir(path) 392 if err != nil { 393 return err 394 } 395 defer closeDir(dir) 396 397 var args C.struct_btrfs_ioctl_qgroup_limit_args 398 args.lim.max_referenced = C.__u64(size) 399 args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER 400 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, 401 uintptr(unsafe.Pointer(&args))) 402 if errno != 0 { 403 return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) 404 } 405 406 return nil 407 } 408 409 // qgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path 410 // with search key of BTRFS_QGROUP_STATUS_KEY. 411 // In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY. 412 // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 413 func qgroupStatus(path string) error { 414 dir, err := openDir(path) 415 if err != nil { 416 return err 417 } 418 defer closeDir(dir) 419 420 var args C.struct_btrfs_ioctl_search_args 421 args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID 422 args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY 423 args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY 424 args.key.max_objectid = C.__u64(math.MaxUint64) 425 args.key.max_offset = C.__u64(math.MaxUint64) 426 args.key.max_transid = C.__u64(math.MaxUint64) 427 args.key.nr_items = 4096 428 429 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH, 430 uintptr(unsafe.Pointer(&args))) 431 if errno != 0 { 432 return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error()) 433 } 434 sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf)) 435 if sh._type != C.BTRFS_QGROUP_STATUS_KEY { 436 return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type) 437 } 438 return nil 439 } 440 441 func subvolLookupQgroup(path string) (uint64, error) { 442 dir, err := openDir(path) 443 if err != nil { 444 return 0, err 445 } 446 defer closeDir(dir) 447 448 var args C.struct_btrfs_ioctl_ino_lookup_args 449 args.objectid = C.BTRFS_FIRST_FREE_OBJECTID 450 451 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP, 452 uintptr(unsafe.Pointer(&args))) 453 if errno != 0 { 454 return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error()) 455 } 456 if args.treeid == 0 { 457 return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir) 458 } 459 460 return uint64(args.treeid), nil 461 } 462 463 func (d *Driver) subvolumesDir() string { 464 return path.Join(d.home, "subvolumes") 465 } 466 467 func (d *Driver) subvolumesDirID(id string) string { 468 return path.Join(d.subvolumesDir(), id) 469 } 470 471 func (d *Driver) quotasDir() string { 472 return path.Join(d.home, "quotas") 473 } 474 475 func (d *Driver) quotasDirID(id string) string { 476 return path.Join(d.quotasDir(), id) 477 } 478 479 // CreateReadWrite creates a layer that is writable for use as a container 480 // file system. 481 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 482 return d.Create(id, parent, opts) 483 } 484 485 // Create the filesystem with given id. 486 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error { 487 quotas := path.Join(d.home, "quotas") 488 subvolumes := path.Join(d.home, "subvolumes") 489 root := d.idMap.RootPair() 490 491 currentID := idtools.CurrentIdentity() 492 dirID := idtools.Identity{ 493 UID: currentID.UID, 494 GID: root.GID, 495 } 496 497 if err := idtools.MkdirAllAndChown(subvolumes, 0710, dirID); err != nil { 498 return err 499 } 500 if parent == "" { 501 if err := subvolCreate(subvolumes, id); err != nil { 502 return err 503 } 504 } else { 505 parentDir := d.subvolumesDirID(parent) 506 st, err := os.Stat(parentDir) 507 if err != nil { 508 return err 509 } 510 if !st.IsDir() { 511 return fmt.Errorf("%s: not a directory", parentDir) 512 } 513 if err := subvolSnapshot(parentDir, subvolumes, id); err != nil { 514 return err 515 } 516 } 517 518 var storageOpt map[string]string 519 if opts != nil { 520 storageOpt = opts.StorageOpt 521 } 522 523 if _, ok := storageOpt["size"]; ok { 524 driver := &Driver{} 525 if err := d.parseStorageOpt(storageOpt, driver); err != nil { 526 return err 527 } 528 529 if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { 530 return err 531 } 532 if err := idtools.MkdirAllAndChown(quotas, 0700, idtools.CurrentIdentity()); err != nil { 533 return err 534 } 535 if err := os.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil { 536 return err 537 } 538 } 539 540 // if we have a remapped root (user namespaces enabled), change the created snapshot 541 // dir ownership to match 542 if root.UID != 0 || root.GID != 0 { 543 if err := root.Chown(path.Join(subvolumes, id)); err != nil { 544 return err 545 } 546 } 547 548 mountLabel := "" 549 if opts != nil { 550 mountLabel = opts.MountLabel 551 } 552 553 return label.Relabel(path.Join(subvolumes, id), mountLabel, false) 554 } 555 556 // Parse btrfs storage options 557 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 558 // Read size to change the subvolume disk quota per container 559 for key, val := range storageOpt { 560 key := strings.ToLower(key) 561 switch key { 562 case "size": 563 size, err := units.RAMInBytes(val) 564 if err != nil { 565 return err 566 } 567 driver.options.size = uint64(size) 568 default: 569 return fmt.Errorf("Unknown option %s", key) 570 } 571 } 572 573 return nil 574 } 575 576 // Set btrfs storage size 577 func (d *Driver) setStorageSize(dir string, driver *Driver) error { 578 if driver.options.size == 0 { 579 return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) 580 } 581 if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { 582 return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) 583 } 584 if err := d.enableQuota(); err != nil { 585 return err 586 } 587 return subvolLimitQgroup(dir, driver.options.size) 588 } 589 590 // Remove the filesystem with given id. 591 func (d *Driver) Remove(id string) error { 592 dir := d.subvolumesDirID(id) 593 if _, err := os.Stat(dir); err != nil { 594 return err 595 } 596 quotasDir := d.quotasDirID(id) 597 if _, err := os.Stat(quotasDir); err == nil { 598 if err := os.Remove(quotasDir); err != nil { 599 return err 600 } 601 } else if !os.IsNotExist(err) { 602 return err 603 } 604 605 // Call updateQuotaStatus() to invoke status update 606 d.updateQuotaStatus() 607 608 if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil { 609 if d.quotaEnabled { 610 // use strings.Contains() rather than errors.Is(), because subvolDelete() does not use %w yet 611 if userns.RunningInUserNS() && strings.Contains(err.Error(), "operation not permitted") { 612 err = errors.Wrap(err, `failed to delete subvolume without root (hint: remount btrfs on "user_subvol_rm_allowed" option, or update the kernel to >= 4.18, or change the storage driver to "fuse-overlayfs")`) 613 } 614 return err 615 } 616 // If quota is not enabled, fallback to rmdir syscall to delete subvolumes. 617 // This would allow unprivileged user to delete their owned subvolumes 618 // in kernel >= 4.18 without user_subvol_rm_allowed mount option. 619 // 620 // From https://github.com/containers/storage/pull/508/commits/831e32b6bdcb530acc4c1cb9059d3c6dba14208c 621 } 622 if err := containerfs.EnsureRemoveAll(dir); err != nil { 623 return err 624 } 625 return d.subvolRescanQuota() 626 } 627 628 // Get the requested filesystem id. 629 func (d *Driver) Get(id, mountLabel string) (string, error) { 630 dir := d.subvolumesDirID(id) 631 st, err := os.Stat(dir) 632 if err != nil { 633 return "", err 634 } 635 636 if !st.IsDir() { 637 return "", fmt.Errorf("%s: not a directory", dir) 638 } 639 640 if quota, err := os.ReadFile(d.quotasDirID(id)); err == nil { 641 if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { 642 if err := d.enableQuota(); err != nil { 643 return "", err 644 } 645 if err := subvolLimitQgroup(dir, size); err != nil { 646 return "", err 647 } 648 } 649 } 650 651 return dir, nil 652 } 653 654 // Put is not implemented for BTRFS as there is no cleanup required for the id. 655 func (d *Driver) Put(id string) error { 656 // Get() creates no runtime resources (like e.g. mounts) 657 // so this doesn't need to do anything. 658 return nil 659 } 660 661 // Exists checks if the id exists in the filesystem. 662 func (d *Driver) Exists(id string) bool { 663 dir := d.subvolumesDirID(id) 664 _, err := os.Stat(dir) 665 return err == nil 666 }