github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/daemon/graphdriver/btrfs/btrfs.go (about) 1 //go:build linux 2 3 package btrfs // import "github.com/Prakhar-Agarwal-byte/moby/daemon/graphdriver/btrfs" 4 5 /* 6 #include <stdlib.h> 7 #include <stdio.h> 8 #include <dirent.h> 9 10 #include <linux/version.h> 11 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0) 12 #error "Headers from kernel >= 4.12 are required to build with Btrfs support." 13 #error "HINT: Set 'DOCKER_BUILDTAGS=exclude_graphdriver_btrfs' to build without Btrfs." 14 #endif 15 16 #include <linux/btrfs.h> 17 #include <linux/btrfs_tree.h> 18 19 static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) { 20 snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value); 21 } 22 */ 23 import "C" 24 25 import ( 26 "context" 27 "fmt" 28 "math" 29 "os" 30 "path" 31 "path/filepath" 32 "strconv" 33 "strings" 34 "sync" 35 "unsafe" 36 37 "github.com/containerd/containerd/pkg/userns" 38 "github.com/containerd/log" 39 "github.com/Prakhar-Agarwal-byte/moby/daemon/graphdriver" 40 "github.com/Prakhar-Agarwal-byte/moby/pkg/containerfs" 41 "github.com/Prakhar-Agarwal-byte/moby/pkg/idtools" 42 "github.com/Prakhar-Agarwal-byte/moby/pkg/parsers" 43 units "github.com/docker/go-units" 44 "github.com/moby/sys/mount" 45 "github.com/opencontainers/selinux/go-selinux/label" 46 "github.com/pkg/errors" 47 "golang.org/x/sys/unix" 48 ) 49 50 func init() { 51 graphdriver.Register("btrfs", Init) 52 } 53 54 type btrfsOptions struct { 55 minSpace uint64 56 size uint64 57 } 58 59 // Init returns a new BTRFS driver. 60 // An error is returned if BTRFS is not supported. 61 func Init(home string, options []string, idMap idtools.IdentityMapping) (graphdriver.Driver, error) { 62 // Perform feature detection on /var/lib/docker/btrfs if it's an existing directory. 63 // This covers situations where /var/lib/docker/btrfs is a mount, and on a different 64 // filesystem than /var/lib/docker. 65 // If the path does not exist, fall back to using /var/lib/docker for feature detection. 66 testdir := home 67 if _, err := os.Stat(testdir); os.IsNotExist(err) { 68 testdir = filepath.Dir(testdir) 69 } 70 71 fsMagic, err := graphdriver.GetFSMagic(testdir) 72 if err != nil { 73 return nil, err 74 } 75 76 if fsMagic != graphdriver.FsMagicBtrfs { 77 return nil, graphdriver.ErrPrerequisites 78 } 79 80 currentID := idtools.CurrentIdentity() 81 dirID := idtools.Identity{ 82 UID: currentID.UID, 83 GID: idMap.RootPair().GID, 84 } 85 86 if err := idtools.MkdirAllAndChown(home, 0o710, dirID); err != nil { 87 return nil, err 88 } 89 90 opt, userDiskQuota, err := parseOptions(options) 91 if err != nil { 92 return nil, err 93 } 94 95 // For some reason shared mount propagation between a container 96 // and the host does not work for btrfs, and a remedy is to bind 97 // mount graphdriver home to itself (even without changing the 98 // propagation mode). 99 err = mount.MakeMount(home) 100 if err != nil { 101 return nil, errors.Wrapf(err, "failed to make %s a mount", home) 102 } 103 104 driver := &Driver{ 105 home: home, 106 idMap: idMap, 107 options: opt, 108 } 109 110 if userDiskQuota { 111 if err := driver.enableQuota(); err != nil { 112 return nil, err 113 } 114 } 115 116 return graphdriver.NewNaiveDiffDriver(driver, driver.idMap), nil 117 } 118 119 func parseOptions(opt []string) (btrfsOptions, bool, error) { 120 var options btrfsOptions 121 userDiskQuota := false 122 for _, option := range opt { 123 key, val, err := parsers.ParseKeyValueOpt(option) 124 if err != nil { 125 return options, userDiskQuota, err 126 } 127 key = strings.ToLower(key) 128 switch key { 129 case "btrfs.min_space": 130 minSpace, err := units.RAMInBytes(val) 131 if err != nil { 132 return options, userDiskQuota, err 133 } 134 userDiskQuota = true 135 options.minSpace = uint64(minSpace) 136 default: 137 return options, userDiskQuota, fmt.Errorf("Unknown option %s", key) 138 } 139 } 140 return options, userDiskQuota, nil 141 } 142 143 // Driver contains information about the filesystem mounted. 144 type Driver struct { 145 // root of the file system 146 home string 147 idMap idtools.IdentityMapping 148 options btrfsOptions 149 quotaEnabled bool 150 once sync.Once 151 } 152 153 // String prints the name of the driver (btrfs). 154 func (d *Driver) String() string { 155 return "btrfs" 156 } 157 158 // Status returns the status of the driver. 159 func (d *Driver) Status() [][2]string { 160 return [][2]string{ 161 {"Btrfs", ""}, 162 } 163 } 164 165 // GetMetadata returns empty metadata for this driver. 166 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 167 return nil, nil 168 } 169 170 // Cleanup unmounts the home directory. 171 func (d *Driver) Cleanup() error { 172 if err := mount.Unmount(d.home); err != nil { 173 return err 174 } 175 176 return nil 177 } 178 179 func free(p *C.char) { 180 C.free(unsafe.Pointer(p)) 181 } 182 183 func openDir(path string) (*C.DIR, error) { 184 Cpath := C.CString(path) 185 defer free(Cpath) 186 187 dir := C.opendir(Cpath) 188 if dir == nil { 189 return nil, fmt.Errorf("Can't open dir") 190 } 191 return dir, nil 192 } 193 194 func closeDir(dir *C.DIR) { 195 if dir != nil { 196 C.closedir(dir) 197 } 198 } 199 200 func getDirFd(dir *C.DIR) uintptr { 201 return uintptr(C.dirfd(dir)) 202 } 203 204 func subvolCreate(path, name string) error { 205 dir, err := openDir(path) 206 if err != nil { 207 return err 208 } 209 defer closeDir(dir) 210 211 var args C.struct_btrfs_ioctl_vol_args 212 for i, c := range []byte(name) { 213 args.name[i] = C.char(c) 214 } 215 216 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE, 217 uintptr(unsafe.Pointer(&args))) 218 if errno != 0 { 219 return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error()) 220 } 221 return nil 222 } 223 224 func subvolSnapshot(src, dest, name string) error { 225 srcDir, err := openDir(src) 226 if err != nil { 227 return err 228 } 229 defer closeDir(srcDir) 230 231 destDir, err := openDir(dest) 232 if err != nil { 233 return err 234 } 235 defer closeDir(destDir) 236 237 var args C.struct_btrfs_ioctl_vol_args_v2 238 args.fd = C.__s64(getDirFd(srcDir)) 239 240 cs := C.CString(name) 241 C.set_name_btrfs_ioctl_vol_args_v2(&args, cs) 242 free(cs) 243 244 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2, 245 uintptr(unsafe.Pointer(&args))) 246 if errno != 0 { 247 return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error()) 248 } 249 return nil 250 } 251 252 func isSubvolume(p string) (bool, error) { 253 var bufStat unix.Stat_t 254 if err := unix.Lstat(p, &bufStat); err != nil { 255 return false, err 256 } 257 258 // return true if it is a btrfs subvolume 259 return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil 260 } 261 262 func subvolDelete(dirpath, name string, quotaEnabled bool) error { 263 dir, err := openDir(dirpath) 264 if err != nil { 265 return err 266 } 267 defer closeDir(dir) 268 fullPath := path.Join(dirpath, name) 269 270 var args C.struct_btrfs_ioctl_vol_args 271 272 // walk the btrfs subvolumes 273 walkSubVolumes := func(p string, f os.DirEntry, err error) error { 274 if err != nil { 275 if os.IsNotExist(err) && p != fullPath { 276 // missing most likely because the path was a subvolume that got removed in the previous iteration 277 // since it's gone anyway, we don't care 278 return nil 279 } 280 return fmt.Errorf("error walking subvolumes: %v", err) 281 } 282 // we want to check children only so skip itself 283 // it will be removed after the filepath walk anyways 284 if f.IsDir() && p != fullPath { 285 sv, err := isSubvolume(p) 286 if err != nil { 287 return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err) 288 } 289 if sv { 290 if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil { 291 return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err) 292 } 293 } 294 } 295 return nil 296 } 297 if err := filepath.WalkDir(path.Join(dirpath, name), walkSubVolumes); err != nil { 298 return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err) 299 } 300 301 if quotaEnabled { 302 if qgroupid, err := subvolLookupQgroup(fullPath); err == nil { 303 var args C.struct_btrfs_ioctl_qgroup_create_args 304 args.qgroupid = C.__u64(qgroupid) 305 306 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE, 307 uintptr(unsafe.Pointer(&args))) 308 if errno != 0 { 309 log.G(context.TODO()).WithField("storage-driver", "btrfs").Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error()) 310 } 311 } else { 312 log.G(context.TODO()).WithField("storage-driver", "btrfs").Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error()) 313 } 314 } 315 316 // all subvolumes have been removed 317 // now remove the one originally passed in 318 for i, c := range []byte(name) { 319 args.name[i] = C.char(c) 320 } 321 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY, 322 uintptr(unsafe.Pointer(&args))) 323 if errno != 0 { 324 return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error()) 325 } 326 return nil 327 } 328 329 func (d *Driver) updateQuotaStatus() { 330 d.once.Do(func() { 331 if !d.quotaEnabled { 332 // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed 333 if err := qgroupStatus(d.home); err != nil { 334 // quota is still not enabled 335 return 336 } 337 d.quotaEnabled = true 338 } 339 }) 340 } 341 342 func (d *Driver) enableQuota() error { 343 d.updateQuotaStatus() 344 345 if d.quotaEnabled { 346 return nil 347 } 348 349 dir, err := openDir(d.home) 350 if err != nil { 351 return err 352 } 353 defer closeDir(dir) 354 355 var args C.struct_btrfs_ioctl_quota_ctl_args 356 args.cmd = C.BTRFS_QUOTA_CTL_ENABLE 357 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 358 uintptr(unsafe.Pointer(&args))) 359 if errno != 0 { 360 return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) 361 } 362 363 d.quotaEnabled = true 364 365 return nil 366 } 367 368 func (d *Driver) subvolRescanQuota() error { 369 d.updateQuotaStatus() 370 371 if !d.quotaEnabled { 372 return nil 373 } 374 375 dir, err := openDir(d.home) 376 if err != nil { 377 return err 378 } 379 defer closeDir(dir) 380 381 var args C.struct_btrfs_ioctl_quota_rescan_args 382 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, 383 uintptr(unsafe.Pointer(&args))) 384 if errno != 0 { 385 return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) 386 } 387 388 return nil 389 } 390 391 func subvolLimitQgroup(path string, size uint64) error { 392 dir, err := openDir(path) 393 if err != nil { 394 return err 395 } 396 defer closeDir(dir) 397 398 var args C.struct_btrfs_ioctl_qgroup_limit_args 399 args.lim.max_rfer = C.__u64(size) 400 args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER 401 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, 402 uintptr(unsafe.Pointer(&args))) 403 if errno != 0 { 404 return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) 405 } 406 407 return nil 408 } 409 410 // qgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path 411 // with search key of BTRFS_QGROUP_STATUS_KEY. 412 // In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY. 413 // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 414 func qgroupStatus(path string) error { 415 dir, err := openDir(path) 416 if err != nil { 417 return err 418 } 419 defer closeDir(dir) 420 421 var args C.struct_btrfs_ioctl_search_args 422 args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID 423 args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY 424 args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY 425 args.key.max_objectid = C.__u64(math.MaxUint64) 426 args.key.max_offset = C.__u64(math.MaxUint64) 427 args.key.max_transid = C.__u64(math.MaxUint64) 428 args.key.nr_items = 4096 429 430 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH, 431 uintptr(unsafe.Pointer(&args))) 432 if errno != 0 { 433 return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error()) 434 } 435 sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf)) 436 if sh._type != C.BTRFS_QGROUP_STATUS_KEY { 437 return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type) 438 } 439 return nil 440 } 441 442 func subvolLookupQgroup(path string) (uint64, error) { 443 dir, err := openDir(path) 444 if err != nil { 445 return 0, err 446 } 447 defer closeDir(dir) 448 449 var args C.struct_btrfs_ioctl_ino_lookup_args 450 args.objectid = C.BTRFS_FIRST_FREE_OBJECTID 451 452 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP, 453 uintptr(unsafe.Pointer(&args))) 454 if errno != 0 { 455 return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error()) 456 } 457 if args.treeid == 0 { 458 return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir) 459 } 460 461 return uint64(args.treeid), nil 462 } 463 464 func (d *Driver) subvolumesDir() string { 465 return path.Join(d.home, "subvolumes") 466 } 467 468 func (d *Driver) subvolumesDirID(id string) string { 469 return path.Join(d.subvolumesDir(), id) 470 } 471 472 func (d *Driver) quotasDir() string { 473 return path.Join(d.home, "quotas") 474 } 475 476 func (d *Driver) quotasDirID(id string) string { 477 return path.Join(d.quotasDir(), id) 478 } 479 480 // CreateReadWrite creates a layer that is writable for use as a container 481 // file system. 482 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 483 return d.Create(id, parent, opts) 484 } 485 486 // Create the filesystem with given id. 487 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error { 488 quotas := path.Join(d.home, "quotas") 489 subvolumes := path.Join(d.home, "subvolumes") 490 root := d.idMap.RootPair() 491 492 currentID := idtools.CurrentIdentity() 493 dirID := idtools.Identity{ 494 UID: currentID.UID, 495 GID: root.GID, 496 } 497 498 if err := idtools.MkdirAllAndChown(subvolumes, 0o710, dirID); err != nil { 499 return err 500 } 501 if parent == "" { 502 if err := subvolCreate(subvolumes, id); err != nil { 503 return err 504 } 505 } else { 506 parentDir := d.subvolumesDirID(parent) 507 st, err := os.Stat(parentDir) 508 if err != nil { 509 return err 510 } 511 if !st.IsDir() { 512 return fmt.Errorf("%s: not a directory", parentDir) 513 } 514 if err := subvolSnapshot(parentDir, subvolumes, id); err != nil { 515 return err 516 } 517 } 518 519 var storageOpt map[string]string 520 if opts != nil { 521 storageOpt = opts.StorageOpt 522 } 523 524 if _, ok := storageOpt["size"]; ok { 525 driver := &Driver{} 526 if err := d.parseStorageOpt(storageOpt, driver); err != nil { 527 return err 528 } 529 530 if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { 531 return err 532 } 533 if err := idtools.MkdirAllAndChown(quotas, 0o700, idtools.CurrentIdentity()); err != nil { 534 return err 535 } 536 if err := os.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0o644); err != nil { 537 return err 538 } 539 } 540 541 // if we have a remapped root (user namespaces enabled), change the created snapshot 542 // dir ownership to match 543 if root.UID != 0 || root.GID != 0 { 544 if err := root.Chown(path.Join(subvolumes, id)); err != nil { 545 return err 546 } 547 } 548 549 mountLabel := "" 550 if opts != nil { 551 mountLabel = opts.MountLabel 552 } 553 554 return label.Relabel(path.Join(subvolumes, id), mountLabel, false) 555 } 556 557 // Parse btrfs storage options 558 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 559 // Read size to change the subvolume disk quota per container 560 for key, val := range storageOpt { 561 key := strings.ToLower(key) 562 switch key { 563 case "size": 564 size, err := units.RAMInBytes(val) 565 if err != nil { 566 return err 567 } 568 driver.options.size = uint64(size) 569 default: 570 return fmt.Errorf("Unknown option %s", key) 571 } 572 } 573 574 return nil 575 } 576 577 // Set btrfs storage size 578 func (d *Driver) setStorageSize(dir string, driver *Driver) error { 579 if driver.options.size == 0 { 580 return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) 581 } 582 if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { 583 return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) 584 } 585 if err := d.enableQuota(); err != nil { 586 return err 587 } 588 return subvolLimitQgroup(dir, driver.options.size) 589 } 590 591 // Remove the filesystem with given id. 592 func (d *Driver) Remove(id string) error { 593 dir := d.subvolumesDirID(id) 594 if _, err := os.Stat(dir); err != nil { 595 return err 596 } 597 quotasDir := d.quotasDirID(id) 598 if _, err := os.Stat(quotasDir); err == nil { 599 if err := os.Remove(quotasDir); err != nil { 600 return err 601 } 602 } else if !os.IsNotExist(err) { 603 return err 604 } 605 606 // Call updateQuotaStatus() to invoke status update 607 d.updateQuotaStatus() 608 609 if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil { 610 if d.quotaEnabled { 611 // use strings.Contains() rather than errors.Is(), because subvolDelete() does not use %w yet 612 if userns.RunningInUserNS() && strings.Contains(err.Error(), "operation not permitted") { 613 err = errors.Wrap(err, `failed to delete subvolume without root (hint: remount btrfs on "user_subvol_rm_allowed" option, or update the kernel to >= 4.18, or change the storage driver to "fuse-overlayfs")`) 614 } 615 return err 616 } 617 // If quota is not enabled, fallback to rmdir syscall to delete subvolumes. 618 // This would allow unprivileged user to delete their owned subvolumes 619 // in kernel >= 4.18 without user_subvol_rm_allowed mount option. 620 // 621 // From https://github.com/containers/storage/pull/508/commits/831e32b6bdcb530acc4c1cb9059d3c6dba14208c 622 } 623 if err := containerfs.EnsureRemoveAll(dir); err != nil { 624 return err 625 } 626 return d.subvolRescanQuota() 627 } 628 629 // Get the requested filesystem id. 630 func (d *Driver) Get(id, mountLabel string) (string, error) { 631 dir := d.subvolumesDirID(id) 632 st, err := os.Stat(dir) 633 if err != nil { 634 return "", err 635 } 636 637 if !st.IsDir() { 638 return "", fmt.Errorf("%s: not a directory", dir) 639 } 640 641 if quota, err := os.ReadFile(d.quotasDirID(id)); err == nil { 642 if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { 643 if err := d.enableQuota(); err != nil { 644 return "", err 645 } 646 if err := subvolLimitQgroup(dir, size); err != nil { 647 return "", err 648 } 649 } 650 } 651 652 return dir, nil 653 } 654 655 // Put is not implemented for BTRFS as there is no cleanup required for the id. 656 func (d *Driver) Put(id string) error { 657 // Get() creates no runtime resources (like e.g. mounts) 658 // so this doesn't need to do anything. 659 return nil 660 } 661 662 // Exists checks if the id exists in the filesystem. 663 func (d *Driver) Exists(id string) bool { 664 dir := d.subvolumesDirID(id) 665 _, err := os.Stat(dir) 666 return err == nil 667 }