github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/daemon/graphdriver/btrfs/btrfs.go (about) 1 //go:build linux 2 // +build linux 3 4 package btrfs // import "github.com/docker/docker/daemon/graphdriver/btrfs" 5 6 /* 7 #include <stdlib.h> 8 #include <dirent.h> 9 10 // keep struct field name compatible with btrfs-progs < 6.1. 11 #define max_referenced max_rfer 12 #include <btrfs/ioctl.h> 13 #include <btrfs/ctree.h> 14 15 static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) { 16 snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value); 17 } 18 */ 19 import "C" 20 21 import ( 22 "fmt" 23 "math" 24 "os" 25 "path" 26 "path/filepath" 27 "strconv" 28 "strings" 29 "sync" 30 "unsafe" 31 32 "github.com/docker/docker/daemon/graphdriver" 33 "github.com/docker/docker/pkg/containerfs" 34 "github.com/docker/docker/pkg/idtools" 35 "github.com/docker/docker/pkg/parsers" 36 "github.com/docker/docker/pkg/system" 37 units "github.com/docker/go-units" 38 "github.com/moby/sys/mount" 39 "github.com/opencontainers/selinux/go-selinux/label" 40 "github.com/pkg/errors" 41 "github.com/sirupsen/logrus" 42 "golang.org/x/sys/unix" 43 ) 44 45 func init() { 46 graphdriver.Register("btrfs", Init) 47 } 48 49 type btrfsOptions struct { 50 minSpace uint64 51 size uint64 52 } 53 54 // Init returns a new BTRFS driver. 55 // An error is returned if BTRFS is not supported. 56 func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) { 57 58 // Perform feature detection on /var/lib/docker/btrfs if it's an existing directory. 59 // This covers situations where /var/lib/docker/btrfs is a mount, and on a different 60 // filesystem than /var/lib/docker. 61 // If the path does not exist, fall back to using /var/lib/docker for feature detection. 62 testdir := home 63 if _, err := os.Stat(testdir); os.IsNotExist(err) { 64 testdir = filepath.Dir(testdir) 65 } 66 67 fsMagic, err := graphdriver.GetFSMagic(testdir) 68 if err != nil { 69 return nil, err 70 } 71 72 if fsMagic != graphdriver.FsMagicBtrfs { 73 return nil, graphdriver.ErrPrerequisites 74 } 75 76 remappedRoot := idtools.NewIDMappingsFromMaps(uidMaps, gidMaps) 77 currentID := idtools.CurrentIdentity() 78 dirID := idtools.Identity{ 79 UID: currentID.UID, 80 GID: remappedRoot.RootPair().GID, 81 } 82 83 if err := idtools.MkdirAllAndChown(home, 0710, dirID); err != nil { 84 return nil, err 85 } 86 87 opt, userDiskQuota, err := parseOptions(options) 88 if err != nil { 89 return nil, err 90 } 91 92 // For some reason shared mount propagation between a container 93 // and the host does not work for btrfs, and a remedy is to bind 94 // mount graphdriver home to itself (even without changing the 95 // propagation mode). 96 err = mount.MakeMount(home) 97 if err != nil { 98 return nil, errors.Wrapf(err, "failed to make %s a mount", home) 99 } 100 101 driver := &Driver{ 102 home: home, 103 uidMaps: uidMaps, 104 gidMaps: gidMaps, 105 options: opt, 106 } 107 108 if userDiskQuota { 109 if err := driver.subvolEnableQuota(); err != nil { 110 return nil, err 111 } 112 } 113 114 return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil 115 } 116 117 func parseOptions(opt []string) (btrfsOptions, bool, error) { 118 var options btrfsOptions 119 userDiskQuota := false 120 for _, option := range opt { 121 key, val, err := parsers.ParseKeyValueOpt(option) 122 if err != nil { 123 return options, userDiskQuota, err 124 } 125 key = strings.ToLower(key) 126 switch key { 127 case "btrfs.min_space": 128 minSpace, err := units.RAMInBytes(val) 129 if err != nil { 130 return options, userDiskQuota, err 131 } 132 userDiskQuota = true 133 options.minSpace = uint64(minSpace) 134 default: 135 return options, userDiskQuota, fmt.Errorf("Unknown option %s", key) 136 } 137 } 138 return options, userDiskQuota, nil 139 } 140 141 // Driver contains information about the filesystem mounted. 142 type Driver struct { 143 // root of the file system 144 home string 145 uidMaps []idtools.IDMap 146 gidMaps []idtools.IDMap 147 options btrfsOptions 148 quotaEnabled bool 149 once sync.Once 150 } 151 152 // String prints the name of the driver (btrfs). 153 func (d *Driver) String() string { 154 return "btrfs" 155 } 156 157 // Status returns current driver information in a two dimensional string array. 158 // Output contains "Build Version" and "Library Version" of the btrfs libraries used. 159 // Version information can be used to check compatibility with your kernel. 160 func (d *Driver) Status() [][2]string { 161 status := [][2]string{} 162 if bv := btrfsBuildVersion(); bv != "-" { 163 status = append(status, [2]string{"Build Version", bv}) 164 } 165 if lv := btrfsLibVersion(); lv != -1 { 166 status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)}) 167 } 168 return status 169 } 170 171 // GetMetadata returns empty metadata for this driver. 172 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 173 return nil, nil 174 } 175 176 // Cleanup unmounts the home directory. 177 func (d *Driver) Cleanup() error { 178 err := d.subvolDisableQuota() 179 umountErr := mount.Unmount(d.home) 180 181 // in case we have two errors, prefer the one from disableQuota() 182 if err != nil { 183 return err 184 } 185 186 if umountErr != nil { 187 return umountErr 188 } 189 190 return nil 191 } 192 193 func free(p *C.char) { 194 C.free(unsafe.Pointer(p)) 195 } 196 197 func openDir(path string) (*C.DIR, error) { 198 Cpath := C.CString(path) 199 defer free(Cpath) 200 201 dir := C.opendir(Cpath) 202 if dir == nil { 203 return nil, fmt.Errorf("Can't open dir") 204 } 205 return dir, nil 206 } 207 208 func closeDir(dir *C.DIR) { 209 if dir != nil { 210 C.closedir(dir) 211 } 212 } 213 214 func getDirFd(dir *C.DIR) uintptr { 215 return uintptr(C.dirfd(dir)) 216 } 217 218 func subvolCreate(path, name string) error { 219 dir, err := openDir(path) 220 if err != nil { 221 return err 222 } 223 defer closeDir(dir) 224 225 var args C.struct_btrfs_ioctl_vol_args 226 for i, c := range []byte(name) { 227 args.name[i] = C.char(c) 228 } 229 230 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE, 231 uintptr(unsafe.Pointer(&args))) 232 if errno != 0 { 233 return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error()) 234 } 235 return nil 236 } 237 238 func subvolSnapshot(src, dest, name string) error { 239 srcDir, err := openDir(src) 240 if err != nil { 241 return err 242 } 243 defer closeDir(srcDir) 244 245 destDir, err := openDir(dest) 246 if err != nil { 247 return err 248 } 249 defer closeDir(destDir) 250 251 var args C.struct_btrfs_ioctl_vol_args_v2 252 args.fd = C.__s64(getDirFd(srcDir)) 253 254 var cs = C.CString(name) 255 C.set_name_btrfs_ioctl_vol_args_v2(&args, cs) 256 C.free(unsafe.Pointer(cs)) 257 258 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2, 259 uintptr(unsafe.Pointer(&args))) 260 if errno != 0 { 261 return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error()) 262 } 263 return nil 264 } 265 266 func isSubvolume(p string) (bool, error) { 267 var bufStat unix.Stat_t 268 if err := unix.Lstat(p, &bufStat); err != nil { 269 return false, err 270 } 271 272 // return true if it is a btrfs subvolume 273 return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil 274 } 275 276 func subvolDelete(dirpath, name string, quotaEnabled bool) error { 277 dir, err := openDir(dirpath) 278 if err != nil { 279 return err 280 } 281 defer closeDir(dir) 282 fullPath := path.Join(dirpath, name) 283 284 var args C.struct_btrfs_ioctl_vol_args 285 286 // walk the btrfs subvolumes 287 walkSubvolumes := func(p string, f os.FileInfo, err error) error { 288 if err != nil { 289 if os.IsNotExist(err) && p != fullPath { 290 // missing most likely because the path was a subvolume that got removed in the previous iteration 291 // since it's gone anyway, we don't care 292 return nil 293 } 294 return fmt.Errorf("error walking subvolumes: %v", err) 295 } 296 // we want to check children only so skip itself 297 // it will be removed after the filepath walk anyways 298 if f.IsDir() && p != fullPath { 299 sv, err := isSubvolume(p) 300 if err != nil { 301 return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err) 302 } 303 if sv { 304 if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil { 305 return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err) 306 } 307 } 308 } 309 return nil 310 } 311 if err := filepath.Walk(path.Join(dirpath, name), walkSubvolumes); err != nil { 312 return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err) 313 } 314 315 if quotaEnabled { 316 if qgroupid, err := subvolLookupQgroup(fullPath); err == nil { 317 var args C.struct_btrfs_ioctl_qgroup_create_args 318 args.qgroupid = C.__u64(qgroupid) 319 320 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE, 321 uintptr(unsafe.Pointer(&args))) 322 if errno != 0 { 323 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error()) 324 } 325 } else { 326 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error()) 327 } 328 } 329 330 // all subvolumes have been removed 331 // now remove the one originally passed in 332 for i, c := range []byte(name) { 333 args.name[i] = C.char(c) 334 } 335 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY, 336 uintptr(unsafe.Pointer(&args))) 337 if errno != 0 { 338 return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error()) 339 } 340 return nil 341 } 342 343 func (d *Driver) updateQuotaStatus() { 344 d.once.Do(func() { 345 if !d.quotaEnabled { 346 // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed 347 if err := subvolQgroupStatus(d.home); err != nil { 348 // quota is still not enabled 349 return 350 } 351 d.quotaEnabled = true 352 } 353 }) 354 } 355 356 func (d *Driver) subvolEnableQuota() error { 357 d.updateQuotaStatus() 358 359 if d.quotaEnabled { 360 return nil 361 } 362 363 dir, err := openDir(d.home) 364 if err != nil { 365 return err 366 } 367 defer closeDir(dir) 368 369 var args C.struct_btrfs_ioctl_quota_ctl_args 370 args.cmd = C.BTRFS_QUOTA_CTL_ENABLE 371 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 372 uintptr(unsafe.Pointer(&args))) 373 if errno != 0 { 374 return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) 375 } 376 377 d.quotaEnabled = true 378 379 return nil 380 } 381 382 func (d *Driver) subvolDisableQuota() error { 383 d.updateQuotaStatus() 384 385 if !d.quotaEnabled { 386 return nil 387 } 388 389 dir, err := openDir(d.home) 390 if err != nil { 391 return err 392 } 393 defer closeDir(dir) 394 395 var args C.struct_btrfs_ioctl_quota_ctl_args 396 args.cmd = C.BTRFS_QUOTA_CTL_DISABLE 397 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 398 uintptr(unsafe.Pointer(&args))) 399 if errno != 0 { 400 return fmt.Errorf("Failed to disable btrfs quota for %s: %v", dir, errno.Error()) 401 } 402 403 d.quotaEnabled = false 404 405 return nil 406 } 407 408 func (d *Driver) subvolRescanQuota() error { 409 d.updateQuotaStatus() 410 411 if !d.quotaEnabled { 412 return nil 413 } 414 415 dir, err := openDir(d.home) 416 if err != nil { 417 return err 418 } 419 defer closeDir(dir) 420 421 var args C.struct_btrfs_ioctl_quota_rescan_args 422 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, 423 uintptr(unsafe.Pointer(&args))) 424 if errno != 0 { 425 return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) 426 } 427 428 return nil 429 } 430 431 func subvolLimitQgroup(path string, size uint64) error { 432 dir, err := openDir(path) 433 if err != nil { 434 return err 435 } 436 defer closeDir(dir) 437 438 var args C.struct_btrfs_ioctl_qgroup_limit_args 439 args.lim.max_rfer = C.__u64(size) 440 args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER 441 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, 442 uintptr(unsafe.Pointer(&args))) 443 if errno != 0 { 444 return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) 445 } 446 447 return nil 448 } 449 450 // subvolQgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path 451 // with search key of BTRFS_QGROUP_STATUS_KEY. 452 // In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY. 453 // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 454 func subvolQgroupStatus(path string) error { 455 dir, err := openDir(path) 456 if err != nil { 457 return err 458 } 459 defer closeDir(dir) 460 461 var args C.struct_btrfs_ioctl_search_args 462 args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID 463 args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY 464 args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY 465 args.key.max_objectid = C.__u64(math.MaxUint64) 466 args.key.max_offset = C.__u64(math.MaxUint64) 467 args.key.max_transid = C.__u64(math.MaxUint64) 468 args.key.nr_items = 4096 469 470 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH, 471 uintptr(unsafe.Pointer(&args))) 472 if errno != 0 { 473 return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error()) 474 } 475 sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf)) 476 if sh._type != C.BTRFS_QGROUP_STATUS_KEY { 477 return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type) 478 } 479 return nil 480 } 481 482 func subvolLookupQgroup(path string) (uint64, error) { 483 dir, err := openDir(path) 484 if err != nil { 485 return 0, err 486 } 487 defer closeDir(dir) 488 489 var args C.struct_btrfs_ioctl_ino_lookup_args 490 args.objectid = C.BTRFS_FIRST_FREE_OBJECTID 491 492 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP, 493 uintptr(unsafe.Pointer(&args))) 494 if errno != 0 { 495 return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error()) 496 } 497 if args.treeid == 0 { 498 return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir) 499 } 500 501 return uint64(args.treeid), nil 502 } 503 504 func (d *Driver) subvolumesDir() string { 505 return path.Join(d.home, "subvolumes") 506 } 507 508 func (d *Driver) subvolumesDirID(id string) string { 509 return path.Join(d.subvolumesDir(), id) 510 } 511 512 func (d *Driver) quotasDir() string { 513 return path.Join(d.home, "quotas") 514 } 515 516 func (d *Driver) quotasDirID(id string) string { 517 return path.Join(d.quotasDir(), id) 518 } 519 520 // CreateReadWrite creates a layer that is writable for use as a container 521 // file system. 522 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 523 return d.Create(id, parent, opts) 524 } 525 526 // Create the filesystem with given id. 527 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error { 528 quotas := path.Join(d.home, "quotas") 529 subvolumes := path.Join(d.home, "subvolumes") 530 rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) 531 if err != nil { 532 return err 533 } 534 535 currentID := idtools.CurrentIdentity() 536 dirID := idtools.Identity{ 537 UID: currentID.UID, 538 GID: rootGID, 539 } 540 541 if err := idtools.MkdirAllAndChown(subvolumes, 0710, dirID); err != nil { 542 return err 543 } 544 if parent == "" { 545 if err := subvolCreate(subvolumes, id); err != nil { 546 return err 547 } 548 } else { 549 parentDir := d.subvolumesDirID(parent) 550 st, err := os.Stat(parentDir) 551 if err != nil { 552 return err 553 } 554 if !st.IsDir() { 555 return fmt.Errorf("%s: not a directory", parentDir) 556 } 557 if err := subvolSnapshot(parentDir, subvolumes, id); err != nil { 558 return err 559 } 560 } 561 562 var storageOpt map[string]string 563 if opts != nil { 564 storageOpt = opts.StorageOpt 565 } 566 567 if _, ok := storageOpt["size"]; ok { 568 driver := &Driver{} 569 if err := d.parseStorageOpt(storageOpt, driver); err != nil { 570 return err 571 } 572 573 if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { 574 return err 575 } 576 if err := idtools.MkdirAllAndChown(quotas, 0700, idtools.CurrentIdentity()); err != nil { 577 return err 578 } 579 if err := os.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil { 580 return err 581 } 582 } 583 584 // if we have a remapped root (user namespaces enabled), change the created snapshot 585 // dir ownership to match 586 if rootUID != 0 || rootGID != 0 { 587 if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil { 588 return err 589 } 590 } 591 592 mountLabel := "" 593 if opts != nil { 594 mountLabel = opts.MountLabel 595 } 596 597 return label.Relabel(path.Join(subvolumes, id), mountLabel, false) 598 } 599 600 // Parse btrfs storage options 601 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 602 // Read size to change the subvolume disk quota per container 603 for key, val := range storageOpt { 604 key := strings.ToLower(key) 605 switch key { 606 case "size": 607 size, err := units.RAMInBytes(val) 608 if err != nil { 609 return err 610 } 611 driver.options.size = uint64(size) 612 default: 613 return fmt.Errorf("Unknown option %s", key) 614 } 615 } 616 617 return nil 618 } 619 620 // Set btrfs storage size 621 func (d *Driver) setStorageSize(dir string, driver *Driver) error { 622 if driver.options.size == 0 { 623 return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) 624 } 625 if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { 626 return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) 627 } 628 if err := d.subvolEnableQuota(); err != nil { 629 return err 630 } 631 return subvolLimitQgroup(dir, driver.options.size) 632 } 633 634 // Remove the filesystem with given id. 635 func (d *Driver) Remove(id string) error { 636 dir := d.subvolumesDirID(id) 637 if _, err := os.Stat(dir); err != nil { 638 return err 639 } 640 quotasDir := d.quotasDirID(id) 641 if _, err := os.Stat(quotasDir); err == nil { 642 if err := os.Remove(quotasDir); err != nil { 643 return err 644 } 645 } else if !os.IsNotExist(err) { 646 return err 647 } 648 649 // Call updateQuotaStatus() to invoke status update 650 d.updateQuotaStatus() 651 652 if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil { 653 if d.quotaEnabled { 654 return err 655 } 656 // If quota is not enabled, fallback to rmdir syscall to delete subvolumes. 657 // This would allow unprivileged user to delete their owned subvolumes 658 // in kernel >= 4.18 without user_subvol_rm_allowed mount option. 659 // 660 // From https://github.com/containers/storage/pull/508/commits/831e32b6bdcb530acc4c1cb9059d3c6dba14208c 661 } 662 if err := system.EnsureRemoveAll(dir); err != nil { 663 return err 664 } 665 return d.subvolRescanQuota() 666 } 667 668 // Get the requested filesystem id. 669 func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) { 670 dir := d.subvolumesDirID(id) 671 st, err := os.Stat(dir) 672 if err != nil { 673 return nil, err 674 } 675 676 if !st.IsDir() { 677 return nil, fmt.Errorf("%s: not a directory", dir) 678 } 679 680 if quota, err := os.ReadFile(d.quotasDirID(id)); err == nil { 681 if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { 682 if err := d.subvolEnableQuota(); err != nil { 683 return nil, err 684 } 685 if err := subvolLimitQgroup(dir, size); err != nil { 686 return nil, err 687 } 688 } 689 } 690 691 return containerfs.NewLocalContainerFS(dir), nil 692 } 693 694 // Put is not implemented for BTRFS as there is no cleanup required for the id. 695 func (d *Driver) Put(id string) error { 696 // Get() creates no runtime resources (like e.g. mounts) 697 // so this doesn't need to do anything. 698 return nil 699 } 700 701 // Exists checks if the id exists in the filesystem. 702 func (d *Driver) Exists(id string) bool { 703 dir := d.subvolumesDirID(id) 704 _, err := os.Stat(dir) 705 return err == nil 706 }