github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/daemon/graphdriver/btrfs/btrfs.go (about) 1 //go:build linux 2 // +build linux 3 4 package btrfs // import "github.com/docker/docker/daemon/graphdriver/btrfs" 5 6 /* 7 #include <stdlib.h> 8 #include <dirent.h> 9 #include <btrfs/ioctl.h> 10 #include <btrfs/ctree.h> 11 12 static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) { 13 snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value); 14 } 15 */ 16 import "C" 17 18 import ( 19 "fmt" 20 "math" 21 "os" 22 "path" 23 "path/filepath" 24 "strconv" 25 "strings" 26 "sync" 27 "unsafe" 28 29 "github.com/containerd/containerd/pkg/userns" 30 "github.com/docker/docker/daemon/graphdriver" 31 "github.com/docker/docker/pkg/containerfs" 32 "github.com/docker/docker/pkg/idtools" 33 "github.com/docker/docker/pkg/parsers" 34 "github.com/docker/docker/pkg/system" 35 units "github.com/docker/go-units" 36 "github.com/moby/sys/mount" 37 "github.com/opencontainers/selinux/go-selinux/label" 38 "github.com/pkg/errors" 39 "github.com/sirupsen/logrus" 40 "golang.org/x/sys/unix" 41 ) 42 43 func init() { 44 graphdriver.Register("btrfs", Init) 45 } 46 47 type btrfsOptions struct { 48 minSpace uint64 49 size uint64 50 } 51 52 // Init returns a new BTRFS driver. 53 // An error is returned if BTRFS is not supported. 54 func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) { 55 56 // Perform feature detection on /var/lib/docker/btrfs if it's an existing directory. 57 // This covers situations where /var/lib/docker/btrfs is a mount, and on a different 58 // filesystem than /var/lib/docker. 59 // If the path does not exist, fall back to using /var/lib/docker for feature detection. 60 testdir := home 61 if _, err := os.Stat(testdir); os.IsNotExist(err) { 62 testdir = filepath.Dir(testdir) 63 } 64 65 fsMagic, err := graphdriver.GetFSMagic(testdir) 66 if err != nil { 67 return nil, err 68 } 69 70 if fsMagic != graphdriver.FsMagicBtrfs { 71 return nil, graphdriver.ErrPrerequisites 72 } 73 74 remappedRoot := idtools.NewIDMappingsFromMaps(uidMaps, gidMaps) 75 currentID := idtools.CurrentIdentity() 76 dirID := idtools.Identity{ 77 UID: currentID.UID, 78 GID: remappedRoot.RootPair().GID, 79 } 80 81 if err := idtools.MkdirAllAndChown(home, 0710, dirID); err != nil { 82 return nil, err 83 } 84 85 opt, userDiskQuota, err := parseOptions(options) 86 if err != nil { 87 return nil, err 88 } 89 90 // For some reason shared mount propagation between a container 91 // and the host does not work for btrfs, and a remedy is to bind 92 // mount graphdriver home to itself (even without changing the 93 // propagation mode). 94 err = mount.MakeMount(home) 95 if err != nil { 96 return nil, errors.Wrapf(err, "failed to make %s a mount", home) 97 } 98 99 driver := &Driver{ 100 home: home, 101 uidMaps: uidMaps, 102 gidMaps: gidMaps, 103 options: opt, 104 } 105 106 if userDiskQuota { 107 if err := driver.enableQuota(); err != nil { 108 return nil, err 109 } 110 } 111 112 return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil 113 } 114 115 func parseOptions(opt []string) (btrfsOptions, bool, error) { 116 var options btrfsOptions 117 userDiskQuota := false 118 for _, option := range opt { 119 key, val, err := parsers.ParseKeyValueOpt(option) 120 if err != nil { 121 return options, userDiskQuota, err 122 } 123 key = strings.ToLower(key) 124 switch key { 125 case "btrfs.min_space": 126 minSpace, err := units.RAMInBytes(val) 127 if err != nil { 128 return options, userDiskQuota, err 129 } 130 userDiskQuota = true 131 options.minSpace = uint64(minSpace) 132 default: 133 return options, userDiskQuota, fmt.Errorf("Unknown option %s", key) 134 } 135 } 136 return options, userDiskQuota, nil 137 } 138 139 // Driver contains information about the filesystem mounted. 140 type Driver struct { 141 // root of the file system 142 home string 143 uidMaps []idtools.IDMap 144 gidMaps []idtools.IDMap 145 options btrfsOptions 146 quotaEnabled bool 147 once sync.Once 148 } 149 150 // String prints the name of the driver (btrfs). 151 func (d *Driver) String() string { 152 return "btrfs" 153 } 154 155 // Status returns current driver information in a two dimensional string array. 156 // Output contains "Build Version" and "Library Version" of the btrfs libraries used. 157 // Version information can be used to check compatibility with your kernel. 158 func (d *Driver) Status() [][2]string { 159 status := [][2]string{} 160 if bv := btrfsBuildVersion(); bv != "-" { 161 status = append(status, [2]string{"Build Version", bv}) 162 } 163 if lv := btrfsLibVersion(); lv != -1 { 164 status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)}) 165 } 166 return status 167 } 168 169 // GetMetadata returns empty metadata for this driver. 170 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 171 return nil, nil 172 } 173 174 // Cleanup unmounts the home directory. 175 func (d *Driver) Cleanup() error { 176 if err := mount.Unmount(d.home); err != nil { 177 return err 178 } 179 180 return nil 181 } 182 183 func free(p *C.char) { 184 C.free(unsafe.Pointer(p)) 185 } 186 187 func openDir(path string) (*C.DIR, error) { 188 Cpath := C.CString(path) 189 defer free(Cpath) 190 191 dir := C.opendir(Cpath) 192 if dir == nil { 193 return nil, fmt.Errorf("Can't open dir") 194 } 195 return dir, nil 196 } 197 198 func closeDir(dir *C.DIR) { 199 if dir != nil { 200 C.closedir(dir) 201 } 202 } 203 204 func getDirFd(dir *C.DIR) uintptr { 205 return uintptr(C.dirfd(dir)) 206 } 207 208 func subvolCreate(path, name string) error { 209 dir, err := openDir(path) 210 if err != nil { 211 return err 212 } 213 defer closeDir(dir) 214 215 var args C.struct_btrfs_ioctl_vol_args 216 for i, c := range []byte(name) { 217 args.name[i] = C.char(c) 218 } 219 220 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE, 221 uintptr(unsafe.Pointer(&args))) 222 if errno != 0 { 223 return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error()) 224 } 225 return nil 226 } 227 228 func subvolSnapshot(src, dest, name string) error { 229 srcDir, err := openDir(src) 230 if err != nil { 231 return err 232 } 233 defer closeDir(srcDir) 234 235 destDir, err := openDir(dest) 236 if err != nil { 237 return err 238 } 239 defer closeDir(destDir) 240 241 var args C.struct_btrfs_ioctl_vol_args_v2 242 args.fd = C.__s64(getDirFd(srcDir)) 243 244 var cs = C.CString(name) 245 C.set_name_btrfs_ioctl_vol_args_v2(&args, cs) 246 C.free(unsafe.Pointer(cs)) 247 248 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2, 249 uintptr(unsafe.Pointer(&args))) 250 if errno != 0 { 251 return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error()) 252 } 253 return nil 254 } 255 256 func isSubvolume(p string) (bool, error) { 257 var bufStat unix.Stat_t 258 if err := unix.Lstat(p, &bufStat); err != nil { 259 return false, err 260 } 261 262 // return true if it is a btrfs subvolume 263 return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil 264 } 265 266 func subvolDelete(dirpath, name string, quotaEnabled bool) error { 267 dir, err := openDir(dirpath) 268 if err != nil { 269 return err 270 } 271 defer closeDir(dir) 272 fullPath := path.Join(dirpath, name) 273 274 var args C.struct_btrfs_ioctl_vol_args 275 276 // walk the btrfs subvolumes 277 walkSubvolumes := func(p string, f os.FileInfo, err error) error { 278 if err != nil { 279 if os.IsNotExist(err) && p != fullPath { 280 // missing most likely because the path was a subvolume that got removed in the previous iteration 281 // since it's gone anyway, we don't care 282 return nil 283 } 284 return fmt.Errorf("error walking subvolumes: %v", err) 285 } 286 // we want to check children only so skip itself 287 // it will be removed after the filepath walk anyways 288 if f.IsDir() && p != fullPath { 289 sv, err := isSubvolume(p) 290 if err != nil { 291 return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err) 292 } 293 if sv { 294 if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil { 295 return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err) 296 } 297 } 298 } 299 return nil 300 } 301 if err := filepath.Walk(path.Join(dirpath, name), walkSubvolumes); err != nil { 302 return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err) 303 } 304 305 if quotaEnabled { 306 if qgroupid, err := subvolLookupQgroup(fullPath); err == nil { 307 var args C.struct_btrfs_ioctl_qgroup_create_args 308 args.qgroupid = C.__u64(qgroupid) 309 310 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE, 311 uintptr(unsafe.Pointer(&args))) 312 if errno != 0 { 313 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error()) 314 } 315 } else { 316 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error()) 317 } 318 } 319 320 // all subvolumes have been removed 321 // now remove the one originally passed in 322 for i, c := range []byte(name) { 323 args.name[i] = C.char(c) 324 } 325 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY, 326 uintptr(unsafe.Pointer(&args))) 327 if errno != 0 { 328 return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error()) 329 } 330 return nil 331 } 332 333 func (d *Driver) updateQuotaStatus() { 334 d.once.Do(func() { 335 if !d.quotaEnabled { 336 // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed 337 if err := qgroupStatus(d.home); err != nil { 338 // quota is still not enabled 339 return 340 } 341 d.quotaEnabled = true 342 } 343 }) 344 } 345 346 func (d *Driver) enableQuota() error { 347 d.updateQuotaStatus() 348 349 if d.quotaEnabled { 350 return nil 351 } 352 353 dir, err := openDir(d.home) 354 if err != nil { 355 return err 356 } 357 defer closeDir(dir) 358 359 var args C.struct_btrfs_ioctl_quota_ctl_args 360 args.cmd = C.BTRFS_QUOTA_CTL_ENABLE 361 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 362 uintptr(unsafe.Pointer(&args))) 363 if errno != 0 { 364 return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) 365 } 366 367 d.quotaEnabled = true 368 369 return nil 370 } 371 372 func (d *Driver) subvolRescanQuota() error { 373 d.updateQuotaStatus() 374 375 if !d.quotaEnabled { 376 return nil 377 } 378 379 dir, err := openDir(d.home) 380 if err != nil { 381 return err 382 } 383 defer closeDir(dir) 384 385 var args C.struct_btrfs_ioctl_quota_rescan_args 386 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, 387 uintptr(unsafe.Pointer(&args))) 388 if errno != 0 { 389 return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) 390 } 391 392 return nil 393 } 394 395 func subvolLimitQgroup(path string, size uint64) error { 396 dir, err := openDir(path) 397 if err != nil { 398 return err 399 } 400 defer closeDir(dir) 401 402 var args C.struct_btrfs_ioctl_qgroup_limit_args 403 args.lim.max_referenced = C.__u64(size) 404 args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER 405 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, 406 uintptr(unsafe.Pointer(&args))) 407 if errno != 0 { 408 return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) 409 } 410 411 return nil 412 } 413 414 // qgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path 415 // with search key of BTRFS_QGROUP_STATUS_KEY. 416 // In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY. 417 // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 418 func qgroupStatus(path string) error { 419 dir, err := openDir(path) 420 if err != nil { 421 return err 422 } 423 defer closeDir(dir) 424 425 var args C.struct_btrfs_ioctl_search_args 426 args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID 427 args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY 428 args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY 429 args.key.max_objectid = C.__u64(math.MaxUint64) 430 args.key.max_offset = C.__u64(math.MaxUint64) 431 args.key.max_transid = C.__u64(math.MaxUint64) 432 args.key.nr_items = 4096 433 434 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH, 435 uintptr(unsafe.Pointer(&args))) 436 if errno != 0 { 437 return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error()) 438 } 439 sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf)) 440 if sh._type != C.BTRFS_QGROUP_STATUS_KEY { 441 return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type) 442 } 443 return nil 444 } 445 446 func subvolLookupQgroup(path string) (uint64, error) { 447 dir, err := openDir(path) 448 if err != nil { 449 return 0, err 450 } 451 defer closeDir(dir) 452 453 var args C.struct_btrfs_ioctl_ino_lookup_args 454 args.objectid = C.BTRFS_FIRST_FREE_OBJECTID 455 456 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP, 457 uintptr(unsafe.Pointer(&args))) 458 if errno != 0 { 459 return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error()) 460 } 461 if args.treeid == 0 { 462 return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir) 463 } 464 465 return uint64(args.treeid), nil 466 } 467 468 func (d *Driver) subvolumesDir() string { 469 return path.Join(d.home, "subvolumes") 470 } 471 472 func (d *Driver) subvolumesDirID(id string) string { 473 return path.Join(d.subvolumesDir(), id) 474 } 475 476 func (d *Driver) quotasDir() string { 477 return path.Join(d.home, "quotas") 478 } 479 480 func (d *Driver) quotasDirID(id string) string { 481 return path.Join(d.quotasDir(), id) 482 } 483 484 // CreateReadWrite creates a layer that is writable for use as a container 485 // file system. 486 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 487 return d.Create(id, parent, opts) 488 } 489 490 // Create the filesystem with given id. 491 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error { 492 quotas := path.Join(d.home, "quotas") 493 subvolumes := path.Join(d.home, "subvolumes") 494 rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) 495 if err != nil { 496 return err 497 } 498 499 currentID := idtools.CurrentIdentity() 500 dirID := idtools.Identity{ 501 UID: currentID.UID, 502 GID: rootGID, 503 } 504 505 if err := idtools.MkdirAllAndChown(subvolumes, 0710, dirID); err != nil { 506 return err 507 } 508 if parent == "" { 509 if err := subvolCreate(subvolumes, id); err != nil { 510 return err 511 } 512 } else { 513 parentDir := d.subvolumesDirID(parent) 514 st, err := os.Stat(parentDir) 515 if err != nil { 516 return err 517 } 518 if !st.IsDir() { 519 return fmt.Errorf("%s: not a directory", parentDir) 520 } 521 if err := subvolSnapshot(parentDir, subvolumes, id); err != nil { 522 return err 523 } 524 } 525 526 var storageOpt map[string]string 527 if opts != nil { 528 storageOpt = opts.StorageOpt 529 } 530 531 if _, ok := storageOpt["size"]; ok { 532 driver := &Driver{} 533 if err := d.parseStorageOpt(storageOpt, driver); err != nil { 534 return err 535 } 536 537 if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { 538 return err 539 } 540 if err := idtools.MkdirAllAndChown(quotas, 0700, idtools.CurrentIdentity()); err != nil { 541 return err 542 } 543 if err := os.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil { 544 return err 545 } 546 } 547 548 // if we have a remapped root (user namespaces enabled), change the created snapshot 549 // dir ownership to match 550 if rootUID != 0 || rootGID != 0 { 551 if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil { 552 return err 553 } 554 } 555 556 mountLabel := "" 557 if opts != nil { 558 mountLabel = opts.MountLabel 559 } 560 561 return label.Relabel(path.Join(subvolumes, id), mountLabel, false) 562 } 563 564 // Parse btrfs storage options 565 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 566 // Read size to change the subvolume disk quota per container 567 for key, val := range storageOpt { 568 key := strings.ToLower(key) 569 switch key { 570 case "size": 571 size, err := units.RAMInBytes(val) 572 if err != nil { 573 return err 574 } 575 driver.options.size = uint64(size) 576 default: 577 return fmt.Errorf("Unknown option %s", key) 578 } 579 } 580 581 return nil 582 } 583 584 // Set btrfs storage size 585 func (d *Driver) setStorageSize(dir string, driver *Driver) error { 586 if driver.options.size == 0 { 587 return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) 588 } 589 if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { 590 return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) 591 } 592 if err := d.enableQuota(); err != nil { 593 return err 594 } 595 return subvolLimitQgroup(dir, driver.options.size) 596 } 597 598 // Remove the filesystem with given id. 599 func (d *Driver) Remove(id string) error { 600 dir := d.subvolumesDirID(id) 601 if _, err := os.Stat(dir); err != nil { 602 return err 603 } 604 quotasDir := d.quotasDirID(id) 605 if _, err := os.Stat(quotasDir); err == nil { 606 if err := os.Remove(quotasDir); err != nil { 607 return err 608 } 609 } else if !os.IsNotExist(err) { 610 return err 611 } 612 613 // Call updateQuotaStatus() to invoke status update 614 d.updateQuotaStatus() 615 616 if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil { 617 if d.quotaEnabled { 618 // use strings.Contains() rather than errors.Is(), because subvolDelete() does not use %w yet 619 if userns.RunningInUserNS() && strings.Contains(err.Error(), "operation not permitted") { 620 err = errors.Wrap(err, `failed to delete subvolume without root (hint: remount btrfs on "user_subvol_rm_allowed" option, or update the kernel to >= 4.18, or change the storage driver to "fuse-overlayfs")`) 621 } 622 return err 623 } 624 // If quota is not enabled, fallback to rmdir syscall to delete subvolumes. 625 // This would allow unprivileged user to delete their owned subvolumes 626 // in kernel >= 4.18 without user_subvol_rm_allowed mount option. 627 // 628 // From https://github.com/containers/storage/pull/508/commits/831e32b6bdcb530acc4c1cb9059d3c6dba14208c 629 } 630 if err := system.EnsureRemoveAll(dir); err != nil { 631 return err 632 } 633 return d.subvolRescanQuota() 634 } 635 636 // Get the requested filesystem id. 637 func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) { 638 dir := d.subvolumesDirID(id) 639 st, err := os.Stat(dir) 640 if err != nil { 641 return nil, err 642 } 643 644 if !st.IsDir() { 645 return nil, fmt.Errorf("%s: not a directory", dir) 646 } 647 648 if quota, err := os.ReadFile(d.quotasDirID(id)); err == nil { 649 if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { 650 if err := d.enableQuota(); err != nil { 651 return nil, err 652 } 653 if err := subvolLimitQgroup(dir, size); err != nil { 654 return nil, err 655 } 656 } 657 } 658 659 return containerfs.NewLocalContainerFS(dir), nil 660 } 661 662 // Put is not implemented for BTRFS as there is no cleanup required for the id. 663 func (d *Driver) Put(id string) error { 664 // Get() creates no runtime resources (like e.g. mounts) 665 // so this doesn't need to do anything. 666 return nil 667 } 668 669 // Exists checks if the id exists in the filesystem. 670 func (d *Driver) Exists(id string) bool { 671 dir := d.subvolumesDirID(id) 672 _, err := os.Stat(dir) 673 return err == nil 674 }