github.com/jfrazelle/docker@v1.1.2-0.20210712172922-bf78e25fe508/daemon/graphdriver/btrfs/btrfs.go (about) 1 // +build linux 2 3 package btrfs // import "github.com/docker/docker/daemon/graphdriver/btrfs" 4 5 /* 6 #include <stdlib.h> 7 #include <dirent.h> 8 #include <btrfs/ioctl.h> 9 #include <btrfs/ctree.h> 10 11 static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) { 12 snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value); 13 } 14 */ 15 import "C" 16 17 import ( 18 "fmt" 19 "io/ioutil" 20 "math" 21 "os" 22 "path" 23 "path/filepath" 24 "strconv" 25 "strings" 26 "sync" 27 "unsafe" 28 29 "github.com/docker/docker/daemon/graphdriver" 30 "github.com/docker/docker/pkg/containerfs" 31 "github.com/docker/docker/pkg/idtools" 32 "github.com/docker/docker/pkg/parsers" 33 "github.com/docker/docker/pkg/system" 34 units "github.com/docker/go-units" 35 "github.com/moby/sys/mount" 36 "github.com/opencontainers/selinux/go-selinux/label" 37 "github.com/pkg/errors" 38 "github.com/sirupsen/logrus" 39 "golang.org/x/sys/unix" 40 ) 41 42 func init() { 43 graphdriver.Register("btrfs", Init) 44 } 45 46 type btrfsOptions struct { 47 minSpace uint64 48 size uint64 49 } 50 51 // Init returns a new BTRFS driver. 52 // An error is returned if BTRFS is not supported. 53 func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) { 54 55 // Perform feature detection on /var/lib/docker/btrfs if it's an existing directory. 56 // This covers situations where /var/lib/docker/btrfs is a mount, and on a different 57 // filesystem than /var/lib/docker. 58 // If the path does not exist, fall back to using /var/lib/docker for feature detection. 59 testdir := home 60 if _, err := os.Stat(testdir); os.IsNotExist(err) { 61 testdir = filepath.Dir(testdir) 62 } 63 64 fsMagic, err := graphdriver.GetFSMagic(testdir) 65 if err != nil { 66 return nil, err 67 } 68 69 if fsMagic != graphdriver.FsMagicBtrfs { 70 return nil, graphdriver.ErrPrerequisites 71 } 72 73 if err := idtools.MkdirAllAndChown(home, 0701, idtools.CurrentIdentity()); err != nil { 74 return nil, err 75 } 76 77 opt, userDiskQuota, err := parseOptions(options) 78 if err != nil { 79 return nil, err 80 } 81 82 // For some reason shared mount propagation between a container 83 // and the host does not work for btrfs, and a remedy is to bind 84 // mount graphdriver home to itself (even without changing the 85 // propagation mode). 86 err = mount.MakeMount(home) 87 if err != nil { 88 return nil, errors.Wrapf(err, "failed to make %s a mount", home) 89 } 90 91 driver := &Driver{ 92 home: home, 93 uidMaps: uidMaps, 94 gidMaps: gidMaps, 95 options: opt, 96 } 97 98 if userDiskQuota { 99 if err := driver.enableQuota(); err != nil { 100 return nil, err 101 } 102 } 103 104 return graphdriver.NewNaiveDiffDriver(driver, uidMaps, gidMaps), nil 105 } 106 107 func parseOptions(opt []string) (btrfsOptions, bool, error) { 108 var options btrfsOptions 109 userDiskQuota := false 110 for _, option := range opt { 111 key, val, err := parsers.ParseKeyValueOpt(option) 112 if err != nil { 113 return options, userDiskQuota, err 114 } 115 key = strings.ToLower(key) 116 switch key { 117 case "btrfs.min_space": 118 minSpace, err := units.RAMInBytes(val) 119 if err != nil { 120 return options, userDiskQuota, err 121 } 122 userDiskQuota = true 123 options.minSpace = uint64(minSpace) 124 default: 125 return options, userDiskQuota, fmt.Errorf("Unknown option %s", key) 126 } 127 } 128 return options, userDiskQuota, nil 129 } 130 131 // Driver contains information about the filesystem mounted. 132 type Driver struct { 133 // root of the file system 134 home string 135 uidMaps []idtools.IDMap 136 gidMaps []idtools.IDMap 137 options btrfsOptions 138 quotaEnabled bool 139 once sync.Once 140 } 141 142 // String prints the name of the driver (btrfs). 143 func (d *Driver) String() string { 144 return "btrfs" 145 } 146 147 // Status returns current driver information in a two dimensional string array. 148 // Output contains "Build Version" and "Library Version" of the btrfs libraries used. 149 // Version information can be used to check compatibility with your kernel. 150 func (d *Driver) Status() [][2]string { 151 status := [][2]string{} 152 if bv := btrfsBuildVersion(); bv != "-" { 153 status = append(status, [2]string{"Build Version", bv}) 154 } 155 if lv := btrfsLibVersion(); lv != -1 { 156 status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)}) 157 } 158 return status 159 } 160 161 // GetMetadata returns empty metadata for this driver. 162 func (d *Driver) GetMetadata(id string) (map[string]string, error) { 163 return nil, nil 164 } 165 166 // Cleanup unmounts the home directory. 167 func (d *Driver) Cleanup() error { 168 if err := mount.Unmount(d.home); err != nil { 169 return err 170 } 171 172 return nil 173 } 174 175 func free(p *C.char) { 176 C.free(unsafe.Pointer(p)) 177 } 178 179 func openDir(path string) (*C.DIR, error) { 180 Cpath := C.CString(path) 181 defer free(Cpath) 182 183 dir := C.opendir(Cpath) 184 if dir == nil { 185 return nil, fmt.Errorf("Can't open dir") 186 } 187 return dir, nil 188 } 189 190 func closeDir(dir *C.DIR) { 191 if dir != nil { 192 C.closedir(dir) 193 } 194 } 195 196 func getDirFd(dir *C.DIR) uintptr { 197 return uintptr(C.dirfd(dir)) 198 } 199 200 func subvolCreate(path, name string) error { 201 dir, err := openDir(path) 202 if err != nil { 203 return err 204 } 205 defer closeDir(dir) 206 207 var args C.struct_btrfs_ioctl_vol_args 208 for i, c := range []byte(name) { 209 args.name[i] = C.char(c) 210 } 211 212 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE, 213 uintptr(unsafe.Pointer(&args))) 214 if errno != 0 { 215 return fmt.Errorf("Failed to create btrfs subvolume: %v", errno.Error()) 216 } 217 return nil 218 } 219 220 func subvolSnapshot(src, dest, name string) error { 221 srcDir, err := openDir(src) 222 if err != nil { 223 return err 224 } 225 defer closeDir(srcDir) 226 227 destDir, err := openDir(dest) 228 if err != nil { 229 return err 230 } 231 defer closeDir(destDir) 232 233 var args C.struct_btrfs_ioctl_vol_args_v2 234 args.fd = C.__s64(getDirFd(srcDir)) 235 236 var cs = C.CString(name) 237 C.set_name_btrfs_ioctl_vol_args_v2(&args, cs) 238 C.free(unsafe.Pointer(cs)) 239 240 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2, 241 uintptr(unsafe.Pointer(&args))) 242 if errno != 0 { 243 return fmt.Errorf("Failed to create btrfs snapshot: %v", errno.Error()) 244 } 245 return nil 246 } 247 248 func isSubvolume(p string) (bool, error) { 249 var bufStat unix.Stat_t 250 if err := unix.Lstat(p, &bufStat); err != nil { 251 return false, err 252 } 253 254 // return true if it is a btrfs subvolume 255 return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil 256 } 257 258 func subvolDelete(dirpath, name string, quotaEnabled bool) error { 259 dir, err := openDir(dirpath) 260 if err != nil { 261 return err 262 } 263 defer closeDir(dir) 264 fullPath := path.Join(dirpath, name) 265 266 var args C.struct_btrfs_ioctl_vol_args 267 268 // walk the btrfs subvolumes 269 walkSubvolumes := func(p string, f os.FileInfo, err error) error { 270 if err != nil { 271 if os.IsNotExist(err) && p != fullPath { 272 // missing most likely because the path was a subvolume that got removed in the previous iteration 273 // since it's gone anyway, we don't care 274 return nil 275 } 276 return fmt.Errorf("error walking subvolumes: %v", err) 277 } 278 // we want to check children only so skip itself 279 // it will be removed after the filepath walk anyways 280 if f.IsDir() && p != fullPath { 281 sv, err := isSubvolume(p) 282 if err != nil { 283 return fmt.Errorf("Failed to test if %s is a btrfs subvolume: %v", p, err) 284 } 285 if sv { 286 if err := subvolDelete(path.Dir(p), f.Name(), quotaEnabled); err != nil { 287 return fmt.Errorf("Failed to destroy btrfs child subvolume (%s) of parent (%s): %v", p, dirpath, err) 288 } 289 } 290 } 291 return nil 292 } 293 if err := filepath.Walk(path.Join(dirpath, name), walkSubvolumes); err != nil { 294 return fmt.Errorf("Recursively walking subvolumes for %s failed: %v", dirpath, err) 295 } 296 297 if quotaEnabled { 298 if qgroupid, err := subvolLookupQgroup(fullPath); err == nil { 299 var args C.struct_btrfs_ioctl_qgroup_create_args 300 args.qgroupid = C.__u64(qgroupid) 301 302 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE, 303 uintptr(unsafe.Pointer(&args))) 304 if errno != 0 { 305 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error()) 306 } 307 } else { 308 logrus.WithField("storage-driver", "btrfs").Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error()) 309 } 310 } 311 312 // all subvolumes have been removed 313 // now remove the one originally passed in 314 for i, c := range []byte(name) { 315 args.name[i] = C.char(c) 316 } 317 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY, 318 uintptr(unsafe.Pointer(&args))) 319 if errno != 0 { 320 return fmt.Errorf("Failed to destroy btrfs snapshot %s for %s: %v", dirpath, name, errno.Error()) 321 } 322 return nil 323 } 324 325 func (d *Driver) updateQuotaStatus() { 326 d.once.Do(func() { 327 if !d.quotaEnabled { 328 // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed 329 if err := qgroupStatus(d.home); err != nil { 330 // quota is still not enabled 331 return 332 } 333 d.quotaEnabled = true 334 } 335 }) 336 } 337 338 func (d *Driver) enableQuota() error { 339 d.updateQuotaStatus() 340 341 if d.quotaEnabled { 342 return nil 343 } 344 345 dir, err := openDir(d.home) 346 if err != nil { 347 return err 348 } 349 defer closeDir(dir) 350 351 var args C.struct_btrfs_ioctl_quota_ctl_args 352 args.cmd = C.BTRFS_QUOTA_CTL_ENABLE 353 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, 354 uintptr(unsafe.Pointer(&args))) 355 if errno != 0 { 356 return fmt.Errorf("Failed to enable btrfs quota for %s: %v", dir, errno.Error()) 357 } 358 359 d.quotaEnabled = true 360 361 return nil 362 } 363 364 func (d *Driver) subvolRescanQuota() error { 365 d.updateQuotaStatus() 366 367 if !d.quotaEnabled { 368 return nil 369 } 370 371 dir, err := openDir(d.home) 372 if err != nil { 373 return err 374 } 375 defer closeDir(dir) 376 377 var args C.struct_btrfs_ioctl_quota_rescan_args 378 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT, 379 uintptr(unsafe.Pointer(&args))) 380 if errno != 0 { 381 return fmt.Errorf("Failed to rescan btrfs quota for %s: %v", dir, errno.Error()) 382 } 383 384 return nil 385 } 386 387 func subvolLimitQgroup(path string, size uint64) error { 388 dir, err := openDir(path) 389 if err != nil { 390 return err 391 } 392 defer closeDir(dir) 393 394 var args C.struct_btrfs_ioctl_qgroup_limit_args 395 args.lim.max_referenced = C.__u64(size) 396 args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER 397 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT, 398 uintptr(unsafe.Pointer(&args))) 399 if errno != 0 { 400 return fmt.Errorf("Failed to limit qgroup for %s: %v", dir, errno.Error()) 401 } 402 403 return nil 404 } 405 406 // qgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path 407 // with search key of BTRFS_QGROUP_STATUS_KEY. 408 // In case qgroup is enabled, the retuned key type will match BTRFS_QGROUP_STATUS_KEY. 409 // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 410 func qgroupStatus(path string) error { 411 dir, err := openDir(path) 412 if err != nil { 413 return err 414 } 415 defer closeDir(dir) 416 417 var args C.struct_btrfs_ioctl_search_args 418 args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID 419 args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY 420 args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY 421 args.key.max_objectid = C.__u64(math.MaxUint64) 422 args.key.max_offset = C.__u64(math.MaxUint64) 423 args.key.max_transid = C.__u64(math.MaxUint64) 424 args.key.nr_items = 4096 425 426 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH, 427 uintptr(unsafe.Pointer(&args))) 428 if errno != 0 { 429 return fmt.Errorf("Failed to search qgroup for %s: %v", path, errno.Error()) 430 } 431 sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf)) 432 if sh._type != C.BTRFS_QGROUP_STATUS_KEY { 433 return fmt.Errorf("Invalid qgroup search header type for %s: %v", path, sh._type) 434 } 435 return nil 436 } 437 438 func subvolLookupQgroup(path string) (uint64, error) { 439 dir, err := openDir(path) 440 if err != nil { 441 return 0, err 442 } 443 defer closeDir(dir) 444 445 var args C.struct_btrfs_ioctl_ino_lookup_args 446 args.objectid = C.BTRFS_FIRST_FREE_OBJECTID 447 448 _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP, 449 uintptr(unsafe.Pointer(&args))) 450 if errno != 0 { 451 return 0, fmt.Errorf("Failed to lookup qgroup for %s: %v", dir, errno.Error()) 452 } 453 if args.treeid == 0 { 454 return 0, fmt.Errorf("Invalid qgroup id for %s: 0", dir) 455 } 456 457 return uint64(args.treeid), nil 458 } 459 460 func (d *Driver) subvolumesDir() string { 461 return path.Join(d.home, "subvolumes") 462 } 463 464 func (d *Driver) subvolumesDirID(id string) string { 465 return path.Join(d.subvolumesDir(), id) 466 } 467 468 func (d *Driver) quotasDir() string { 469 return path.Join(d.home, "quotas") 470 } 471 472 func (d *Driver) quotasDirID(id string) string { 473 return path.Join(d.quotasDir(), id) 474 } 475 476 // CreateReadWrite creates a layer that is writable for use as a container 477 // file system. 478 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error { 479 return d.Create(id, parent, opts) 480 } 481 482 // Create the filesystem with given id. 483 func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error { 484 quotas := path.Join(d.home, "quotas") 485 subvolumes := path.Join(d.home, "subvolumes") 486 rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps) 487 if err != nil { 488 return err 489 } 490 if err := idtools.MkdirAllAndChown(subvolumes, 0701, idtools.CurrentIdentity()); err != nil { 491 return err 492 } 493 if parent == "" { 494 if err := subvolCreate(subvolumes, id); err != nil { 495 return err 496 } 497 } else { 498 parentDir := d.subvolumesDirID(parent) 499 st, err := os.Stat(parentDir) 500 if err != nil { 501 return err 502 } 503 if !st.IsDir() { 504 return fmt.Errorf("%s: not a directory", parentDir) 505 } 506 if err := subvolSnapshot(parentDir, subvolumes, id); err != nil { 507 return err 508 } 509 } 510 511 var storageOpt map[string]string 512 if opts != nil { 513 storageOpt = opts.StorageOpt 514 } 515 516 if _, ok := storageOpt["size"]; ok { 517 driver := &Driver{} 518 if err := d.parseStorageOpt(storageOpt, driver); err != nil { 519 return err 520 } 521 522 if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil { 523 return err 524 } 525 if err := idtools.MkdirAllAndChown(quotas, 0700, idtools.CurrentIdentity()); err != nil { 526 return err 527 } 528 if err := ioutil.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil { 529 return err 530 } 531 } 532 533 // if we have a remapped root (user namespaces enabled), change the created snapshot 534 // dir ownership to match 535 if rootUID != 0 || rootGID != 0 { 536 if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil { 537 return err 538 } 539 } 540 541 mountLabel := "" 542 if opts != nil { 543 mountLabel = opts.MountLabel 544 } 545 546 return label.Relabel(path.Join(subvolumes, id), mountLabel, false) 547 } 548 549 // Parse btrfs storage options 550 func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error { 551 // Read size to change the subvolume disk quota per container 552 for key, val := range storageOpt { 553 key := strings.ToLower(key) 554 switch key { 555 case "size": 556 size, err := units.RAMInBytes(val) 557 if err != nil { 558 return err 559 } 560 driver.options.size = uint64(size) 561 default: 562 return fmt.Errorf("Unknown option %s", key) 563 } 564 } 565 566 return nil 567 } 568 569 // Set btrfs storage size 570 func (d *Driver) setStorageSize(dir string, driver *Driver) error { 571 if driver.options.size == 0 { 572 return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size))) 573 } 574 if d.options.minSpace > 0 && driver.options.size < d.options.minSpace { 575 return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) 576 } 577 if err := d.enableQuota(); err != nil { 578 return err 579 } 580 return subvolLimitQgroup(dir, driver.options.size) 581 } 582 583 // Remove the filesystem with given id. 584 func (d *Driver) Remove(id string) error { 585 dir := d.subvolumesDirID(id) 586 if _, err := os.Stat(dir); err != nil { 587 return err 588 } 589 quotasDir := d.quotasDirID(id) 590 if _, err := os.Stat(quotasDir); err == nil { 591 if err := os.Remove(quotasDir); err != nil { 592 return err 593 } 594 } else if !os.IsNotExist(err) { 595 return err 596 } 597 598 // Call updateQuotaStatus() to invoke status update 599 d.updateQuotaStatus() 600 601 if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil { 602 if d.quotaEnabled { 603 return err 604 } 605 // If quota is not enabled, fallback to rmdir syscall to delete subvolumes. 606 // This would allow unprivileged user to delete their owned subvolumes 607 // in kernel >= 4.18 without user_subvol_rm_allowed mount option. 608 // 609 // From https://github.com/containers/storage/pull/508/commits/831e32b6bdcb530acc4c1cb9059d3c6dba14208c 610 } 611 if err := system.EnsureRemoveAll(dir); err != nil { 612 return err 613 } 614 return d.subvolRescanQuota() 615 } 616 617 // Get the requested filesystem id. 618 func (d *Driver) Get(id, mountLabel string) (containerfs.ContainerFS, error) { 619 dir := d.subvolumesDirID(id) 620 st, err := os.Stat(dir) 621 if err != nil { 622 return nil, err 623 } 624 625 if !st.IsDir() { 626 return nil, fmt.Errorf("%s: not a directory", dir) 627 } 628 629 if quota, err := ioutil.ReadFile(d.quotasDirID(id)); err == nil { 630 if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { 631 if err := d.enableQuota(); err != nil { 632 return nil, err 633 } 634 if err := subvolLimitQgroup(dir, size); err != nil { 635 return nil, err 636 } 637 } 638 } 639 640 return containerfs.NewLocalContainerFS(dir), nil 641 } 642 643 // Put is not implemented for BTRFS as there is no cleanup required for the id. 644 func (d *Driver) Put(id string) error { 645 // Get() creates no runtime resources (like e.g. mounts) 646 // so this doesn't need to do anything. 647 return nil 648 } 649 650 // Exists checks if the id exists in the filesystem. 651 func (d *Driver) Exists(id string) bool { 652 dir := d.subvolumesDirID(id) 653 _, err := os.Stat(dir) 654 return err == nil 655 }