github.com/tonistiigi/docker@v0.10.1-0.20240229224939-974013b0dc6a/quota/projectquota.go (about)

     1  //go:build linux && !exclude_disk_quota && cgo
     2  
     3  //
     4  // projectquota.go - implements XFS project quota controls
     5  // for setting quota limits on a newly created directory.
     6  // It currently supports the legacy XFS specific ioctls.
     7  //
     8  // TODO: use generic quota control ioctl FS_IOC_FS{GET,SET}XATTR
     9  //       for both xfs/ext4 for kernel version >= v4.5
    10  //
    11  
    12  package quota // import "github.com/docker/docker/quota"
    13  
    14  /*
    15  #include <stdlib.h>
    16  #include <dirent.h>
    17  #include <linux/fs.h>
    18  #include <linux/quota.h>
    19  #include <linux/dqblk_xfs.h>
    20  
    21  #ifndef FS_XFLAG_PROJINHERIT
    22  struct fsxattr {
    23  	__u32		fsx_xflags;
    24  	__u32		fsx_extsize;
    25  	__u32		fsx_nextents;
    26  	__u32		fsx_projid;
    27  	unsigned char	fsx_pad[12];
    28  };
    29  #define FS_XFLAG_PROJINHERIT	0x00000200
    30  #endif
    31  #ifndef FS_IOC_FSGETXATTR
    32  #define FS_IOC_FSGETXATTR		_IOR ('X', 31, struct fsxattr)
    33  #endif
    34  #ifndef FS_IOC_FSSETXATTR
    35  #define FS_IOC_FSSETXATTR		_IOW ('X', 32, struct fsxattr)
    36  #endif
    37  
    38  #ifndef PRJQUOTA
    39  #define PRJQUOTA	2
    40  #endif
    41  #ifndef XFS_PROJ_QUOTA
    42  #define XFS_PROJ_QUOTA	2
    43  #endif
    44  #ifndef Q_XSETPQLIM
    45  #define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
    46  #endif
    47  #ifndef Q_XGETPQUOTA
    48  #define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
    49  #endif
    50  
    51  const int Q_XGETQSTAT_PRJQUOTA = QCMD(Q_XGETQSTAT, PRJQUOTA);
    52  */
    53  import "C"
    54  
    55  import (
    56  	"context"
    57  	"os"
    58  	"path"
    59  	"path/filepath"
    60  	"sync"
    61  	"unsafe"
    62  
    63  	"github.com/containerd/containerd/pkg/userns"
    64  	"github.com/containerd/log"
    65  	"github.com/pkg/errors"
    66  	"golang.org/x/sys/unix"
    67  )
    68  
    69  type pquotaState struct {
    70  	sync.Mutex
    71  	nextProjectID uint32
    72  }
    73  
    74  var (
    75  	pquotaStateInst *pquotaState
    76  	pquotaStateOnce sync.Once
    77  )
    78  
    79  // getPquotaState - get global pquota state tracker instance
    80  func getPquotaState() *pquotaState {
    81  	pquotaStateOnce.Do(func() {
    82  		pquotaStateInst = &pquotaState{
    83  			nextProjectID: 1,
    84  		}
    85  	})
    86  	return pquotaStateInst
    87  }
    88  
    89  // registerBasePath - register a new base path and update nextProjectID
    90  func (state *pquotaState) updateMinProjID(minProjectID uint32) {
    91  	state.Lock()
    92  	defer state.Unlock()
    93  	if state.nextProjectID <= minProjectID {
    94  		state.nextProjectID = minProjectID + 1
    95  	}
    96  }
    97  
    98  // NewControl - initialize project quota support.
    99  // Test to make sure that quota can be set on a test dir and find
   100  // the first project id to be used for the next container create.
   101  //
   102  // Returns nil (and error) if project quota is not supported.
   103  //
   104  // First get the project id of the home directory.
   105  // This test will fail if the backing fs is not xfs.
   106  //
   107  // xfs_quota tool can be used to assign a project id to the driver home directory, e.g.:
   108  //
   109  //	echo 999:/var/lib/docker/overlay2 >> /etc/projects
   110  //	echo docker:999 >> /etc/projid
   111  //	xfs_quota -x -c 'project -s docker' /<xfs mount point>
   112  //
   113  // In that case, the home directory project id will be used as a "start offset"
   114  // and all containers will be assigned larger project ids (e.g. >= 1000).
   115  // This is a way to prevent xfs_quota management from conflicting with docker.
   116  //
   117  // Then try to create a test directory with the next project id and set a quota
   118  // on it. If that works, continue to scan existing containers to map allocated
   119  // project ids.
   120  func NewControl(basePath string) (*Control, error) {
   121  	//
   122  	// If we are running in a user namespace quota won't be supported for
   123  	// now since makeBackingFsDev() will try to mknod().
   124  	//
   125  	if userns.RunningInUserNS() {
   126  		return nil, ErrQuotaNotSupported
   127  	}
   128  
   129  	//
   130  	// create backing filesystem device node
   131  	//
   132  	backingFsBlockDev, err := makeBackingFsDev(basePath)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  
   137  	// check if we can call quotactl with project quotas
   138  	// as a mechanism to determine (early) if we have support
   139  	hasQuotaSupport, err := hasQuotaSupport(backingFsBlockDev)
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  	if !hasQuotaSupport {
   144  		return nil, ErrQuotaNotSupported
   145  	}
   146  
   147  	//
   148  	// Get project id of parent dir as minimal id to be used by driver
   149  	//
   150  	baseProjectID, err := getProjectID(basePath)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	minProjectID := baseProjectID + 1
   155  
   156  	//
   157  	// Test if filesystem supports project quotas by trying to set
   158  	// a quota on the first available project id
   159  	//
   160  	quota := Quota{
   161  		Size: 0,
   162  	}
   163  	if err := setProjectQuota(backingFsBlockDev, minProjectID, quota); err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	q := Control{
   168  		backingFsBlockDev: backingFsBlockDev,
   169  		quotas:            make(map[string]uint32),
   170  	}
   171  
   172  	//
   173  	// update minimum project ID
   174  	//
   175  	state := getPquotaState()
   176  	state.updateMinProjID(minProjectID)
   177  
   178  	//
   179  	// get first project id to be used for next container
   180  	//
   181  	err = q.findNextProjectID(basePath, baseProjectID)
   182  	if err != nil {
   183  		return nil, err
   184  	}
   185  
   186  	log.G(context.TODO()).Debugf("NewControl(%s): nextProjectID = %d", basePath, state.nextProjectID)
   187  	return &q, nil
   188  }
   189  
   190  // SetQuota - assign a unique project id to directory and set the quota limits
   191  // for that project id
   192  func (q *Control) SetQuota(targetPath string, quota Quota) error {
   193  	q.RLock()
   194  	projectID, ok := q.quotas[targetPath]
   195  	q.RUnlock()
   196  	if !ok {
   197  		state := getPquotaState()
   198  		state.Lock()
   199  		projectID = state.nextProjectID
   200  
   201  		//
   202  		// assign project id to new container directory
   203  		//
   204  		err := setProjectID(targetPath, projectID)
   205  		if err != nil {
   206  			state.Unlock()
   207  			return err
   208  		}
   209  
   210  		state.nextProjectID++
   211  		state.Unlock()
   212  
   213  		q.Lock()
   214  		q.quotas[targetPath] = projectID
   215  		q.Unlock()
   216  	}
   217  
   218  	//
   219  	// set the quota limit for the container's project id
   220  	//
   221  	log.G(context.TODO()).Debugf("SetQuota(%s, %d): projectID=%d", targetPath, quota.Size, projectID)
   222  	return setProjectQuota(q.backingFsBlockDev, projectID, quota)
   223  }
   224  
   225  // setProjectQuota - set the quota for project id on xfs block device
   226  func setProjectQuota(backingFsBlockDev string, projectID uint32, quota Quota) error {
   227  	var d C.fs_disk_quota_t
   228  	d.d_version = C.FS_DQUOT_VERSION
   229  	d.d_id = C.__u32(projectID)
   230  	d.d_flags = C.XFS_PROJ_QUOTA
   231  
   232  	d.d_fieldmask = C.FS_DQ_BHARD | C.FS_DQ_BSOFT
   233  	d.d_blk_hardlimit = C.__u64(quota.Size / 512)
   234  	d.d_blk_softlimit = d.d_blk_hardlimit
   235  
   236  	cs := C.CString(backingFsBlockDev)
   237  	defer C.free(unsafe.Pointer(cs))
   238  
   239  	_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XSETPQLIM,
   240  		uintptr(unsafe.Pointer(cs)), uintptr(d.d_id),
   241  		uintptr(unsafe.Pointer(&d)), 0, 0)
   242  	if errno != 0 {
   243  		return errors.Wrapf(errno, "failed to set quota limit for projid %d on %s",
   244  			projectID, backingFsBlockDev)
   245  	}
   246  
   247  	return nil
   248  }
   249  
   250  // GetQuota - get the quota limits of a directory that was configured with SetQuota
   251  func (q *Control) GetQuota(targetPath string, quota *Quota) error {
   252  	q.RLock()
   253  	projectID, ok := q.quotas[targetPath]
   254  	q.RUnlock()
   255  	if !ok {
   256  		return errors.Errorf("quota not found for path: %s", targetPath)
   257  	}
   258  
   259  	//
   260  	// get the quota limit for the container's project id
   261  	//
   262  	var d C.fs_disk_quota_t
   263  
   264  	cs := C.CString(q.backingFsBlockDev)
   265  	defer C.free(unsafe.Pointer(cs))
   266  
   267  	_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XGETPQUOTA,
   268  		uintptr(unsafe.Pointer(cs)), uintptr(C.__u32(projectID)),
   269  		uintptr(unsafe.Pointer(&d)), 0, 0)
   270  	if errno != 0 {
   271  		return errors.Wrapf(errno, "Failed to get quota limit for projid %d on %s",
   272  			projectID, q.backingFsBlockDev)
   273  	}
   274  	quota.Size = uint64(d.d_blk_hardlimit) * 512
   275  
   276  	return nil
   277  }
   278  
   279  // getProjectID - get the project id of path on xfs
   280  func getProjectID(targetPath string) (uint32, error) {
   281  	dir, err := openDir(targetPath)
   282  	if err != nil {
   283  		return 0, err
   284  	}
   285  	defer closeDir(dir)
   286  
   287  	var fsx C.struct_fsxattr
   288  	_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
   289  		uintptr(unsafe.Pointer(&fsx)))
   290  	if errno != 0 {
   291  		return 0, errors.Wrapf(errno, "failed to get projid for %s", targetPath)
   292  	}
   293  
   294  	return uint32(fsx.fsx_projid), nil
   295  }
   296  
   297  // setProjectID - set the project id of path on xfs
   298  func setProjectID(targetPath string, projectID uint32) error {
   299  	dir, err := openDir(targetPath)
   300  	if err != nil {
   301  		return err
   302  	}
   303  	defer closeDir(dir)
   304  
   305  	var fsx C.struct_fsxattr
   306  	_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
   307  		uintptr(unsafe.Pointer(&fsx)))
   308  	if errno != 0 {
   309  		return errors.Wrapf(errno, "failed to get projid for %s", targetPath)
   310  	}
   311  	fsx.fsx_projid = C.__u32(projectID)
   312  	fsx.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
   313  	_, _, errno = unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSSETXATTR,
   314  		uintptr(unsafe.Pointer(&fsx)))
   315  	if errno != 0 {
   316  		return errors.Wrapf(errno, "failed to set projid for %s", targetPath)
   317  	}
   318  
   319  	return nil
   320  }
   321  
   322  // findNextProjectID - find the next project id to be used for containers
   323  // by scanning driver home directory to find used project ids
   324  func (q *Control) findNextProjectID(home string, baseID uint32) error {
   325  	state := getPquotaState()
   326  	state.Lock()
   327  	defer state.Unlock()
   328  
   329  	checkProjID := func(path string) (uint32, error) {
   330  		projid, err := getProjectID(path)
   331  		if err != nil {
   332  			return projid, err
   333  		}
   334  		if projid > 0 {
   335  			q.quotas[path] = projid
   336  		}
   337  		if state.nextProjectID <= projid {
   338  			state.nextProjectID = projid + 1
   339  		}
   340  		return projid, nil
   341  	}
   342  
   343  	files, err := os.ReadDir(home)
   344  	if err != nil {
   345  		return errors.Errorf("read directory failed: %s", home)
   346  	}
   347  	for _, file := range files {
   348  		if !file.IsDir() {
   349  			continue
   350  		}
   351  		path := filepath.Join(home, file.Name())
   352  		projid, err := checkProjID(path)
   353  		if err != nil {
   354  			return err
   355  		}
   356  		if projid > 0 && projid != baseID {
   357  			continue
   358  		}
   359  		subfiles, err := os.ReadDir(path)
   360  		if err != nil {
   361  			return errors.Errorf("read directory failed: %s", path)
   362  		}
   363  		for _, subfile := range subfiles {
   364  			if !subfile.IsDir() {
   365  				continue
   366  			}
   367  			subpath := filepath.Join(path, subfile.Name())
   368  			_, err := checkProjID(subpath)
   369  			if err != nil {
   370  				return err
   371  			}
   372  		}
   373  	}
   374  
   375  	return nil
   376  }
   377  
   378  func free(p *C.char) {
   379  	C.free(unsafe.Pointer(p))
   380  }
   381  
   382  func openDir(path string) (*C.DIR, error) {
   383  	Cpath := C.CString(path)
   384  	defer free(Cpath)
   385  
   386  	dir := C.opendir(Cpath)
   387  	if dir == nil {
   388  		return nil, errors.Errorf("failed to open dir: %s", path)
   389  	}
   390  	return dir, nil
   391  }
   392  
   393  func closeDir(dir *C.DIR) {
   394  	if dir != nil {
   395  		C.closedir(dir)
   396  	}
   397  }
   398  
   399  func getDirFd(dir *C.DIR) uintptr {
   400  	return uintptr(C.dirfd(dir))
   401  }
   402  
   403  // makeBackingFsDev gets the backing block device of the driver home directory
   404  // and creates a block device node under the home directory to be used by
   405  // quotactl commands.
   406  func makeBackingFsDev(home string) (string, error) {
   407  	var stat unix.Stat_t
   408  	if err := unix.Stat(home, &stat); err != nil {
   409  		return "", err
   410  	}
   411  
   412  	backingFsBlockDev := path.Join(home, "backingFsBlockDev")
   413  	// Re-create just in case someone copied the home directory over to a new device
   414  	unix.Unlink(backingFsBlockDev)
   415  	err := unix.Mknod(backingFsBlockDev, unix.S_IFBLK|0o600, int(stat.Dev))
   416  	switch err {
   417  	case nil:
   418  		return backingFsBlockDev, nil
   419  
   420  	case unix.ENOSYS, unix.EPERM:
   421  		return "", ErrQuotaNotSupported
   422  
   423  	default:
   424  		return "", errors.Wrapf(err, "failed to mknod %s", backingFsBlockDev)
   425  	}
   426  }
   427  
   428  func hasQuotaSupport(backingFsBlockDev string) (bool, error) {
   429  	cs := C.CString(backingFsBlockDev)
   430  	defer free(cs)
   431  	var qstat C.fs_quota_stat_t
   432  
   433  	_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, uintptr(C.Q_XGETQSTAT_PRJQUOTA), uintptr(unsafe.Pointer(cs)), 0, uintptr(unsafe.Pointer(&qstat)), 0, 0)
   434  	if errno == 0 && qstat.qs_flags&C.FS_QUOTA_PDQ_ENFD > 0 && qstat.qs_flags&C.FS_QUOTA_PDQ_ACCT > 0 {
   435  		return true, nil
   436  	}
   437  
   438  	switch errno {
   439  	// These are the known fatal errors, consider all other errors (ENOTTY, etc.. not supporting quota)
   440  	case unix.EFAULT, unix.ENOENT, unix.ENOTBLK, unix.EPERM:
   441  	default:
   442  		return false, nil
   443  	}
   444  
   445  	return false, errno
   446  }