github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/disklayout/superblock.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package disklayout
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/marshal"
    19  )
    20  
    21  const (
    22  	// SbOffset is the absolute offset at which the superblock is placed.
    23  	SbOffset = 1024
    24  )
    25  
// SuperBlock should be implemented by structs representing the ext superblock.
// The superblock holds a lot of information about the enclosing filesystem.
// This interface aims to provide access methods to important information held
// by the superblock. It does NOT expose all fields of the superblock, only the
// ones necessary. This can be expanded when need be.
//
// Location and replication:
//     - The superblock is located at offset 1024 in block group 0.
//     - Redundant copies of the superblock and group descriptors are kept in
//       all groups if the SbSparse feature flag is NOT set. If it is set, the
//       replicas only exist in groups whose group number is either 0 or a
//       power of 3, 5, or 7.
//     - There is also a sparse superblock feature v2 in which there are just
//       two replicas saved in the block groups pointed to by sb.s_backup_bgs.
//
// Replicas should eventually be updated if the superblock is updated.
//
// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block.
type SuperBlock interface {
	marshal.Marshallable

	// InodesCount returns the total number of inodes in this filesystem.
	InodesCount() uint32

	// BlocksCount returns the total number of data blocks in this filesystem.
	BlocksCount() uint64

	// FreeBlocksCount returns the number of free blocks in this filesystem.
	FreeBlocksCount() uint64

	// FreeInodesCount returns the number of free inodes in this filesystem.
	FreeInodesCount() uint32

	// MountCount returns the number of mounts since the last fsck.
	MountCount() uint16

	// MaxMountCount returns the number of mounts allowed beyond which a fsck is
	// needed.
	MaxMountCount() uint16

	// FirstDataBlock returns the absolute block number of the first data block,
	// which contains the super block itself.
	//
	// If the filesystem has 1kb data blocks then this should return 1. For all
	// other configurations, this typically returns 0.
	FirstDataBlock() uint32

	// BlockSize returns the size of one data block in this filesystem.
	// This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that
	// the smallest block size is 1kb.
	BlockSize() uint64

	// BlocksPerGroup returns the number of data blocks in a block group.
	BlocksPerGroup() uint32

	// ClusterSize returns block cluster size (set during mkfs time by admin).
	// This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that
	// the smallest cluster size is 1kb.
	//
	// sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature
	// is NOT set and consequently BlockSize() = ClusterSize() in that case.
	ClusterSize() uint64

	// ClustersPerGroup returns:
	//     - number of clusters per group        if bigalloc is enabled.
	//     - BlocksPerGroup()                    otherwise.
	ClustersPerGroup() uint32

	// InodeSize returns the inode disk record size in bytes. Use this to
	// iterate over inode arrays on disk.
	//
	// In ext2 and ext3:
	//     - Each inode had a disk record of 128 bytes.
	//     - The inode struct size was fixed at 128 bytes.
	//
	// In ext4 it's possible to allocate larger on-disk inodes:
	//     - Inode disk record size = sb.s_inode_size (function return value).
	//                              = 256 (default)
	//     - Inode struct size = 128 + inode.i_extra_isize.
	//                         = 128 + 32 = 160 (default)
	InodeSize() uint16

	// InodesPerGroup returns the number of inodes in a block group.
	InodesPerGroup() uint32

	// BgDescSize returns the size of the block group descriptor struct.
	//
	// In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor
	// is only 32 bytes long.
	// In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST
	// 64 bytes. It might be bigger than that.
	BgDescSize() uint16

	// CompatibleFeatures returns the CompatFeatures struct which holds all the
	// compatible features this fs supports.
	CompatibleFeatures() CompatFeatures

	// IncompatibleFeatures returns the IncompatFeatures struct which holds all
	// the incompatible features this fs supports.
	IncompatibleFeatures() IncompatFeatures

	// ReadOnlyCompatibleFeatures returns the RoCompatFeatures struct which holds
	// all the readonly compatible features this fs supports.
	ReadOnlyCompatibleFeatures() RoCompatFeatures

	// Magic returns the magic signature which must be 0xef53.
	Magic() uint16

	// Revision returns the superblock revision. Superblock struct fields from
	// offset 0x54 till 0x150 should only be used if superblock has DynamicRev.
	Revision() SbRevision
}
   138  
   139  // SbRevision is the type for superblock revisions.
   140  type SbRevision uint32
   141  
   142  // Super block revisions.
   143  const (
   144  	// OldRev is the good old (original) format.
   145  	OldRev SbRevision = 0
   146  
   147  	// DynamicRev is v2 format w/ dynamic inode sizes.
   148  	DynamicRev SbRevision = 1
   149  )
   150  
   151  // Superblock compatible features.
   152  // This is not exhaustive, unused features are not listed.
   153  const (
   154  	// SbDirPrealloc indicates directory preallocation.
   155  	SbDirPrealloc = 0x1
   156  
   157  	// SbHasJournal indicates the presence of a journal. jbd2 should only work
   158  	// with this being set.
   159  	SbHasJournal = 0x4
   160  
   161  	// SbExtAttr indicates extended attributes support.
   162  	SbExtAttr = 0x8
   163  
   164  	// SbResizeInode indicates that the fs has reserved GDT blocks (right after
   165  	// group descriptors) for fs expansion.
   166  	SbResizeInode = 0x10
   167  
   168  	// SbDirIndex indicates that the fs has directory indices.
   169  	SbDirIndex = 0x20
   170  
   171  	// SbSparseV2 stands for Sparse superblock version 2.
   172  	SbSparseV2 = 0x200
   173  )
   174  
// CompatFeatures represents a superblock's compatible feature set. If the
// kernel does not understand any of these features, it can still read/write
// to this fs.
//
// Each field mirrors one compatible feature flag bit; ToInt and
// CompatFeaturesFromInt convert between this struct and the 32-bit flag word.
type CompatFeatures struct {
	// DirPrealloc mirrors SbDirPrealloc (directory preallocation).
	DirPrealloc bool
	// HasJournal mirrors SbHasJournal (a journal is present).
	HasJournal  bool
	// ExtAttr mirrors SbExtAttr (extended attributes support).
	ExtAttr     bool
	// ResizeInode mirrors SbResizeInode (reserved GDT blocks for fs expansion).
	ResizeInode bool
	// DirIndex mirrors SbDirIndex (the fs has directory indices).
	DirIndex    bool
	// SparseV2 mirrors SbSparseV2 (sparse superblock version 2).
	SparseV2    bool
}
   186  
   187  // ToInt converts superblock compatible features back to its 32-bit rep.
   188  func (f CompatFeatures) ToInt() uint32 {
   189  	var res uint32
   190  
   191  	if f.DirPrealloc {
   192  		res |= SbDirPrealloc
   193  	}
   194  	if f.HasJournal {
   195  		res |= SbHasJournal
   196  	}
   197  	if f.ExtAttr {
   198  		res |= SbExtAttr
   199  	}
   200  	if f.ResizeInode {
   201  		res |= SbResizeInode
   202  	}
   203  	if f.DirIndex {
   204  		res |= SbDirIndex
   205  	}
   206  	if f.SparseV2 {
   207  		res |= SbSparseV2
   208  	}
   209  
   210  	return res
   211  }
   212  
   213  // CompatFeaturesFromInt converts the integer representation of superblock
   214  // compatible features to CompatFeatures struct.
   215  func CompatFeaturesFromInt(f uint32) CompatFeatures {
   216  	return CompatFeatures{
   217  		DirPrealloc: f&SbDirPrealloc > 0,
   218  		HasJournal:  f&SbHasJournal > 0,
   219  		ExtAttr:     f&SbExtAttr > 0,
   220  		ResizeInode: f&SbResizeInode > 0,
   221  		DirIndex:    f&SbDirIndex > 0,
   222  		SparseV2:    f&SbSparseV2 > 0,
   223  	}
   224  }
   225  
   226  // Superblock incompatible features.
   227  // This is not exhaustive, unused features are not listed.
   228  const (
   229  	// SbDirentFileType indicates that directory entries record the file type.
   230  	// We should use struct DirentNew for dirents then.
   231  	SbDirentFileType = 0x2
   232  
   233  	// SbRecovery indicates that the filesystem needs recovery.
   234  	SbRecovery = 0x4
   235  
   236  	// SbJournalDev indicates that the filesystem has a separate journal device.
   237  	SbJournalDev = 0x8
   238  
   239  	// SbMetaBG indicates that the filesystem is using Meta block groups. Moves
   240  	// the group descriptors from the congested first block group into the first
   241  	// group of each metablock group to increase the maximum block groups limit
   242  	// and hence support much larger filesystems.
   243  	//
   244  	// See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups.
   245  	SbMetaBG = 0x10
   246  
   247  	// SbExtents indicates that the filesystem uses extents. Must be set in ext4
   248  	// filesystems.
   249  	SbExtents = 0x40
   250  
   251  	// SbIs64Bit indicates that this filesystem addresses blocks with 64-bits.
   252  	// Hence can support 2^64 data blocks.
   253  	SbIs64Bit = 0x80
   254  
   255  	// SbMMP indicates that this filesystem has multiple mount protection.
   256  	//
   257  	// See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection.
   258  	SbMMP = 0x100
   259  
   260  	// SbFlexBg indicates that this filesystem has flexible block groups. Several
   261  	// block groups are tied into one logical block group so that all the metadata
   262  	// for the block groups (bitmaps and inode tables) are close together for
   263  	// faster loading. Consequently, large files will be continuous on disk.
   264  	// However, this does not affect the placement of redundant superblocks and
   265  	// group descriptors.
   266  	//
   267  	// See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups.
   268  	SbFlexBg = 0x200
   269  
   270  	// SbLargeDir shows that large directory enabled. Directory htree can be 3
   271  	// levels deep. Directory htrees are allowed to be 2 levels deep otherwise.
   272  	SbLargeDir = 0x4000
   273  
   274  	// SbInlineData allows inline data in inodes for really small files.
   275  	SbInlineData = 0x8000
   276  
   277  	// SbEncrypted indicates that this fs contains encrypted inodes.
   278  	SbEncrypted = 0x10000
   279  )
   280  
// IncompatFeatures represents a superblock's incompatible feature set. If the
// kernel does not understand any of these features, it should refuse to mount.
//
// Each field mirrors one incompatible feature flag bit; ToInt and
// IncompatFeaturesFromInt convert between this struct and the 32-bit flag
// word.
type IncompatFeatures struct {
	// DirentFileType mirrors SbDirentFileType (dirents record the file type).
	DirentFileType bool
	// Recovery mirrors SbRecovery (the filesystem needs recovery).
	Recovery       bool
	// JournalDev mirrors SbJournalDev (separate journal device).
	JournalDev     bool
	// MetaBG mirrors SbMetaBG (meta block groups are in use).
	MetaBG         bool
	// Extents mirrors SbExtents (the filesystem uses extents).
	Extents        bool
	// Is64Bit mirrors SbIs64Bit (blocks are addressed with 64 bits).
	Is64Bit        bool
	// MMP mirrors SbMMP (multiple mount protection).
	MMP            bool
	// FlexBg mirrors SbFlexBg (flexible block groups).
	FlexBg         bool
	// LargeDir mirrors SbLargeDir (large directories are enabled).
	LargeDir       bool
	// InlineData mirrors SbInlineData (inline data in inodes).
	InlineData     bool
	// Encrypted mirrors SbEncrypted (the fs contains encrypted inodes).
	Encrypted      bool
}
   296  
   297  // ToInt converts superblock incompatible features back to its 32-bit rep.
   298  func (f IncompatFeatures) ToInt() uint32 {
   299  	var res uint32
   300  
   301  	if f.DirentFileType {
   302  		res |= SbDirentFileType
   303  	}
   304  	if f.Recovery {
   305  		res |= SbRecovery
   306  	}
   307  	if f.JournalDev {
   308  		res |= SbJournalDev
   309  	}
   310  	if f.MetaBG {
   311  		res |= SbMetaBG
   312  	}
   313  	if f.Extents {
   314  		res |= SbExtents
   315  	}
   316  	if f.Is64Bit {
   317  		res |= SbIs64Bit
   318  	}
   319  	if f.MMP {
   320  		res |= SbMMP
   321  	}
   322  	if f.FlexBg {
   323  		res |= SbFlexBg
   324  	}
   325  	if f.LargeDir {
   326  		res |= SbLargeDir
   327  	}
   328  	if f.InlineData {
   329  		res |= SbInlineData
   330  	}
   331  	if f.Encrypted {
   332  		res |= SbEncrypted
   333  	}
   334  
   335  	return res
   336  }
   337  
   338  // IncompatFeaturesFromInt converts the integer representation of superblock
   339  // incompatible features to IncompatFeatures struct.
   340  func IncompatFeaturesFromInt(f uint32) IncompatFeatures {
   341  	return IncompatFeatures{
   342  		DirentFileType: f&SbDirentFileType > 0,
   343  		Recovery:       f&SbRecovery > 0,
   344  		JournalDev:     f&SbJournalDev > 0,
   345  		MetaBG:         f&SbMetaBG > 0,
   346  		Extents:        f&SbExtents > 0,
   347  		Is64Bit:        f&SbIs64Bit > 0,
   348  		MMP:            f&SbMMP > 0,
   349  		FlexBg:         f&SbFlexBg > 0,
   350  		LargeDir:       f&SbLargeDir > 0,
   351  		InlineData:     f&SbInlineData > 0,
   352  		Encrypted:      f&SbEncrypted > 0,
   353  	}
   354  }
   355  
   356  // Superblock readonly compatible features.
   357  // This is not exhaustive, unused features are not listed.
   358  const (
   359  	// SbSparse indicates sparse superblocks. Only groups with number either 0 or
   360  	// a power of 3, 5, or 7 will have redundant copies of the superblock and
   361  	// block descriptors.
   362  	SbSparse = 0x1
   363  
   364  	// SbLargeFile indicates that this fs has been used to store a file >= 2GiB.
   365  	SbLargeFile = 0x2
   366  
   367  	// SbHugeFile indicates that this fs contains files whose sizes are
   368  	// represented in units of logicals blocks, not 512-byte sectors.
   369  	SbHugeFile = 0x8
   370  
   371  	// SbGdtCsum indicates that group descriptors have checksums.
   372  	SbGdtCsum = 0x10
   373  
   374  	// SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a
   375  	// 32,000 subdirectory limit.
   376  	SbDirNlink = 0x20
   377  
   378  	// SbExtraIsize indicates that large inodes exist on this filesystem.
   379  	SbExtraIsize = 0x40
   380  
   381  	// SbHasSnapshot indicates the existence of a snapshot.
   382  	SbHasSnapshot = 0x80
   383  
   384  	// SbQuota enables usage tracking for all quota types.
   385  	SbQuota = 0x100
   386  
   387  	// SbBigalloc maps to the bigalloc feature. When set, the minimum allocation
   388  	// unit becomes a cluster rather than a data block. Then block bitmaps track
   389  	// clusters, not data blocks.
   390  	//
   391  	// See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc.
   392  	SbBigalloc = 0x200
   393  
   394  	// SbMetadataCsum indicates that the fs supports metadata checksumming.
   395  	SbMetadataCsum = 0x400
   396  
   397  	// SbReadOnly marks this filesystem as readonly. Should refuse to mount in
   398  	// read/write mode.
   399  	SbReadOnly = 0x1000
   400  )
   401  
// RoCompatFeatures represents a superblock's readonly compatible feature set.
// If the kernel does not understand any of these features, it can still mount
// readonly. But if the user wants to mount read/write, the kernel should
// refuse to mount.
//
// Each field mirrors one readonly compatible feature flag bit; ToInt and
// RoCompatFeaturesFromInt convert between this struct and the 32-bit flag
// word.
type RoCompatFeatures struct {
	// Sparse mirrors SbSparse (sparse superblocks).
	Sparse       bool
	// LargeFile mirrors SbLargeFile (the fs has stored a file >= 2GiB).
	LargeFile    bool
	// HugeFile mirrors SbHugeFile (file sizes in units of logical blocks).
	HugeFile     bool
	// GdtCsum mirrors SbGdtCsum (group descriptors have checksums).
	GdtCsum      bool
	// DirNlink mirrors SbDirNlink (subdirectory limit of 64,999).
	DirNlink     bool
	// ExtraIsize mirrors SbExtraIsize (large inodes exist).
	ExtraIsize   bool
	// HasSnapshot mirrors SbHasSnapshot (a snapshot exists).
	HasSnapshot  bool
	// Quota mirrors SbQuota (usage tracking for all quota types).
	Quota        bool
	// Bigalloc mirrors SbBigalloc (allocation unit is a cluster).
	Bigalloc     bool
	// MetadataCsum mirrors SbMetadataCsum (metadata checksumming).
	MetadataCsum bool
	// ReadOnly mirrors SbReadOnly (the filesystem is marked readonly).
	ReadOnly     bool
}
   419  
   420  // ToInt converts superblock readonly compatible features to its 32-bit rep.
   421  func (f RoCompatFeatures) ToInt() uint32 {
   422  	var res uint32
   423  
   424  	if f.Sparse {
   425  		res |= SbSparse
   426  	}
   427  	if f.LargeFile {
   428  		res |= SbLargeFile
   429  	}
   430  	if f.HugeFile {
   431  		res |= SbHugeFile
   432  	}
   433  	if f.GdtCsum {
   434  		res |= SbGdtCsum
   435  	}
   436  	if f.DirNlink {
   437  		res |= SbDirNlink
   438  	}
   439  	if f.ExtraIsize {
   440  		res |= SbExtraIsize
   441  	}
   442  	if f.HasSnapshot {
   443  		res |= SbHasSnapshot
   444  	}
   445  	if f.Quota {
   446  		res |= SbQuota
   447  	}
   448  	if f.Bigalloc {
   449  		res |= SbBigalloc
   450  	}
   451  	if f.MetadataCsum {
   452  		res |= SbMetadataCsum
   453  	}
   454  	if f.ReadOnly {
   455  		res |= SbReadOnly
   456  	}
   457  
   458  	return res
   459  }
   460  
   461  // RoCompatFeaturesFromInt converts the integer representation of superblock
   462  // readonly compatible features to RoCompatFeatures struct.
   463  func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures {
   464  	return RoCompatFeatures{
   465  		Sparse:       f&SbSparse > 0,
   466  		LargeFile:    f&SbLargeFile > 0,
   467  		HugeFile:     f&SbHugeFile > 0,
   468  		GdtCsum:      f&SbGdtCsum > 0,
   469  		DirNlink:     f&SbDirNlink > 0,
   470  		ExtraIsize:   f&SbExtraIsize > 0,
   471  		HasSnapshot:  f&SbHasSnapshot > 0,
   472  		Quota:        f&SbQuota > 0,
   473  		Bigalloc:     f&SbBigalloc > 0,
   474  		MetadataCsum: f&SbMetadataCsum > 0,
   475  		ReadOnly:     f&SbReadOnly > 0,
   476  	}
   477  }