github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/disklayout/superblock.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package disklayout 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/marshal" 19 ) 20 21 const ( 22 // SbOffset is the absolute offset at which the superblock is placed. 23 SbOffset = 1024 24 ) 25 26 // SuperBlock should be implemented by structs representing the ext superblock. 27 // The superblock holds a lot of information about the enclosing filesystem. 28 // This interface aims to provide access methods to important information held 29 // by the superblock. It does NOT expose all fields of the superblock, only the 30 // ones necessary. This can be expanded when need be. 31 // 32 // Location and replication: 33 // - The superblock is located at offset 1024 in block group 0. 34 // - Redundant copies of the superblock and group descriptors are kept in 35 // all groups if SbSparse feature flag is NOT set. If it is set, the 36 // replicas only exist in groups whose group number is either 0 or a 37 // power of 3, 5, or 7. 38 // - There is also a sparse superblock feature v2 in which there are just 39 // two replicas saved in the block groups pointed by sb.s_backup_bgs. 40 // 41 // Replicas should eventually be updated if the superblock is updated. 42 // 43 // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#super-block. 44 type SuperBlock interface { 45 marshal.Marshallable 46 47 // InodesCount returns the total number of inodes in this filesystem. 48 InodesCount() uint32 49 50 // BlocksCount returns the total number of data blocks in this filesystem. 51 BlocksCount() uint64 52 53 // FreeBlocksCount returns the number of free blocks in this filesystem. 54 FreeBlocksCount() uint64 55 56 // FreeInodesCount returns the number of free inodes in this filesystem. 57 FreeInodesCount() uint32 58 59 // MountCount returns the number of mounts since the last fsck. 60 MountCount() uint16 61 62 // MaxMountCount returns the number of mounts allowed beyond which a fsck is 63 // needed. 64 MaxMountCount() uint16 65 66 // FirstDataBlock returns the absolute block number of the first data block, 67 // which contains the super block itself. 68 // 69 // If the filesystem has 1kb data blocks then this should return 1. For all 70 // other configurations, this typically returns 0. 71 FirstDataBlock() uint32 72 73 // BlockSize returns the size of one data block in this filesystem. 74 // This can be calculated by 2^(10 + sb.s_log_block_size). This ensures that 75 // the smallest block size is 1kb. 76 BlockSize() uint64 77 78 // BlocksPerGroup returns the number of data blocks in a block group. 79 BlocksPerGroup() uint32 80 81 // ClusterSize returns block cluster size (set during mkfs time by admin). 82 // This can be calculated by 2^(10 + sb.s_log_cluster_size). This ensures that 83 // the smallest cluster size is 1kb. 84 // 85 // sb.s_log_cluster_size must equal sb.s_log_block_size if bigalloc feature 86 // is NOT set and consequently BlockSize() = ClusterSize() in that case. 87 ClusterSize() uint64 88 89 // ClustersPerGroup returns: 90 // - number of clusters per group if bigalloc is enabled. 91 // - BlocksPerGroup() otherwise. 92 ClustersPerGroup() uint32 93 94 // InodeSize returns the size of the inode disk record size in bytes. Use this 95 // to iterate over inode arrays on disk. 96 // 97 // In ext2 and ext3: 98 // - Each inode had a disk record of 128 bytes. 99 // - The inode struct size was fixed at 128 bytes. 100 // 101 // In ext4 its possible to allocate larger on-disk inodes: 102 // - Inode disk record size = sb.s_inode_size (function return value). 103 // = 256 (default) 104 // - Inode struct size = 128 + inode.i_extra_isize. 105 // = 128 + 32 = 160 (default) 106 InodeSize() uint16 107 108 // InodesPerGroup returns the number of inodes in a block group. 109 InodesPerGroup() uint32 110 111 // BgDescSize returns the size of the block group descriptor struct. 112 // 113 // In ext2, ext3, ext4 (without 64-bit feature), the block group descriptor 114 // is only 32 bytes long. 115 // In ext4 with 64-bit feature, the block group descriptor expands to AT LEAST 116 // 64 bytes. It might be bigger than that. 117 BgDescSize() uint16 118 119 // CompatibleFeatures returns the CompatFeatures struct which holds all the 120 // compatible features this fs supports. 121 CompatibleFeatures() CompatFeatures 122 123 // IncompatibleFeatures returns the CompatFeatures struct which holds all the 124 // incompatible features this fs supports. 125 IncompatibleFeatures() IncompatFeatures 126 127 // ReadOnlyCompatibleFeatures returns the CompatFeatures struct which holds all the 128 // readonly compatible features this fs supports. 129 ReadOnlyCompatibleFeatures() RoCompatFeatures 130 131 // Magic() returns the magic signature which must be 0xef53. 132 Magic() uint16 133 134 // Revision returns the superblock revision. Superblock struct fields from 135 // offset 0x54 till 0x150 should only be used if superblock has DynamicRev. 136 Revision() SbRevision 137 } 138 139 // SbRevision is the type for superblock revisions. 140 type SbRevision uint32 141 142 // Super block revisions. 143 const ( 144 // OldRev is the good old (original) format. 145 OldRev SbRevision = 0 146 147 // DynamicRev is v2 format w/ dynamic inode sizes. 148 DynamicRev SbRevision = 1 149 ) 150 151 // Superblock compatible features. 152 // This is not exhaustive, unused features are not listed. 153 const ( 154 // SbDirPrealloc indicates directory preallocation. 155 SbDirPrealloc = 0x1 156 157 // SbHasJournal indicates the presence of a journal. jbd2 should only work 158 // with this being set. 159 SbHasJournal = 0x4 160 161 // SbExtAttr indicates extended attributes support. 162 SbExtAttr = 0x8 163 164 // SbResizeInode indicates that the fs has reserved GDT blocks (right after 165 // group descriptors) for fs expansion. 166 SbResizeInode = 0x10 167 168 // SbDirIndex indicates that the fs has directory indices. 169 SbDirIndex = 0x20 170 171 // SbSparseV2 stands for Sparse superblock version 2. 172 SbSparseV2 = 0x200 173 ) 174 175 // CompatFeatures represents a superblock's compatible feature set. If the 176 // kernel does not understand any of these feature, it can still read/write 177 // to this fs. 178 type CompatFeatures struct { 179 DirPrealloc bool 180 HasJournal bool 181 ExtAttr bool 182 ResizeInode bool 183 DirIndex bool 184 SparseV2 bool 185 } 186 187 // ToInt converts superblock compatible features back to its 32-bit rep. 188 func (f CompatFeatures) ToInt() uint32 { 189 var res uint32 190 191 if f.DirPrealloc { 192 res |= SbDirPrealloc 193 } 194 if f.HasJournal { 195 res |= SbHasJournal 196 } 197 if f.ExtAttr { 198 res |= SbExtAttr 199 } 200 if f.ResizeInode { 201 res |= SbResizeInode 202 } 203 if f.DirIndex { 204 res |= SbDirIndex 205 } 206 if f.SparseV2 { 207 res |= SbSparseV2 208 } 209 210 return res 211 } 212 213 // CompatFeaturesFromInt converts the integer representation of superblock 214 // compatible features to CompatFeatures struct. 215 func CompatFeaturesFromInt(f uint32) CompatFeatures { 216 return CompatFeatures{ 217 DirPrealloc: f&SbDirPrealloc > 0, 218 HasJournal: f&SbHasJournal > 0, 219 ExtAttr: f&SbExtAttr > 0, 220 ResizeInode: f&SbResizeInode > 0, 221 DirIndex: f&SbDirIndex > 0, 222 SparseV2: f&SbSparseV2 > 0, 223 } 224 } 225 226 // Superblock incompatible features. 227 // This is not exhaustive, unused features are not listed. 228 const ( 229 // SbDirentFileType indicates that directory entries record the file type. 230 // We should use struct DirentNew for dirents then. 231 SbDirentFileType = 0x2 232 233 // SbRecovery indicates that the filesystem needs recovery. 234 SbRecovery = 0x4 235 236 // SbJournalDev indicates that the filesystem has a separate journal device. 237 SbJournalDev = 0x8 238 239 // SbMetaBG indicates that the filesystem is using Meta block groups. Moves 240 // the group descriptors from the congested first block group into the first 241 // group of each metablock group to increase the maximum block groups limit 242 // and hence support much larger filesystems. 243 // 244 // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#meta-block-groups. 245 SbMetaBG = 0x10 246 247 // SbExtents indicates that the filesystem uses extents. Must be set in ext4 248 // filesystems. 249 SbExtents = 0x40 250 251 // SbIs64Bit indicates that this filesystem addresses blocks with 64-bits. 252 // Hence can support 2^64 data blocks. 253 SbIs64Bit = 0x80 254 255 // SbMMP indicates that this filesystem has multiple mount protection. 256 // 257 // See https://www.kernel.org/doc/html/latest/filesystems/ext4/globals.html#multiple-mount-protection. 258 SbMMP = 0x100 259 260 // SbFlexBg indicates that this filesystem has flexible block groups. Several 261 // block groups are tied into one logical block group so that all the metadata 262 // for the block groups (bitmaps and inode tables) are close together for 263 // faster loading. Consequently, large files will be continuous on disk. 264 // However, this does not affect the placement of redundant superblocks and 265 // group descriptors. 266 // 267 // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#flexible-block-groups. 268 SbFlexBg = 0x200 269 270 // SbLargeDir shows that large directory enabled. Directory htree can be 3 271 // levels deep. Directory htrees are allowed to be 2 levels deep otherwise. 272 SbLargeDir = 0x4000 273 274 // SbInlineData allows inline data in inodes for really small files. 275 SbInlineData = 0x8000 276 277 // SbEncrypted indicates that this fs contains encrypted inodes. 278 SbEncrypted = 0x10000 279 ) 280 281 // IncompatFeatures represents a superblock's incompatible feature set. If the 282 // kernel does not understand any of these feature, it should refuse to mount. 283 type IncompatFeatures struct { 284 DirentFileType bool 285 Recovery bool 286 JournalDev bool 287 MetaBG bool 288 Extents bool 289 Is64Bit bool 290 MMP bool 291 FlexBg bool 292 LargeDir bool 293 InlineData bool 294 Encrypted bool 295 } 296 297 // ToInt converts superblock incompatible features back to its 32-bit rep. 298 func (f IncompatFeatures) ToInt() uint32 { 299 var res uint32 300 301 if f.DirentFileType { 302 res |= SbDirentFileType 303 } 304 if f.Recovery { 305 res |= SbRecovery 306 } 307 if f.JournalDev { 308 res |= SbJournalDev 309 } 310 if f.MetaBG { 311 res |= SbMetaBG 312 } 313 if f.Extents { 314 res |= SbExtents 315 } 316 if f.Is64Bit { 317 res |= SbIs64Bit 318 } 319 if f.MMP { 320 res |= SbMMP 321 } 322 if f.FlexBg { 323 res |= SbFlexBg 324 } 325 if f.LargeDir { 326 res |= SbLargeDir 327 } 328 if f.InlineData { 329 res |= SbInlineData 330 } 331 if f.Encrypted { 332 res |= SbEncrypted 333 } 334 335 return res 336 } 337 338 // IncompatFeaturesFromInt converts the integer representation of superblock 339 // incompatible features to IncompatFeatures struct. 340 func IncompatFeaturesFromInt(f uint32) IncompatFeatures { 341 return IncompatFeatures{ 342 DirentFileType: f&SbDirentFileType > 0, 343 Recovery: f&SbRecovery > 0, 344 JournalDev: f&SbJournalDev > 0, 345 MetaBG: f&SbMetaBG > 0, 346 Extents: f&SbExtents > 0, 347 Is64Bit: f&SbIs64Bit > 0, 348 MMP: f&SbMMP > 0, 349 FlexBg: f&SbFlexBg > 0, 350 LargeDir: f&SbLargeDir > 0, 351 InlineData: f&SbInlineData > 0, 352 Encrypted: f&SbEncrypted > 0, 353 } 354 } 355 356 // Superblock readonly compatible features. 357 // This is not exhaustive, unused features are not listed. 358 const ( 359 // SbSparse indicates sparse superblocks. Only groups with number either 0 or 360 // a power of 3, 5, or 7 will have redundant copies of the superblock and 361 // block descriptors. 362 SbSparse = 0x1 363 364 // SbLargeFile indicates that this fs has been used to store a file >= 2GiB. 365 SbLargeFile = 0x2 366 367 // SbHugeFile indicates that this fs contains files whose sizes are 368 // represented in units of logicals blocks, not 512-byte sectors. 369 SbHugeFile = 0x8 370 371 // SbGdtCsum indicates that group descriptors have checksums. 372 SbGdtCsum = 0x10 373 374 // SbDirNlink indicates that the new subdirectory limit is 64,999. Ext3 has a 375 // 32,000 subdirectory limit. 376 SbDirNlink = 0x20 377 378 // SbExtraIsize indicates that large inodes exist on this filesystem. 379 SbExtraIsize = 0x40 380 381 // SbHasSnapshot indicates the existence of a snapshot. 382 SbHasSnapshot = 0x80 383 384 // SbQuota enables usage tracking for all quota types. 385 SbQuota = 0x100 386 387 // SbBigalloc maps to the bigalloc feature. When set, the minimum allocation 388 // unit becomes a cluster rather than a data block. Then block bitmaps track 389 // clusters, not data blocks. 390 // 391 // See https://www.kernel.org/doc/html/latest/filesystems/ext4/overview.html#bigalloc. 392 SbBigalloc = 0x200 393 394 // SbMetadataCsum indicates that the fs supports metadata checksumming. 395 SbMetadataCsum = 0x400 396 397 // SbReadOnly marks this filesystem as readonly. Should refuse to mount in 398 // read/write mode. 399 SbReadOnly = 0x1000 400 ) 401 402 // RoCompatFeatures represents a superblock's readonly compatible feature set. 403 // If the kernel does not understand any of these feature, it can still mount 404 // readonly. But if the user wants to mount read/write, the kernel should 405 // refuse to mount. 406 type RoCompatFeatures struct { 407 Sparse bool 408 LargeFile bool 409 HugeFile bool 410 GdtCsum bool 411 DirNlink bool 412 ExtraIsize bool 413 HasSnapshot bool 414 Quota bool 415 Bigalloc bool 416 MetadataCsum bool 417 ReadOnly bool 418 } 419 420 // ToInt converts superblock readonly compatible features to its 32-bit rep. 421 func (f RoCompatFeatures) ToInt() uint32 { 422 var res uint32 423 424 if f.Sparse { 425 res |= SbSparse 426 } 427 if f.LargeFile { 428 res |= SbLargeFile 429 } 430 if f.HugeFile { 431 res |= SbHugeFile 432 } 433 if f.GdtCsum { 434 res |= SbGdtCsum 435 } 436 if f.DirNlink { 437 res |= SbDirNlink 438 } 439 if f.ExtraIsize { 440 res |= SbExtraIsize 441 } 442 if f.HasSnapshot { 443 res |= SbHasSnapshot 444 } 445 if f.Quota { 446 res |= SbQuota 447 } 448 if f.Bigalloc { 449 res |= SbBigalloc 450 } 451 if f.MetadataCsum { 452 res |= SbMetadataCsum 453 } 454 if f.ReadOnly { 455 res |= SbReadOnly 456 } 457 458 return res 459 } 460 461 // RoCompatFeaturesFromInt converts the integer representation of superblock 462 // readonly compatible features to RoCompatFeatures struct. 463 func RoCompatFeaturesFromInt(f uint32) RoCompatFeatures { 464 return RoCompatFeatures{ 465 Sparse: f&SbSparse > 0, 466 LargeFile: f&SbLargeFile > 0, 467 HugeFile: f&SbHugeFile > 0, 468 GdtCsum: f&SbGdtCsum > 0, 469 DirNlink: f&SbDirNlink > 0, 470 ExtraIsize: f&SbExtraIsize > 0, 471 HasSnapshot: f&SbHasSnapshot > 0, 472 Quota: f&SbQuota > 0, 473 Bigalloc: f&SbBigalloc > 0, 474 MetadataCsum: f&SbMetadataCsum > 0, 475 ReadOnly: f&SbReadOnly > 0, 476 } 477 }