github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/sys/sys.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package sys implements sysfs. 16 package sys 17 18 import ( 19 "bytes" 20 "fmt" 21 "strconv" 22 23 "golang.org/x/sys/unix" 24 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 25 "github.com/MerlinKodo/gvisor/pkg/context" 26 "github.com/MerlinKodo/gvisor/pkg/coverage" 27 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 28 "github.com/MerlinKodo/gvisor/pkg/log" 29 "github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/kernfs" 30 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel" 31 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 32 "github.com/MerlinKodo/gvisor/pkg/sentry/vfs" 33 ) 34 35 const ( 36 // Name is the default filesystem name. 37 Name = "sysfs" 38 defaultSysMode = linux.FileMode(0444) 39 defaultSysDirMode = linux.FileMode(0755) 40 defaultMaxCachedDentries = uint64(1000) 41 ) 42 43 // FilesystemType implements vfs.FilesystemType. 44 // 45 // +stateify savable 46 type FilesystemType struct{} 47 48 // InternalData contains internal data passed in via 49 // vfs.GetFilesystemOptions.InternalData. 50 // 51 // +stateify savable 52 type InternalData struct { 53 // ProductName is the value to be set to devices/virtual/dmi/id/product_name. 54 ProductName string 55 // EnableAccelSysfs is whether to populate sysfs paths used by hardware 56 // accelerators. 57 EnableAccelSysfs bool 58 } 59 60 // filesystem implements vfs.FilesystemImpl. 61 // 62 // +stateify savable 63 type filesystem struct { 64 kernfs.Filesystem 65 66 devMinor uint32 67 } 68 69 // Name implements vfs.FilesystemType.Name. 70 func (FilesystemType) Name() string { 71 return Name 72 } 73 74 // Release implements vfs.FilesystemType.Release. 75 func (FilesystemType) Release(ctx context.Context) {} 76 77 // GetFilesystem implements vfs.FilesystemType.GetFilesystem. 78 func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { 79 devMinor, err := vfsObj.GetAnonBlockDevMinor() 80 if err != nil { 81 return nil, nil, err 82 } 83 84 mopts := vfs.GenericParseMountOptions(opts.Data) 85 maxCachedDentries := defaultMaxCachedDentries 86 if str, ok := mopts["dentry_cache_limit"]; ok { 87 delete(mopts, "dentry_cache_limit") 88 maxCachedDentries, err = strconv.ParseUint(str, 10, 64) 89 if err != nil { 90 ctx.Warningf("sys.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) 91 return nil, nil, linuxerr.EINVAL 92 } 93 } 94 95 fs := &filesystem{ 96 devMinor: devMinor, 97 } 98 fs.MaxCachedDentries = maxCachedDentries 99 fs.VFSFilesystem().Init(vfsObj, &fsType, fs) 100 101 k := kernel.KernelFromContext(ctx) 102 fsDirChildren := make(map[string]kernfs.Inode) 103 // Create an empty directory to serve as the mount point for cgroupfs when 104 // cgroups are available. This emulates Linux behaviour, see 105 // kernel/cgroup.c:cgroup_init(). Note that in Linux, userspace (typically 106 // the init process) is ultimately responsible for actually mounting 107 // cgroupfs, but the kernel creates the mountpoint. For the sentry, the 108 // launcher mounts cgroupfs. 109 if k.CgroupRegistry() != nil { 110 fsDirChildren["cgroup"] = fs.newDir(ctx, creds, defaultSysDirMode, nil) 111 } 112 113 classSub := map[string]kernfs.Inode{ 114 "power_supply": fs.newDir(ctx, creds, defaultSysDirMode, nil), 115 "net": fs.newDir(ctx, creds, defaultSysDirMode, fs.newNetDir(ctx, creds, defaultSysDirMode)), 116 } 117 devicesSub := map[string]kernfs.Inode{ 118 "system": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 119 "cpu": cpuDir(ctx, fs, creds), 120 }), 121 } 122 123 productName := "" 124 var busSub map[string]kernfs.Inode 125 if opts.InternalData != nil { 126 idata := opts.InternalData.(*InternalData) 127 productName = idata.ProductName 128 if idata.EnableAccelSysfs { 129 pciMainBusSub, err := fs.mirrorPCIBusDeviceDir(ctx, creds, pciMainBusDevicePath) 130 if err != nil { 131 return nil, nil, err 132 } 133 devicesSub["pci0000:00"] = fs.newDir(ctx, creds, defaultSysDirMode, pciMainBusSub) 134 135 accelSub, err := fs.newAccelDir(ctx, creds) 136 if err != nil { 137 return nil, nil, err 138 } 139 classSub["accel"] = fs.newDir(ctx, creds, defaultSysDirMode, accelSub) 140 141 pciDevicesSub, err := fs.newPCIDevicesDir(ctx, creds) 142 if err != nil { 143 return nil, nil, err 144 } 145 busSub = map[string]kernfs.Inode{ 146 "pci": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 147 "devices": fs.newDir(ctx, creds, defaultSysDirMode, pciDevicesSub), 148 }), 149 } 150 } 151 } 152 153 if len(productName) > 0 { 154 log.Debugf("Setting product_name: %q", productName) 155 classSub["dmi"] = fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 156 "id": kernfs.NewStaticSymlink(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "../../devices/virtual/dmi/id"), 157 }) 158 devicesSub["virtual"] = fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 159 "dmi": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 160 "id": fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 161 "product_name": fs.newStaticFile(ctx, creds, defaultSysMode, productName+"\n"), 162 }), 163 }), 164 }) 165 } 166 root := fs.newDir(ctx, creds, defaultSysDirMode, map[string]kernfs.Inode{ 167 "block": fs.newDir(ctx, creds, defaultSysDirMode, nil), 168 "bus": fs.newDir(ctx, creds, defaultSysDirMode, busSub), 169 "class": fs.newDir(ctx, creds, defaultSysDirMode, classSub), 170 "dev": fs.newDir(ctx, creds, defaultSysDirMode, nil), 171 "devices": fs.newDir(ctx, creds, defaultSysDirMode, devicesSub), 172 "firmware": fs.newDir(ctx, creds, defaultSysDirMode, nil), 173 "fs": fs.newDir(ctx, creds, defaultSysDirMode, fsDirChildren), 174 "kernel": kernelDir(ctx, fs, creds), 175 "module": fs.newDir(ctx, creds, defaultSysDirMode, nil), 176 "power": fs.newDir(ctx, creds, defaultSysDirMode, nil), 177 }) 178 var rootD kernfs.Dentry 179 rootD.InitRoot(&fs.Filesystem, root) 180 return fs.VFSFilesystem(), rootD.VFSDentry(), nil 181 } 182 183 func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode { 184 k := kernel.KernelFromContext(ctx) 185 maxCPUCores := k.ApplicationCores() 186 children := map[string]kernfs.Inode{ 187 "online": fs.newCPUFile(ctx, creds, maxCPUCores, linux.FileMode(0444)), 188 "possible": fs.newCPUFile(ctx, creds, maxCPUCores, linux.FileMode(0444)), 189 "present": fs.newCPUFile(ctx, creds, maxCPUCores, linux.FileMode(0444)), 190 } 191 for i := uint(0); i < maxCPUCores; i++ { 192 children[fmt.Sprintf("cpu%d", i)] = fs.newDir(ctx, creds, linux.FileMode(0555), nil) 193 } 194 return fs.newDir(ctx, creds, defaultSysDirMode, children) 195 } 196 197 func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode { 198 // Set up /sys/kernel/debug/kcov. Technically, debugfs should be 199 // mounted at debug/, but for our purposes, it is sufficient to keep it 200 // in sys. 201 var children map[string]kernfs.Inode 202 if coverage.KcovSupported() { 203 log.Debugf("Set up /sys/kernel/debug/kcov") 204 children = map[string]kernfs.Inode{ 205 "debug": fs.newDir(ctx, creds, linux.FileMode(0700), map[string]kernfs.Inode{ 206 "kcov": fs.newKcovFile(ctx, creds), 207 }), 208 } 209 } 210 return fs.newDir(ctx, creds, defaultSysDirMode, children) 211 } 212 213 // Release implements vfs.FilesystemImpl.Release. 214 func (fs *filesystem) Release(ctx context.Context) { 215 fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) 216 fs.Filesystem.Release(ctx) 217 } 218 219 // MountOptions implements vfs.FilesystemImpl.MountOptions. 220 func (fs *filesystem) MountOptions() string { 221 return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries) 222 } 223 224 // dir implements kernfs.Inode. 225 // 226 // +stateify savable 227 type dir struct { 228 dirRefs 229 kernfs.InodeAlwaysValid 230 kernfs.InodeAttrs 231 kernfs.InodeDirectoryNoNewChildren 232 kernfs.InodeNotAnonymous 233 kernfs.InodeNotSymlink 234 kernfs.InodeTemporary 235 kernfs.InodeWatches 236 kernfs.OrderedChildren 237 238 locks vfs.FileLocks 239 } 240 241 func (fs *filesystem) newDir(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, contents map[string]kernfs.Inode) kernfs.Inode { 242 d := &dir{} 243 d.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755) 244 d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) 245 d.InitRefs() 246 d.IncLinks(d.OrderedChildren.Populate(contents)) 247 return d 248 } 249 250 // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. 251 func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { 252 return linuxerr.EPERM 253 } 254 255 // Open implements kernfs.Inode.Open. 256 func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 257 opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | 258 linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY 259 fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), kd, &d.OrderedChildren, &d.locks, &opts, kernfs.GenericDirectoryFDOptions{ 260 SeekEnd: kernfs.SeekEndStaticEntries, 261 }) 262 if err != nil { 263 return nil, err 264 } 265 return fd.VFSFileDescription(), nil 266 } 267 268 // DecRef implements kernfs.Inode.DecRef. 269 func (d *dir) DecRef(ctx context.Context) { 270 d.dirRefs.DecRef(func() { d.Destroy(ctx) }) 271 } 272 273 // StatFS implements kernfs.Inode.StatFS. 274 func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { 275 return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil 276 } 277 278 // cpuFile implements kernfs.Inode. 279 // 280 // +stateify savable 281 type cpuFile struct { 282 implStatFS 283 kernfs.DynamicBytesFile 284 285 maxCores uint 286 } 287 288 // Generate implements vfs.DynamicBytesSource.Generate. 289 func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error { 290 fmt.Fprintf(buf, "0-%d\n", c.maxCores-1) 291 return nil 292 } 293 294 func (fs *filesystem) newCPUFile(ctx context.Context, creds *auth.Credentials, maxCores uint, mode linux.FileMode) kernfs.Inode { 295 c := &cpuFile{maxCores: maxCores} 296 c.DynamicBytesFile.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), c, mode) 297 return c 298 } 299 300 // +stateify savable 301 type implStatFS struct{} 302 303 // StatFS implements kernfs.Inode.StatFS. 304 func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { 305 return vfs.GenericStatFS(linux.SYSFS_MAGIC), nil 306 } 307 308 // +stateify savable 309 type staticFile struct { 310 kernfs.DynamicBytesFile 311 vfs.StaticData 312 } 313 314 func (fs *filesystem) newStaticFile(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, data string) kernfs.Inode { 315 s := &staticFile{StaticData: vfs.StaticData{Data: data}} 316 s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, mode) 317 return s 318 } 319 320 // hostFile is an inode whose contents are generated by reading from the 321 // host. 322 // 323 // +stateify savable 324 type hostFile struct { 325 kernfs.DynamicBytesFile 326 hostPath string 327 } 328 329 func (hf *hostFile) Generate(ctx context.Context, buf *bytes.Buffer) error { 330 fd, err := unix.Openat(-1, hf.hostPath, unix.O_RDONLY|unix.O_NOFOLLOW, 0) 331 if err != nil { 332 return err 333 } 334 var data [hostFileBufSize]byte 335 n, err := unix.Read(fd, data[:]) 336 if err != nil { 337 return err 338 } 339 unix.Close(fd) 340 buf.Write(data[:n]) 341 return nil 342 } 343 344 func (fs *filesystem) newHostFile(ctx context.Context, creds *auth.Credentials, mode linux.FileMode, hostPath string) kernfs.Inode { 345 hf := &hostFile{hostPath: hostPath} 346 hf.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), hf, mode) 347 return hf 348 }