// Source: github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/dev/dev.go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package dev provides a filesystem implementation for /dev.
package dev

import (
	"fmt"
	"path"

	"github.com/metacubex/gvisor/pkg/abi/linux"
	"github.com/metacubex/gvisor/pkg/context"
	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
	"github.com/metacubex/gvisor/pkg/fspath"
	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/tmpfs"
	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
	"github.com/metacubex/gvisor/pkg/sentry/vfs"
)

// Name is the dev filesystem name. It is the value returned by
// FilesystemType.Name.
const Name = "dev"

// FilesystemType implements vfs.FilesystemType. It carries no state of its
// own.
//
// +stateify savable
type FilesystemType struct{}

// Name implements vfs.FilesystemType.Name. It returns the package-level Name
// constant.
func (FilesystemType) Name() string {
	return Name
}

// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
45 func (fst FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { 46 mntns, err := vfsObj.NewMountNamespace(ctx, creds, source /* source */, tmpfs.Name, &vfs.MountOptions{GetFilesystemOptions: vfs.GetFilesystemOptions{ 47 Data: "mode=0755", // opts from drivers/base/devtmpfs.c:devtmpfs_init() 48 }}, nil) 49 if err != nil { 50 return nil, nil, err 51 } 52 defer mntns.DecRef(ctx) 53 54 root := mntns.Root(ctx) 55 defer root.DecRef(ctx) 56 57 iopts, _ := opts.InternalData.(InternalData) // If not provided, zero value is OK. 58 59 // Initialize contents. 60 if err := userspaceInit(ctx, vfsObj, creds, root, iopts.ShmMode); err != nil { 61 return nil, nil, err 62 } 63 if err := vfsObj.ForEachDevice(func(pathname string, kind vfs.DeviceKind, major, minor uint32, perms uint16) error { 64 if pathname == "" { 65 return nil 66 } 67 mode := linux.FileMode(perms) 68 switch kind { 69 case vfs.CharDevice: 70 mode |= linux.S_IFCHR 71 case vfs.BlockDevice: 72 mode |= linux.S_IFBLK 73 default: 74 panic(fmt.Sprintf("invalid DeviceKind: %v", kind)) 75 } 76 return CreateDeviceFile(ctx, vfsObj, creds, root, pathname, major, minor, mode, nil /* uid */, nil /* gid */) 77 }); err != nil { 78 return nil, nil, err 79 } 80 81 root.Mount().Filesystem().IncRef() 82 root.Dentry().IncRef() 83 return root.Mount().Filesystem(), root.Dentry(), nil 84 } 85 86 // Release implements vfs.FilesystemType.Release. 87 func (fst *FilesystemType) Release(ctx context.Context) {} 88 89 // InternalData contains internal data passed in via vfs.GetFilesystemOptions. 90 type InternalData struct { 91 // ShmMode indicates the mode to create the /dev/shm dir with. 
92 ShmMode *uint16 93 } 94 95 func pathOperationAt(root vfs.VirtualDentry, pathname string) *vfs.PathOperation { 96 return &vfs.PathOperation{ 97 Root: root, 98 Start: root, 99 Path: fspath.Parse(pathname), 100 } 101 } 102 103 // CreateDeviceFile creates a device special file at the given pathname from root. 104 func CreateDeviceFile(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, pathname string, major, minor uint32, mode linux.FileMode, uid, gid *uint32) error { 105 // Create any parent directories. See 106 // devtmpfs.c:handle_create()=>create_path(). 107 parent := path.Dir(pathname) 108 if err := vfsObj.MkdirAllAt(ctx, parent, root, creds, &vfs.MkdirOptions{ 109 Mode: 0755, 110 }, true /* mustBeDir */); err != nil { 111 return fmt.Errorf("failed to create device parent directory %q: %v", parent, err) 112 } 113 created := true 114 pop := pathOperationAt(root, pathname) 115 if err := vfsObj.MknodAt(ctx, creds, pop, &vfs.MknodOptions{Mode: mode, DevMajor: major, DevMinor: minor}); err != nil { 116 if linuxerr.Equals(linuxerr.EEXIST, err) { 117 // EEXIST is silently ignored; compare 118 // opencontainers/runc:libcontainer/rootfs_linux.go:createDeviceNode(). 119 created = false 120 } else { 121 return fmt.Errorf("failed to create device file at %q: %w", pathname, err) 122 } 123 } 124 if created && (uid != nil || gid != nil) { 125 var opts vfs.SetStatOptions 126 if uid != nil { 127 opts.Stat.Mask |= linux.STATX_UID 128 opts.Stat.UID = *uid 129 } 130 if gid != nil { 131 opts.Stat.Mask |= linux.STATX_GID 132 opts.Stat.GID = *gid 133 } 134 if err := vfsObj.SetStatAt(ctx, creds, pop, &opts); err != nil { 135 return fmt.Errorf("failed to set UID/GID for device file %q: %w", pathname, err) 136 } 137 } 138 return nil 139 } 140 141 // userspaceInit creates symbolic links and mount points in the devtmpfs 142 // instance that are created by userspace in Linux. It does not create mounts. 
143 func userspaceInit(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, root vfs.VirtualDentry, shmMode *uint16) error { 144 // Initialize symlinks. 145 for _, symlink := range []struct { 146 source string 147 target string 148 }{ 149 // systemd: src/shared/dev-setup.c:dev_setup() 150 {source: "fd", target: "/proc/self/fd"}, 151 {source: "stdin", target: "/proc/self/fd/0"}, 152 {source: "stdout", target: "/proc/self/fd/1"}, 153 {source: "stderr", target: "/proc/self/fd/2"}, 154 // /proc/kcore is not implemented. 155 156 // Linux implements /dev/ptmx as a device node, but advises 157 // container implementations to create /dev/ptmx as a symlink 158 // to pts/ptmx (Documentation/filesystems/devpts.txt). Systemd 159 // follows this advice (src/nspawn/nspawn.c:setup_pts()), while 160 // LXC tries to create a bind mount and falls back to a symlink 161 // (src/lxc/conf.c:lxc_setup_devpts()). 162 {source: "ptmx", target: "pts/ptmx"}, 163 } { 164 if err := vfsObj.SymlinkAt(ctx, creds, pathOperationAt(root, symlink.source), symlink.target); err != nil { 165 return fmt.Errorf("failed to create symlink %q => %q: %v", symlink.source, symlink.target, err) 166 } 167 } 168 169 // systemd: src/core/mount-setup.c:mount_table 170 for _, dir := range []string{ 171 "shm", 172 "pts", 173 } { 174 // "The access mode here doesn't really matter too much, since the 175 // mounted file system will take precedence anyway" 176 // - systemd: src/core/mount-setup.c:mount_one() 177 accessMode := linux.FileMode(0755) 178 if shmMode != nil && dir == "shm" { 179 accessMode = linux.FileMode(*shmMode) 180 } 181 if err := vfsObj.MkdirAt(ctx, creds, pathOperationAt(root, dir), &vfs.MkdirOptions{ 182 Mode: accessMode, 183 }); err != nil { 184 return fmt.Errorf("failed to create directory %q: %v", dir, err) 185 } 186 } 187 188 return nil 189 }