// Source: github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/devtmpfs/devtmpfs.go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package devtmpfs provides an implementation of /dev based on tmpfs,
// analogous to Linux's devtmpfs.
package devtmpfs

import (
	"fmt"
	"path"

	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
	"github.com/nicocha30/gvisor-ligolo/pkg/context"
	"github.com/nicocha30/gvisor-ligolo/pkg/fspath"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/tmpfs"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
)

// Name is the default filesystem name.
const Name = "devtmpfs"

// FilesystemType implements vfs.FilesystemType.
//
// A single FilesystemType owns one backing tmpfs filesystem; GetFilesystem
// hands out additional references to that same filesystem and root on every
// call.
//
// +stateify savable
type FilesystemType struct {
	// initOnce guards the one-time creation of the backing filesystem in
	// GetFilesystem. initErr holds the error from that creation, if any, and
	// is returned by every GetFilesystem call when it is non-nil.
	initOnce sync.Once `state:"nosave"` // FIXME(gvisor.dev/issue/1663): not yet supported.
	initErr  error

	// fs is the tmpfs filesystem that backs all mounts of this FilesystemType.
	// root is fs' root. fs and root are immutable.
	fs   *vfs.Filesystem
	root *vfs.Dentry
}

// Name implements vfs.FilesystemType.Name.
//
// It returns the package-level Name constant.
func (*FilesystemType) Name() string {
	return Name
}

// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
54 func (fst *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { 55 fst.initOnce.Do(func() { 56 fs, root, err := tmpfs.FilesystemType{}.GetFilesystem(ctx, vfsObj, creds, "" /* source */, vfs.GetFilesystemOptions{ 57 Data: "mode=0755", // opts from drivers/base/devtmpfs.c:devtmpfs_init() 58 }) 59 if err != nil { 60 fst.initErr = err 61 return 62 } 63 fst.fs = fs 64 fst.root = root 65 }) 66 if fst.initErr != nil { 67 return nil, nil, fst.initErr 68 } 69 fst.fs.IncRef() 70 fst.root.IncRef() 71 return fst.fs, fst.root, nil 72 } 73 74 // Release implements vfs.FilesystemType.Release. 75 func (fst *FilesystemType) Release(ctx context.Context) { 76 if fst.fs != nil { 77 // Release the original reference obtained when creating the filesystem. 78 fst.root.DecRef(ctx) 79 fst.fs.DecRef(ctx) 80 } 81 } 82 83 // Accessor allows devices to create device special files in devtmpfs. 84 type Accessor struct { 85 vfsObj *vfs.VirtualFilesystem 86 mntns *vfs.MountNamespace 87 root vfs.VirtualDentry 88 creds *auth.Credentials 89 } 90 91 // NewAccessor returns an Accessor that supports creation of device special 92 // files in the devtmpfs instance registered with name fsTypeName in vfsObj. 93 func NewAccessor(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, fsTypeName string) (*Accessor, error) { 94 mntns, err := vfsObj.NewMountNamespace(ctx, creds, "devtmpfs" /* source */, fsTypeName, &vfs.MountOptions{}) 95 if err != nil { 96 return nil, err 97 } 98 // Pass a reference on root to the Accessor. 99 root := mntns.Root() 100 root.IncRef() 101 return &Accessor{ 102 vfsObj: vfsObj, 103 mntns: mntns, 104 root: root, 105 creds: creds, 106 }, nil 107 } 108 109 // Release must be called when a is no longer in use. 
110 func (a *Accessor) Release(ctx context.Context) { 111 a.root.DecRef(ctx) 112 a.mntns.DecRef(ctx) 113 } 114 115 // accessorContext implements context.Context by extending an existing 116 // context.Context with an Accessor's values for VFS-relevant state. 117 type accessorContext struct { 118 context.Context 119 a *Accessor 120 } 121 122 func (a *Accessor) wrapContext(ctx context.Context) *accessorContext { 123 return &accessorContext{ 124 Context: ctx, 125 a: a, 126 } 127 } 128 129 // Value implements context.Context.Value. 130 func (ac *accessorContext) Value(key any) any { 131 switch key { 132 case vfs.CtxMountNamespace: 133 ac.a.mntns.IncRef() 134 return ac.a.mntns 135 case vfs.CtxRoot: 136 ac.a.root.IncRef() 137 return ac.a.root 138 default: 139 return ac.Context.Value(key) 140 } 141 } 142 143 func (a *Accessor) pathOperationAt(pathname string) *vfs.PathOperation { 144 return &vfs.PathOperation{ 145 Root: a.root, 146 Start: a.root, 147 Path: fspath.Parse(pathname), 148 } 149 } 150 151 // CreateDeviceFile creates a device special file at the given pathname in the 152 // devtmpfs instance accessed by the Accessor. 153 func (a *Accessor) CreateDeviceFile(ctx context.Context, pathname string, kind vfs.DeviceKind, major, minor uint32, perms uint16) error { 154 actx := a.wrapContext(ctx) 155 156 mode := (linux.FileMode)(perms) 157 switch kind { 158 case vfs.BlockDevice: 159 mode |= linux.S_IFBLK 160 case vfs.CharDevice: 161 mode |= linux.S_IFCHR 162 default: 163 panic(fmt.Sprintf("invalid vfs.DeviceKind: %v", kind)) 164 } 165 166 // Create any parent directories. See 167 // devtmpfs.c:handle_create()=>path_create(). 
168 parent := path.Dir(pathname) 169 if err := a.vfsObj.MkdirAllAt(ctx, parent, a.root, a.creds, &vfs.MkdirOptions{ 170 Mode: 0755, 171 }, true /* mustBeDir */); err != nil { 172 return fmt.Errorf("failed to create device parent directory %q: %v", parent, err) 173 } 174 175 // NOTE: Linux's devtmpfs refuses to automatically delete files it didn't 176 // create, which it recognizes by storing a pointer to the kdevtmpfs struct 177 // thread in struct inode::i_private. Accessor doesn't yet support deletion 178 // of files at all, and probably won't as long as we don't need to support 179 // kernel modules, so this is moot for now. 180 return a.vfsObj.MknodAt(actx, a.creds, a.pathOperationAt(pathname), &vfs.MknodOptions{ 181 Mode: mode, 182 DevMajor: major, 183 DevMinor: minor, 184 }) 185 } 186 187 // UserspaceInit creates symbolic links and mount points in the devtmpfs 188 // instance accessed by the Accessor that are created by userspace in Linux. It 189 // does not create mounts. 190 func (a *Accessor) UserspaceInit(ctx context.Context) error { 191 actx := a.wrapContext(ctx) 192 193 // Initialize symlinks. 194 for _, symlink := range []struct { 195 source string 196 target string 197 }{ 198 // systemd: src/shared/dev-setup.c:dev_setup() 199 {source: "fd", target: "/proc/self/fd"}, 200 {source: "stdin", target: "/proc/self/fd/0"}, 201 {source: "stdout", target: "/proc/self/fd/1"}, 202 {source: "stderr", target: "/proc/self/fd/2"}, 203 // /proc/kcore is not implemented. 204 205 // Linux implements /dev/ptmx as a device node, but advises 206 // container implementations to create /dev/ptmx as a symlink 207 // to pts/ptmx (Documentation/filesystems/devpts.txt). Systemd 208 // follows this advice (src/nspawn/nspawn.c:setup_pts()), while 209 // LXC tries to create a bind mount and falls back to a symlink 210 // (src/lxc/conf.c:lxc_setup_devpts()). 
211 {source: "ptmx", target: "pts/ptmx"}, 212 } { 213 if err := a.vfsObj.SymlinkAt(actx, a.creds, a.pathOperationAt(symlink.source), symlink.target); err != nil { 214 return fmt.Errorf("failed to create symlink %q => %q: %v", symlink.source, symlink.target, err) 215 } 216 } 217 218 // systemd: src/core/mount-setup.c:mount_table 219 for _, dir := range []string{ 220 "shm", 221 "pts", 222 } { 223 if err := a.vfsObj.MkdirAt(actx, a.creds, a.pathOperationAt(dir), &vfs.MkdirOptions{ 224 // systemd: src/core/mount-setup.c:mount_one() 225 Mode: 0755, 226 }); err != nil { 227 return fmt.Errorf("failed to create directory %q: %v", dir, err) 228 } 229 } 230 231 return nil 232 }