github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/unix/unix_vfs2.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package unix 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/context" 20 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 21 "github.com/SagerNet/gvisor/pkg/fspath" 22 "github.com/SagerNet/gvisor/pkg/hostarch" 23 "github.com/SagerNet/gvisor/pkg/marshal" 24 "github.com/SagerNet/gvisor/pkg/sentry/arch" 25 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/sockfs" 26 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 27 "github.com/SagerNet/gvisor/pkg/sentry/socket" 28 "github.com/SagerNet/gvisor/pkg/sentry/socket/control" 29 "github.com/SagerNet/gvisor/pkg/sentry/socket/netstack" 30 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 31 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 32 "github.com/SagerNet/gvisor/pkg/syserr" 33 "github.com/SagerNet/gvisor/pkg/syserror" 34 "github.com/SagerNet/gvisor/pkg/tcpip" 35 "github.com/SagerNet/gvisor/pkg/usermem" 36 "github.com/SagerNet/gvisor/pkg/waiter" 37 ) 38 39 // SocketVFS2 implements socket.SocketVFS2 (and by extension, 40 // vfs.FileDescriptionImpl) for Unix sockets. 41 // 42 // +stateify savable 43 type SocketVFS2 struct { 44 vfsfd vfs.FileDescription 45 vfs.FileDescriptionDefaultImpl 46 vfs.DentryMetadataFileDescriptionImpl 47 vfs.LockFD 48 49 socketVFS2Refs 50 socketOpsCommon 51 } 52 53 var _ = socket.SocketVFS2(&SocketVFS2{}) 54 55 // NewSockfsFile creates a new socket file in the global sockfs mount and 56 // returns a corresponding file description. 57 func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) { 58 mnt := t.Kernel().SocketMount() 59 d := sockfs.NewDentry(t, mnt) 60 defer d.DecRef(t) 61 62 fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d, &vfs.FileLocks{}) 63 if err != nil { 64 return nil, syserr.FromError(err) 65 } 66 return fd, nil 67 } 68 69 // NewFileDescription creates and returns a socket file description 70 // corresponding to the given mount and dentry. 71 func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry, locks *vfs.FileLocks) (*vfs.FileDescription, error) { 72 // You can create AF_UNIX, SOCK_RAW sockets. They're the same as 73 // SOCK_DGRAM and don't require CAP_NET_RAW. 74 if stype == linux.SOCK_RAW { 75 stype = linux.SOCK_DGRAM 76 } 77 78 sock := &SocketVFS2{ 79 socketOpsCommon: socketOpsCommon{ 80 ep: ep, 81 stype: stype, 82 }, 83 } 84 sock.InitRefs() 85 sock.LockFD.Init(locks) 86 vfsfd := &sock.vfsfd 87 if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{ 88 DenyPRead: true, 89 DenyPWrite: true, 90 UseDentryMetadata: true, 91 }); err != nil { 92 return nil, err 93 } 94 return vfsfd, nil 95 } 96 97 // DecRef implements RefCounter.DecRef. 98 func (s *SocketVFS2) DecRef(ctx context.Context) { 99 s.socketVFS2Refs.DecRef(func() { 100 kernel.KernelFromContext(ctx).DeleteSocketVFS2(&s.vfsfd) 101 s.ep.Close(ctx) 102 if s.abstractNamespace != nil { 103 s.abstractNamespace.Remove(s.abstractName, s) 104 } 105 }) 106 } 107 108 // Release implements vfs.FileDescriptionImpl.Release. 109 func (s *SocketVFS2) Release(ctx context.Context) { 110 // Release only decrements a reference on s because s may be referenced in 111 // the abstract socket namespace. 112 s.DecRef(ctx) 113 } 114 115 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by 116 // a transport.Endpoint. 117 func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error) { 118 return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outPtr, outLen) 119 } 120 121 // blockingAccept implements a blocking version of accept(2), that is, if no 122 // connections are ready to be accept, it will block until one becomes ready. 123 func (s *SocketVFS2) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAddress) (transport.Endpoint, *syserr.Error) { 124 // Register for notifications. 125 e, ch := waiter.NewChannelEntry(nil) 126 s.socketOpsCommon.EventRegister(&e, waiter.ReadableEvents) 127 defer s.socketOpsCommon.EventUnregister(&e) 128 129 // Try to accept the connection; if it fails, then wait until we get a 130 // notification. 131 for { 132 if ep, err := s.ep.Accept(peerAddr); err != syserr.ErrWouldBlock { 133 return ep, err 134 } 135 136 if err := t.Block(ch); err != nil { 137 return nil, syserr.FromError(err) 138 } 139 } 140 } 141 142 // Accept implements the linux syscall accept(2) for sockets backed by 143 // a transport.Endpoint. 144 func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) { 145 var peerAddr *tcpip.FullAddress 146 if peerRequested { 147 peerAddr = &tcpip.FullAddress{} 148 } 149 ep, err := s.ep.Accept(peerAddr) 150 if err != nil { 151 if err != syserr.ErrWouldBlock || !blocking { 152 return 0, nil, 0, err 153 } 154 155 var err *syserr.Error 156 ep, err = s.blockingAccept(t, peerAddr) 157 if err != nil { 158 return 0, nil, 0, err 159 } 160 } 161 162 ns, err := NewSockfsFile(t, ep, s.stype) 163 if err != nil { 164 return 0, nil, 0, err 165 } 166 defer ns.DecRef(t) 167 168 if flags&linux.SOCK_NONBLOCK != 0 { 169 ns.SetStatusFlags(t, t.Credentials(), linux.SOCK_NONBLOCK) 170 } 171 172 var addr linux.SockAddr 173 var addrLen uint32 174 if peerAddr != nil { 175 addr, addrLen = socket.ConvertAddress(linux.AF_UNIX, *peerAddr) 176 } 177 178 fd, e := t.NewFDFromVFS2(0, ns, kernel.FDFlags{ 179 CloseOnExec: flags&linux.SOCK_CLOEXEC != 0, 180 }) 181 if e != nil { 182 return 0, nil, 0, syserr.FromError(e) 183 } 184 185 t.Kernel().RecordSocketVFS2(ns) 186 return fd, addr, addrLen, nil 187 } 188 189 // Bind implements the linux syscall bind(2) for unix sockets. 190 func (s *SocketVFS2) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { 191 p, e := extractPath(sockaddr) 192 if e != nil { 193 return e 194 } 195 196 bep, ok := s.ep.(transport.BoundEndpoint) 197 if !ok { 198 // This socket can't be bound. 199 return syserr.ErrInvalidArgument 200 } 201 202 return s.ep.Bind(tcpip.FullAddress{Addr: tcpip.Address(p)}, func() *syserr.Error { 203 // Is it abstract? 204 if p[0] == 0 { 205 if t.IsNetworkNamespaced() { 206 return syserr.ErrInvalidEndpointState 207 } 208 asn := t.AbstractSockets() 209 name := p[1:] 210 if err := asn.Bind(t, name, bep, s); err != nil { 211 // syserr.ErrPortInUse corresponds to EADDRINUSE. 212 return syserr.ErrPortInUse 213 } 214 s.abstractName = name 215 s.abstractNamespace = asn 216 } else { 217 path := fspath.Parse(p) 218 root := t.FSContext().RootDirectoryVFS2() 219 defer root.DecRef(t) 220 start := root 221 relPath := !path.Absolute 222 if relPath { 223 start = t.FSContext().WorkingDirectoryVFS2() 224 defer start.DecRef(t) 225 } 226 pop := vfs.PathOperation{ 227 Root: root, 228 Start: start, 229 Path: path, 230 } 231 stat, err := s.vfsfd.Stat(t, vfs.StatOptions{Mask: linux.STATX_MODE}) 232 if err != nil { 233 return syserr.FromError(err) 234 } 235 err = t.Kernel().VFS().MknodAt(t, t.Credentials(), &pop, &vfs.MknodOptions{ 236 // File permissions correspond to net/unix/af_unix.c:unix_bind. 237 Mode: linux.FileMode(linux.S_IFSOCK | uint(stat.Mode)&^t.FSContext().Umask()), 238 Endpoint: bep, 239 }) 240 if linuxerr.Equals(linuxerr.EEXIST, err) { 241 return syserr.ErrAddressInUse 242 } 243 return syserr.FromError(err) 244 } 245 246 return nil 247 }) 248 } 249 250 // Ioctl implements vfs.FileDescriptionImpl. 251 func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { 252 return netstack.Ioctl(ctx, s.ep, uio, args) 253 } 254 255 // PRead implements vfs.FileDescriptionImpl. 256 func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 257 return 0, linuxerr.ESPIPE 258 } 259 260 // Read implements vfs.FileDescriptionImpl. 261 func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 262 // All flags other than RWF_NOWAIT should be ignored. 263 // TODO(github.com/SagerNet/issue/2601): Support RWF_NOWAIT. 264 if opts.Flags != 0 { 265 return 0, syserror.EOPNOTSUPP 266 } 267 268 if dst.NumBytes() == 0 { 269 return 0, nil 270 } 271 r := &EndpointReader{ 272 Ctx: ctx, 273 Endpoint: s.ep, 274 NumRights: 0, 275 Peek: false, 276 From: nil, 277 } 278 n, err := dst.CopyOutFrom(ctx, r) 279 // Drop control messages. 280 r.Control.Release(ctx) 281 return n, err 282 } 283 284 // PWrite implements vfs.FileDescriptionImpl. 285 func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 286 return 0, linuxerr.ESPIPE 287 } 288 289 // Write implements vfs.FileDescriptionImpl. 290 func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 291 // All flags other than RWF_NOWAIT should be ignored. 292 // TODO(github.com/SagerNet/issue/2601): Support RWF_NOWAIT. 293 if opts.Flags != 0 { 294 return 0, syserror.EOPNOTSUPP 295 } 296 297 t := kernel.TaskFromContext(ctx) 298 ctrl := control.New(t, s.ep, nil) 299 300 if src.NumBytes() == 0 { 301 nInt, err := s.ep.SendMsg(ctx, [][]byte{}, ctrl, nil) 302 return int64(nInt), err.ToError() 303 } 304 305 return src.CopyInTo(ctx, &EndpointWriter{ 306 Ctx: ctx, 307 Endpoint: s.ep, 308 Control: ctrl, 309 To: nil, 310 }) 311 } 312 313 // Readiness implements waiter.Waitable.Readiness. 314 func (s *SocketVFS2) Readiness(mask waiter.EventMask) waiter.EventMask { 315 return s.socketOpsCommon.Readiness(mask) 316 } 317 318 // EventRegister implements waiter.Waitable.EventRegister. 319 func (s *SocketVFS2) EventRegister(e *waiter.Entry, mask waiter.EventMask) { 320 s.socketOpsCommon.EventRegister(e, mask) 321 } 322 323 // EventUnregister implements waiter.Waitable.EventUnregister. 324 func (s *SocketVFS2) EventUnregister(e *waiter.Entry) { 325 s.socketOpsCommon.EventUnregister(e) 326 } 327 328 // SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by 329 // a transport.Endpoint. 330 func (s *SocketVFS2) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error { 331 return netstack.SetSockOpt(t, s, s.ep, level, name, optVal) 332 } 333 334 // providerVFS2 is a unix domain socket provider for VFS2. 335 type providerVFS2 struct{} 336 337 func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) { 338 // Check arguments. 339 if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ { 340 return nil, syserr.ErrProtocolNotSupported 341 } 342 343 // Create the endpoint and socket. 344 var ep transport.Endpoint 345 switch stype { 346 case linux.SOCK_DGRAM, linux.SOCK_RAW: 347 ep = transport.NewConnectionless(t) 348 case linux.SOCK_SEQPACKET, linux.SOCK_STREAM: 349 ep = transport.NewConnectioned(t, stype, t.Kernel()) 350 default: 351 return nil, syserr.ErrInvalidArgument 352 } 353 354 f, err := NewSockfsFile(t, ep, stype) 355 if err != nil { 356 ep.Close(t) 357 return nil, err 358 } 359 return f, nil 360 } 361 362 // Pair creates a new pair of AF_UNIX connected sockets. 363 func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) { 364 // Check arguments. 365 if protocol != 0 && protocol != linux.AF_UNIX /* PF_UNIX */ { 366 return nil, nil, syserr.ErrProtocolNotSupported 367 } 368 369 switch stype { 370 case linux.SOCK_STREAM, linux.SOCK_DGRAM, linux.SOCK_SEQPACKET, linux.SOCK_RAW: 371 // Ok 372 default: 373 return nil, nil, syserr.ErrInvalidArgument 374 } 375 376 // Create the endpoints and sockets. 377 ep1, ep2 := transport.NewPair(t, stype, t.Kernel()) 378 s1, err := NewSockfsFile(t, ep1, stype) 379 if err != nil { 380 ep1.Close(t) 381 ep2.Close(t) 382 return nil, nil, err 383 } 384 s2, err := NewSockfsFile(t, ep2, stype) 385 if err != nil { 386 s1.DecRef(t) 387 ep2.Close(t) 388 return nil, nil, err 389 } 390 391 return s1, s2, nil 392 }