github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/socket/unix/transport/host.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package transport 16 17 import ( 18 "fmt" 19 20 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 21 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 22 "github.com/MerlinKodo/gvisor/pkg/context" 23 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 24 "github.com/MerlinKodo/gvisor/pkg/fdnotifier" 25 "github.com/MerlinKodo/gvisor/pkg/log" 26 "github.com/MerlinKodo/gvisor/pkg/sync" 27 "github.com/MerlinKodo/gvisor/pkg/syserr" 28 "github.com/MerlinKodo/gvisor/pkg/tcpip" 29 "github.com/MerlinKodo/gvisor/pkg/unet" 30 "github.com/MerlinKodo/gvisor/pkg/waiter" 31 "golang.org/x/sys/unix" 32 ) 33 34 // SCMRights implements RightsControlMessage with host FDs. 35 type SCMRights struct { 36 FDs []int 37 } 38 39 // Clone implements RightsControlMessage.Clone. 40 func (c *SCMRights) Clone() RightsControlMessage { 41 // Host rights never need to be cloned. 42 return nil 43 } 44 45 // Release implements RightsControlMessage.Release. 46 func (c *SCMRights) Release(ctx context.Context) { 47 for _, fd := range c.FDs { 48 unix.Close(fd) 49 } 50 c.FDs = nil 51 } 52 53 // HostConnectedEndpoint is an implementation of ConnectedEndpoint and 54 // Receiver. It is backed by a host fd that was imported at sentry startup. 55 // This fd is shared with a hostfs inode, which retains ownership of it. 56 // 57 // HostConnectedEndpoint is saveable, since we expect that the host will 58 // provide the same fd upon restore. 59 // 60 // As of this writing, we only allow Unix sockets to be imported. 61 // 62 // +stateify savable 63 type HostConnectedEndpoint struct { 64 HostConnectedEndpointRefs 65 66 // mu protects fd below. 67 mu sync.RWMutex `state:"nosave"` 68 69 // fd is the host fd backing this endpoint. 70 fd int 71 72 // addr is the address at which this endpoint is bound. 73 addr string 74 75 // sndbuf is the size of the send buffer. 76 // 77 // N.B. When this is smaller than the host size, we present it via 78 // GetSockOpt and message splitting/rejection in SendMsg, but do not 79 // prevent lots of small messages from filling the real send buffer 80 // size on the host. 81 sndbuf atomicbitops.Int64 `state:"nosave"` 82 83 // stype is the type of Unix socket. 84 stype linux.SockType 85 } 86 87 // init performs initialization required for creating new 88 // HostConnectedEndpoints and for restoring them. 89 func (c *HostConnectedEndpoint) init() *syserr.Error { 90 c.InitRefs() 91 return c.initFromOptions() 92 } 93 94 func (c *HostConnectedEndpoint) initFromOptions() *syserr.Error { 95 family, err := unix.GetsockoptInt(c.fd, unix.SOL_SOCKET, unix.SO_DOMAIN) 96 if err != nil { 97 return syserr.FromError(err) 98 } 99 100 if family != unix.AF_UNIX { 101 // We only allow Unix sockets. 102 return syserr.ErrInvalidEndpointState 103 } 104 105 stype, err := unix.GetsockoptInt(c.fd, unix.SOL_SOCKET, unix.SO_TYPE) 106 if err != nil { 107 return syserr.FromError(err) 108 } 109 110 if err := unix.SetNonblock(c.fd, true); err != nil { 111 return syserr.FromError(err) 112 } 113 114 sndbuf, err := unix.GetsockoptInt(c.fd, unix.SOL_SOCKET, unix.SO_SNDBUF) 115 if err != nil { 116 return syserr.FromError(err) 117 } 118 119 c.stype = linux.SockType(stype) 120 c.sndbuf.Store(int64(sndbuf)) 121 122 return nil 123 } 124 125 // NewHostConnectedEndpoint creates a new HostConnectedEndpoint backed by a 126 // host fd imported at sentry startup. 127 // 128 // The caller is responsible for calling Init(). Additionally, Release needs to 129 // be called twice because HostConnectedEndpoint is both a Receiver and 130 // HostConnectedEndpoint. 131 func NewHostConnectedEndpoint(hostFD int, addr string) (*HostConnectedEndpoint, *syserr.Error) { 132 e := HostConnectedEndpoint{ 133 fd: hostFD, 134 addr: addr, 135 } 136 137 if err := e.init(); err != nil { 138 return nil, err 139 } 140 141 // HostConnectedEndpointRefs start off with a single reference. We need two. 142 e.IncRef() 143 return &e, nil 144 } 145 146 // SockType returns the underlying socket type. 147 func (c *HostConnectedEndpoint) SockType() linux.SockType { 148 return c.stype 149 } 150 151 // Send implements ConnectedEndpoint.Send. 152 func (c *HostConnectedEndpoint) Send(ctx context.Context, data [][]byte, controlMessages ControlMessages, from Address) (int64, bool, *syserr.Error) { 153 c.mu.RLock() 154 defer c.mu.RUnlock() 155 156 if !controlMessages.Empty() { 157 return 0, false, syserr.ErrInvalidEndpointState 158 } 159 160 // Since stream sockets don't preserve message boundaries, we can write 161 // only as much of the message as fits in the send buffer. 162 truncate := c.stype == linux.SOCK_STREAM 163 164 n, totalLen, err := fdWriteVec(c.fd, data, c.SendMaxQueueSize(), truncate) 165 if n < totalLen && err == nil { 166 // The host only returns a short write if it would otherwise 167 // block (and only for stream sockets). 168 err = linuxerr.EAGAIN 169 } 170 if n > 0 && !linuxerr.Equals(linuxerr.EAGAIN, err) { 171 // The caller may need to block to send more data, but 172 // otherwise there isn't anything that can be done about an 173 // error with a partial write. 174 err = nil 175 } 176 177 // There is no need for the callee to call SendNotify because fdWriteVec 178 // uses the host's sendmsg(2) and the host kernel's queue. 179 return n, false, syserr.FromError(err) 180 } 181 182 // SendNotify implements ConnectedEndpoint.SendNotify. 183 func (c *HostConnectedEndpoint) SendNotify() {} 184 185 // CloseSend implements ConnectedEndpoint.CloseSend. 186 func (c *HostConnectedEndpoint) CloseSend() { 187 c.mu.Lock() 188 defer c.mu.Unlock() 189 190 if err := unix.Shutdown(c.fd, unix.SHUT_WR); err != nil { 191 // A well-formed UDS shutdown can't fail. See 192 // net/unix/af_unix.c:unix_shutdown. 193 panic(fmt.Sprintf("failed write shutdown on host socket %+v: %v", c, err)) 194 } 195 } 196 197 // CloseNotify implements ConnectedEndpoint.CloseNotify. 198 func (c *HostConnectedEndpoint) CloseNotify() {} 199 200 // Writable implements ConnectedEndpoint.Writable. 201 func (c *HostConnectedEndpoint) Writable() bool { 202 c.mu.RLock() 203 defer c.mu.RUnlock() 204 205 return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.WritableEvents)&waiter.WritableEvents != 0 206 } 207 208 // Passcred implements ConnectedEndpoint.Passcred. 209 func (c *HostConnectedEndpoint) Passcred() bool { 210 // We don't support credential passing for host sockets. 211 return false 212 } 213 214 // GetLocalAddress implements ConnectedEndpoint.GetLocalAddress. 215 func (c *HostConnectedEndpoint) GetLocalAddress() (Address, tcpip.Error) { 216 return Address{Addr: c.addr}, nil 217 } 218 219 // EventUpdate implements ConnectedEndpoint.EventUpdate. 220 func (c *HostConnectedEndpoint) EventUpdate() error { 221 c.mu.RLock() 222 defer c.mu.RUnlock() 223 if c.fd != -1 { 224 if err := fdnotifier.UpdateFD(int32(c.fd)); err != nil { 225 return err 226 } 227 } 228 return nil 229 } 230 231 // Recv implements Receiver.Recv. 232 func (c *HostConnectedEndpoint) Recv(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, Address, bool, *syserr.Error) { 233 c.mu.RLock() 234 defer c.mu.RUnlock() 235 236 var cm unet.ControlMessage 237 if numRights > 0 { 238 cm.EnableFDs(int(numRights)) 239 } 240 241 // N.B. Unix sockets don't have a receive buffer, the send buffer 242 // serves both purposes. 243 rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.RecvMaxQueueSize()) 244 if rl > 0 && err != nil { 245 // We got some data, so all we need to do on error is return 246 // the data that we got. Short reads are fine, no need to 247 // block. 248 err = nil 249 } 250 if err != nil { 251 return 0, 0, ControlMessages{}, false, Address{}, false, syserr.FromError(err) 252 } 253 254 // There is no need for the callee to call RecvNotify because fdReadVec uses 255 // the host's recvmsg(2) and the host kernel's queue. 256 257 // Trim the control data if we received less than the full amount. 258 if cl < uint64(len(cm)) { 259 cm = cm[:cl] 260 } 261 262 // Avoid extra allocations in the case where there isn't any control data. 263 if len(cm) == 0 { 264 return rl, ml, ControlMessages{}, cTrunc, Address{Addr: c.addr}, false, nil 265 } 266 267 fds, err := cm.ExtractFDs() 268 if err != nil { 269 return 0, 0, ControlMessages{}, false, Address{}, false, syserr.FromError(err) 270 } 271 272 if len(fds) == 0 { 273 return rl, ml, ControlMessages{}, cTrunc, Address{Addr: c.addr}, false, nil 274 } 275 return rl, ml, ControlMessages{Rights: &SCMRights{fds}}, cTrunc, Address{Addr: c.addr}, false, nil 276 } 277 278 // RecvNotify implements Receiver.RecvNotify. 279 func (c *HostConnectedEndpoint) RecvNotify() {} 280 281 // CloseRecv implements Receiver.CloseRecv. 282 func (c *HostConnectedEndpoint) CloseRecv() { 283 c.mu.Lock() 284 defer c.mu.Unlock() 285 286 if err := unix.Shutdown(c.fd, unix.SHUT_RD); err != nil { 287 // A well-formed UDS shutdown can't fail. See 288 // net/unix/af_unix.c:unix_shutdown. 289 panic(fmt.Sprintf("failed read shutdown on host socket %+v: %v", c, err)) 290 } 291 } 292 293 // Readable implements Receiver.Readable. 294 func (c *HostConnectedEndpoint) Readable() bool { 295 c.mu.RLock() 296 defer c.mu.RUnlock() 297 298 return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.ReadableEvents)&waiter.ReadableEvents != 0 299 } 300 301 // SendQueuedSize implements Receiver.SendQueuedSize. 302 func (c *HostConnectedEndpoint) SendQueuedSize() int64 { 303 // TODO(gvisor.dev/issue/273): SendQueuedSize isn't supported for host 304 // sockets because we don't allow the sentry to call ioctl(2). 305 return -1 306 } 307 308 // RecvQueuedSize implements Receiver.RecvQueuedSize. 309 func (c *HostConnectedEndpoint) RecvQueuedSize() int64 { 310 // TODO(gvisor.dev/issue/273): RecvQueuedSize isn't supported for host 311 // sockets because we don't allow the sentry to call ioctl(2). 312 return -1 313 } 314 315 // SendMaxQueueSize implements Receiver.SendMaxQueueSize. 316 func (c *HostConnectedEndpoint) SendMaxQueueSize() int64 { 317 return c.sndbuf.Load() 318 } 319 320 // RecvMaxQueueSize implements Receiver.RecvMaxQueueSize. 321 func (c *HostConnectedEndpoint) RecvMaxQueueSize() int64 { 322 // N.B. Unix sockets don't use the receive buffer. We'll claim it is 323 // the same size as the send buffer. 324 return c.sndbuf.Load() 325 } 326 327 func (c *HostConnectedEndpoint) destroyLocked() { 328 c.fd = -1 329 } 330 331 // Release implements ConnectedEndpoint.Release and Receiver.Release. 332 func (c *HostConnectedEndpoint) Release(ctx context.Context) { 333 c.DecRef(func() { 334 c.mu.Lock() 335 c.destroyLocked() 336 c.mu.Unlock() 337 }) 338 } 339 340 // CloseUnread implements ConnectedEndpoint.CloseUnread. 341 func (c *HostConnectedEndpoint) CloseUnread() {} 342 343 // SetSendBufferSize implements ConnectedEndpoint.SetSendBufferSize. 344 func (c *HostConnectedEndpoint) SetSendBufferSize(v int64) (newSz int64) { 345 // gVisor does not permit setting of SO_SNDBUF for host backed unix 346 // domain sockets. 347 return c.sndbuf.Load() 348 } 349 350 // SetReceiveBufferSize implements ConnectedEndpoint.SetReceiveBufferSize. 351 func (c *HostConnectedEndpoint) SetReceiveBufferSize(v int64) (newSz int64) { 352 // gVisor does not permit setting of SO_RCVBUF for host backed unix 353 // domain sockets. Receive buffer does not have any effect for unix 354 // sockets and we claim to be the same as send buffer. 355 return c.sndbuf.Load() 356 } 357 358 // SCMConnectedEndpoint represents an endpoint backed by a host fd that was 359 // passed through a gofer Unix socket. It resembles HostConnectedEndpoint, with the 360 // following differences: 361 // - SCMConnectedEndpoint is not saveable, because the host cannot guarantee 362 // the same descriptor number across S/R. 363 // - SCMConnectedEndpoint holds ownership of its fd and notification queue. 364 type SCMConnectedEndpoint struct { 365 HostConnectedEndpoint 366 367 queue *waiter.Queue 368 } 369 370 // Init will do the initialization required without holding other locks. 371 func (e *SCMConnectedEndpoint) Init() error { 372 return fdnotifier.AddFD(int32(e.fd), e.queue) 373 } 374 375 // Release implements ConnectedEndpoint.Release and Receiver.Release. 376 func (e *SCMConnectedEndpoint) Release(ctx context.Context) { 377 e.DecRef(func() { 378 e.mu.Lock() 379 fdnotifier.RemoveFD(int32(e.fd)) 380 if err := unix.Close(e.fd); err != nil { 381 log.Warningf("Failed to close host fd %d: %v", err) 382 } 383 e.destroyLocked() 384 e.mu.Unlock() 385 }) 386 } 387 388 // NewSCMEndpoint creates a new SCMConnectedEndpoint backed by a host fd that 389 // was passed through a Unix socket. 390 // 391 // The caller is responsible for calling Init(). Additionaly, Release needs to 392 // be called twice because ConnectedEndpoint is both a Receiver and 393 // ConnectedEndpoint. 394 func NewSCMEndpoint(hostFD int, queue *waiter.Queue, addr string) (*SCMConnectedEndpoint, *syserr.Error) { 395 e := SCMConnectedEndpoint{ 396 HostConnectedEndpoint: HostConnectedEndpoint{ 397 fd: hostFD, 398 addr: addr, 399 }, 400 queue: queue, 401 } 402 403 if err := e.init(); err != nil { 404 return nil, err 405 } 406 407 // e starts off with a single reference. We need two. 408 e.IncRef() 409 return &e, nil 410 }