github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/socket/unix/transport/connectioned.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package transport 16 17 import ( 18 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 19 "github.com/MerlinKodo/gvisor/pkg/context" 20 "github.com/MerlinKodo/gvisor/pkg/fdnotifier" 21 "github.com/MerlinKodo/gvisor/pkg/sentry/uniqueid" 22 "github.com/MerlinKodo/gvisor/pkg/syserr" 23 "github.com/MerlinKodo/gvisor/pkg/tcpip" 24 "github.com/MerlinKodo/gvisor/pkg/waiter" 25 "golang.org/x/sys/unix" 26 ) 27 28 type locker interface { 29 Lock() 30 Unlock() 31 NestedLock(endpointlockNameIndex) 32 NestedUnlock(endpointlockNameIndex) 33 } 34 35 // A ConnectingEndpoint is a connectioned unix endpoint that is attempting to 36 // establish a bidirectional connection with a BoundEndpoint. 37 type ConnectingEndpoint interface { 38 // ID returns the endpoint's globally unique identifier. This identifier 39 // must be used to determine locking order if more than one endpoint is 40 // to be locked in the same codepath. The endpoint with the smaller 41 // identifier must be locked before endpoints with larger identifiers. 42 ID() uint64 43 44 // Passcred implements socket.Credentialer.Passcred. 45 Passcred() bool 46 47 // Type returns the socket type, typically either SockStream or 48 // SockSeqpacket. The connection attempt must be aborted if this 49 // value doesn't match the BoundEndpoint's type. 50 Type() linux.SockType 51 52 // GetLocalAddress returns the bound path. 53 GetLocalAddress() (Address, tcpip.Error) 54 55 // Locker protects the following methods. While locked, only the holder of 56 // the lock can change the return value of the protected methods. 57 locker 58 59 // Connected returns true iff the ConnectingEndpoint is in the connected 60 // state. ConnectingEndpoints can only be connected to a single endpoint, 61 // so the connection attempt must be aborted if this returns true. 62 Connected() bool 63 64 // ListeningLocked returns true iff the ConnectingEndpoint is in the 65 // listening state. ConnectingEndpoints cannot make connections while 66 // listening, so the connection attempt must be aborted if this returns 67 // true. 68 ListeningLocked() bool 69 70 // WaiterQueue returns a pointer to the endpoint's waiter queue. 71 WaiterQueue() *waiter.Queue 72 } 73 74 // connectionedEndpoint is a Unix-domain connected or connectable endpoint and implements 75 // ConnectingEndpoint, BoundEndpoint and tcpip.Endpoint. 76 // 77 // connectionedEndpoints must be in connected state in order to transfer data. 78 // 79 // This implementation includes STREAM and SEQPACKET Unix sockets created with 80 // socket(2), accept(2) or socketpair(2) and dgram unix sockets created with 81 // socketpair(2). See unix_connectionless.go for the implementation of DGRAM 82 // Unix sockets created with socket(2). 83 // 84 // The state is much simpler than a TCP endpoint, so it is not encoded 85 // explicitly. Instead we enforce the following invariants: 86 // 87 // receiver != nil, connected != nil => connected. 88 // path != "" && acceptedChan == nil => bound, not listening. 89 // path != "" && acceptedChan != nil => bound and listening. 90 // 91 // Only one of these will be true at any moment. 92 // 93 // +stateify savable 94 type connectionedEndpoint struct { 95 baseEndpoint 96 97 // id is the unique endpoint identifier. This is used exclusively for 98 // lock ordering within connect. 99 id uint64 100 101 // idGenerator is used to generate new unique endpoint identifiers. 102 idGenerator uniqueid.Provider 103 104 // stype is used by connecting sockets to ensure that they are the 105 // same type. The value is typically either tcpip.SockSeqpacket or 106 // tcpip.SockStream. 107 stype linux.SockType 108 109 // acceptedChan is per the TCP endpoint implementation. Note that the 110 // sockets in this channel are _already in the connected state_, and 111 // have another associated connectionedEndpoint. 112 // 113 // If nil, then no listen call has been made. 114 acceptedChan chan *connectionedEndpoint `state:".([]*connectionedEndpoint)"` 115 116 // boundSocketFD corresponds to a bound socket on the host filesystem 117 // that may listen and accept incoming connections. 118 // 119 // boundSocketFD is protected by baseEndpoint.mu. 120 boundSocketFD BoundSocketFD 121 } 122 123 var ( 124 _ = BoundEndpoint((*connectionedEndpoint)(nil)) 125 _ = Endpoint((*connectionedEndpoint)(nil)) 126 ) 127 128 // NewConnectioned creates a new unbound connectionedEndpoint. 129 func NewConnectioned(ctx context.Context, stype linux.SockType, uid uniqueid.Provider) Endpoint { 130 return newConnectioned(ctx, stype, uid) 131 } 132 133 func newConnectioned(ctx context.Context, stype linux.SockType, uid uniqueid.Provider) *connectionedEndpoint { 134 ep := &connectionedEndpoint{ 135 baseEndpoint: baseEndpoint{Queue: &waiter.Queue{}}, 136 id: uid.UniqueID(), 137 idGenerator: uid, 138 stype: stype, 139 } 140 141 ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) 142 ep.ops.SetSendBufferSize(defaultBufferSize, false /* notify */) 143 ep.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */) 144 return ep 145 } 146 147 // NewPair allocates a new pair of connected unix-domain connectionedEndpoints. 148 func NewPair(ctx context.Context, stype linux.SockType, uid uniqueid.Provider) (Endpoint, Endpoint) { 149 a := newConnectioned(ctx, stype, uid) 150 b := newConnectioned(ctx, stype, uid) 151 152 q1 := &queue{ReaderQueue: a.Queue, WriterQueue: b.Queue, limit: defaultBufferSize} 153 q1.InitRefs() 154 q2 := &queue{ReaderQueue: b.Queue, WriterQueue: a.Queue, limit: defaultBufferSize} 155 q2.InitRefs() 156 157 if stype == linux.SOCK_STREAM { 158 a.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q1}} 159 b.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{q2}} 160 } else { 161 a.receiver = &queueReceiver{q1} 162 b.receiver = &queueReceiver{q2} 163 } 164 165 q2.IncRef() 166 a.connected = &connectedEndpoint{ 167 endpoint: b, 168 writeQueue: q2, 169 } 170 q1.IncRef() 171 b.connected = &connectedEndpoint{ 172 endpoint: a, 173 writeQueue: q1, 174 } 175 176 return a, b 177 } 178 179 // NewExternal creates a new externally backed Endpoint. It behaves like a 180 // socketpair. 181 func NewExternal(stype linux.SockType, uid uniqueid.Provider, queue *waiter.Queue, receiver Receiver, connected ConnectedEndpoint) Endpoint { 182 ep := &connectionedEndpoint{ 183 baseEndpoint: baseEndpoint{Queue: queue, receiver: receiver, connected: connected}, 184 id: uid.UniqueID(), 185 idGenerator: uid, 186 stype: stype, 187 } 188 ep.ops.InitHandler(ep, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) 189 ep.ops.SetSendBufferSize(connected.SendMaxQueueSize(), false /* notify */) 190 ep.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */) 191 return ep 192 } 193 194 // ID implements ConnectingEndpoint.ID. 195 func (e *connectionedEndpoint) ID() uint64 { 196 return e.id 197 } 198 199 // Type implements ConnectingEndpoint.Type and Endpoint.Type. 200 func (e *connectionedEndpoint) Type() linux.SockType { 201 return e.stype 202 } 203 204 // WaiterQueue implements ConnectingEndpoint.WaiterQueue. 205 func (e *connectionedEndpoint) WaiterQueue() *waiter.Queue { 206 return e.Queue 207 } 208 209 // isBound returns true iff the connectionedEndpoint is bound (but not 210 // listening). 211 func (e *connectionedEndpoint) isBound() bool { 212 return e.path != "" && e.acceptedChan == nil 213 } 214 215 // Listening implements ConnectingEndpoint.Listening. 216 func (e *connectionedEndpoint) Listening() bool { 217 e.Lock() 218 defer e.Unlock() 219 return e.ListeningLocked() 220 } 221 222 func (e *connectionedEndpoint) ListeningLocked() bool { 223 return e.acceptedChan != nil 224 } 225 226 // Close puts the connectionedEndpoint in a closed state and frees all 227 // resources associated with it. 228 // 229 // The socket will be a fresh state after a call to close and may be reused. 230 // That is, close may be used to "unbind" or "disconnect" the socket in error 231 // paths. 232 func (e *connectionedEndpoint) Close(ctx context.Context) { 233 var acceptedChan chan *connectionedEndpoint 234 e.Lock() 235 var ( 236 c ConnectedEndpoint 237 r Receiver 238 ) 239 switch { 240 case e.Connected(): 241 e.connected.CloseSend() 242 e.receiver.CloseRecv() 243 // Still have unread data? If yes, we set this into the write 244 // end so that the peer can get ECONNRESET) when it does read. 245 if e.receiver.RecvQueuedSize() > 0 { 246 e.connected.CloseUnread() 247 } 248 c = e.connected 249 r = e.receiver 250 e.connected = nil 251 e.receiver = nil 252 case e.isBound(): 253 e.path = "" 254 case e.ListeningLocked(): 255 close(e.acceptedChan) 256 acceptedChan = e.acceptedChan 257 e.acceptedChan = nil 258 e.path = "" 259 } 260 e.Unlock() 261 if acceptedChan != nil { 262 for n := range acceptedChan { 263 n.Close(ctx) 264 } 265 } 266 if c != nil { 267 c.CloseNotify() 268 c.Release(ctx) 269 } 270 e.ResetBoundSocketFD(ctx) 271 if r != nil { 272 r.CloseNotify() 273 r.Release(ctx) 274 } 275 } 276 277 // BidirectionalConnect implements BoundEndpoint.BidirectionalConnect. 278 func (e *connectionedEndpoint) BidirectionalConnect(ctx context.Context, ce ConnectingEndpoint, returnConnect func(Receiver, ConnectedEndpoint)) *syserr.Error { 279 if ce.Type() != e.stype { 280 return syserr.ErrWrongProtocolForSocket 281 } 282 283 // Check if ce is e to avoid a deadlock. 284 if ce, ok := ce.(*connectionedEndpoint); ok && ce == e { 285 return syserr.ErrInvalidEndpointState 286 } 287 288 // Do a dance to safely acquire locks on both endpoints. 289 if e.id < ce.ID() { 290 e.Lock() 291 ce.NestedLock(endpointLockHigherid) 292 } else { 293 ce.Lock() 294 e.NestedLock(endpointLockHigherid) 295 } 296 297 // Check connecting state. 298 if ce.Connected() { 299 e.NestedUnlock(endpointLockHigherid) 300 ce.Unlock() 301 return syserr.ErrAlreadyConnected 302 } 303 if ce.ListeningLocked() { 304 e.NestedUnlock(endpointLockHigherid) 305 ce.Unlock() 306 return syserr.ErrInvalidEndpointState 307 } 308 309 // Check bound state. 310 if !e.ListeningLocked() { 311 e.NestedUnlock(endpointLockHigherid) 312 ce.Unlock() 313 return syserr.ErrConnectionRefused 314 } 315 316 // Create a newly bound connectionedEndpoint. 317 ne := &connectionedEndpoint{ 318 baseEndpoint: baseEndpoint{ 319 path: e.path, 320 Queue: &waiter.Queue{}, 321 }, 322 id: e.idGenerator.UniqueID(), 323 idGenerator: e.idGenerator, 324 stype: e.stype, 325 } 326 ne.ops.InitHandler(ne, &stackHandler{}, getSendBufferLimits, getReceiveBufferLimits) 327 ne.ops.SetSendBufferSize(defaultBufferSize, false /* notify */) 328 ne.ops.SetReceiveBufferSize(defaultBufferSize, false /* notify */) 329 ne.SocketOptions().SetPassCred(e.SocketOptions().GetPassCred()) 330 331 readQueue := &queue{ReaderQueue: ce.WaiterQueue(), WriterQueue: ne.Queue, limit: defaultBufferSize} 332 readQueue.InitRefs() 333 ne.connected = &connectedEndpoint{ 334 endpoint: ce, 335 writeQueue: readQueue, 336 } 337 338 // Make sure the accepted endpoint inherits this listening socket's SO_SNDBUF. 339 writeQueue := &queue{ReaderQueue: ne.Queue, WriterQueue: ce.WaiterQueue(), limit: e.ops.GetSendBufferSize()} 340 writeQueue.InitRefs() 341 if e.stype == linux.SOCK_STREAM { 342 ne.receiver = &streamQueueReceiver{queueReceiver: queueReceiver{readQueue: writeQueue}} 343 } else { 344 ne.receiver = &queueReceiver{readQueue: writeQueue} 345 } 346 347 select { 348 case e.acceptedChan <- ne: 349 // Commit state. 350 writeQueue.IncRef() 351 connected := &connectedEndpoint{ 352 endpoint: ne, 353 writeQueue: writeQueue, 354 } 355 readQueue.IncRef() 356 if e.stype == linux.SOCK_STREAM { 357 returnConnect(&streamQueueReceiver{queueReceiver: queueReceiver{readQueue: readQueue}}, connected) 358 } else { 359 returnConnect(&queueReceiver{readQueue: readQueue}, connected) 360 } 361 362 // Notify can deadlock if we are holding these locks. 363 e.NestedUnlock(endpointLockHigherid) 364 ce.Unlock() 365 366 // Notify on both ends. 367 e.Notify(waiter.ReadableEvents) 368 ce.WaiterQueue().Notify(waiter.WritableEvents) 369 370 return nil 371 default: 372 // Busy; return EAGAIN per spec. 373 e.NestedUnlock(endpointLockHigherid) 374 ce.Unlock() 375 ne.Close(ctx) 376 return syserr.ErrTryAgain 377 } 378 } 379 380 // UnidirectionalConnect implements BoundEndpoint.UnidirectionalConnect. 381 func (e *connectionedEndpoint) UnidirectionalConnect(ctx context.Context) (ConnectedEndpoint, *syserr.Error) { 382 return nil, syserr.ErrConnectionRefused 383 } 384 385 // Connect attempts to directly connect to another Endpoint. 386 // Implements Endpoint.Connect. 387 func (e *connectionedEndpoint) Connect(ctx context.Context, server BoundEndpoint) *syserr.Error { 388 returnConnect := func(r Receiver, ce ConnectedEndpoint) { 389 e.receiver = r 390 e.connected = ce 391 // Make sure the newly created connected endpoint's write queue is updated 392 // to reflect this endpoint's send buffer size. 393 if bufSz := e.connected.SetSendBufferSize(e.ops.GetSendBufferSize()); bufSz != e.ops.GetSendBufferSize() { 394 e.ops.SetSendBufferSize(bufSz, false /* notify */) 395 e.ops.SetReceiveBufferSize(bufSz, false /* notify */) 396 } 397 } 398 399 return server.BidirectionalConnect(ctx, e, returnConnect) 400 } 401 402 // Listen starts listening on the connection. 403 func (e *connectionedEndpoint) Listen(ctx context.Context, backlog int) *syserr.Error { 404 e.Lock() 405 defer e.Unlock() 406 if e.ListeningLocked() { 407 // Adjust the size of the channel iff we can fix existing 408 // pending connections into the new one. 409 if len(e.acceptedChan) > backlog { 410 return syserr.ErrInvalidEndpointState 411 } 412 origChan := e.acceptedChan 413 e.acceptedChan = make(chan *connectionedEndpoint, backlog) 414 close(origChan) 415 for ep := range origChan { 416 e.acceptedChan <- ep 417 } 418 if e.boundSocketFD != nil { 419 if err := e.boundSocketFD.Listen(ctx, int32(backlog)); err != nil { 420 return syserr.FromError(err) 421 } 422 } 423 return nil 424 } 425 if !e.isBound() { 426 return syserr.ErrInvalidEndpointState 427 } 428 429 // Normal case. 430 e.acceptedChan = make(chan *connectionedEndpoint, backlog) 431 if e.boundSocketFD != nil { 432 if err := e.boundSocketFD.Listen(ctx, int32(backlog)); err != nil { 433 return syserr.FromError(err) 434 } 435 } 436 437 return nil 438 } 439 440 // Accept accepts a new connection. 441 func (e *connectionedEndpoint) Accept(ctx context.Context, peerAddr *Address) (Endpoint, *syserr.Error) { 442 e.Lock() 443 444 if !e.ListeningLocked() { 445 e.Unlock() 446 return nil, syserr.ErrInvalidEndpointState 447 } 448 449 ne, err := e.getAcceptedEndpointLocked(ctx) 450 e.Unlock() 451 if err != nil { 452 return nil, err 453 } 454 455 if peerAddr != nil { 456 ne.Lock() 457 c := ne.connected 458 ne.Unlock() 459 if c != nil { 460 addr, err := c.GetLocalAddress() 461 if err != nil { 462 return nil, syserr.TranslateNetstackError(err) 463 } 464 *peerAddr = addr 465 } 466 } 467 return ne, nil 468 } 469 470 // Preconditions: 471 // - e.Listening() 472 // - e is locked. 473 func (e *connectionedEndpoint) getAcceptedEndpointLocked(ctx context.Context) (*connectionedEndpoint, *syserr.Error) { 474 // Accept connections from within the sentry first, since this avoids 475 // an RPC to the gofer on the common path. 476 select { 477 case ne := <-e.acceptedChan: 478 return ne, nil 479 default: 480 // No internal connections. 481 } 482 483 if e.boundSocketFD == nil { 484 return nil, syserr.ErrWouldBlock 485 } 486 487 // Check for external connections. 488 nfd, err := e.boundSocketFD.Accept(ctx) 489 if err == unix.EWOULDBLOCK { 490 return nil, syserr.ErrWouldBlock 491 } 492 if err != nil { 493 return nil, syserr.FromError(err) 494 } 495 q := &waiter.Queue{} 496 scme, serr := NewSCMEndpoint(nfd, q, e.path) 497 if serr != nil { 498 unix.Close(nfd) 499 return nil, serr 500 } 501 scme.Init() 502 return NewExternal(e.stype, e.idGenerator, q, scme, scme).(*connectionedEndpoint), nil 503 504 } 505 506 // Bind binds the connection. 507 // 508 // For Unix connectionedEndpoints, this _only sets the address associated with 509 // the socket_. Work associated with sockets in the filesystem or finding those 510 // sockets must be done by a higher level. 511 // 512 // Bind will fail only if the socket is connected, bound or the passed address 513 // is invalid (the empty string). 514 func (e *connectionedEndpoint) Bind(addr Address) *syserr.Error { 515 e.Lock() 516 defer e.Unlock() 517 if e.isBound() || e.ListeningLocked() { 518 return syserr.ErrAlreadyBound 519 } 520 if addr.Addr == "" { 521 // The empty string is not permitted. 522 return syserr.ErrBadLocalAddress 523 } 524 525 // Save the bound address. 526 e.path = addr.Addr 527 return nil 528 } 529 530 // SendMsg writes data and a control message to the endpoint's peer. 531 // This method does not block if the data cannot be written. 532 func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, func(), *syserr.Error) { 533 // Stream sockets do not support specifying the endpoint. Seqpacket 534 // sockets ignore the passed endpoint. 535 if e.stype == linux.SOCK_STREAM && to != nil { 536 return 0, nil, syserr.ErrNotSupported 537 } 538 return e.baseEndpoint.SendMsg(ctx, data, c, to) 539 } 540 541 func (e *connectionedEndpoint) isBoundSocketReadable() bool { 542 if e.boundSocketFD == nil { 543 return false 544 } 545 return fdnotifier.NonBlockingPoll(e.boundSocketFD.NotificationFD(), waiter.ReadableEvents)&waiter.ReadableEvents != 0 546 } 547 548 // Readiness returns the current readiness of the connectionedEndpoint. For 549 // example, if waiter.EventIn is set, the connectionedEndpoint is immediately 550 // readable. 551 func (e *connectionedEndpoint) Readiness(mask waiter.EventMask) waiter.EventMask { 552 e.Lock() 553 defer e.Unlock() 554 555 ready := waiter.EventMask(0) 556 switch { 557 case e.Connected(): 558 if mask&waiter.ReadableEvents != 0 && e.receiver.Readable() { 559 ready |= waiter.ReadableEvents 560 } 561 if mask&waiter.WritableEvents != 0 && e.connected.Writable() { 562 ready |= waiter.WritableEvents 563 } 564 case e.ListeningLocked(): 565 if mask&waiter.ReadableEvents != 0 && (len(e.acceptedChan) > 0 || e.isBoundSocketReadable()) { 566 ready |= waiter.ReadableEvents 567 } 568 } 569 570 return ready 571 } 572 573 // State implements socket.Socket.State. 574 func (e *connectionedEndpoint) State() uint32 { 575 e.Lock() 576 defer e.Unlock() 577 578 if e.Connected() { 579 return linux.SS_CONNECTED 580 } 581 return linux.SS_UNCONNECTED 582 } 583 584 // OnSetSendBufferSize implements tcpip.SocketOptionsHandler.OnSetSendBufferSize. 585 func (e *connectionedEndpoint) OnSetSendBufferSize(v int64) (newSz int64) { 586 e.Lock() 587 defer e.Unlock() 588 if e.Connected() { 589 return e.baseEndpoint.connected.SetSendBufferSize(v) 590 } 591 return v 592 } 593 594 // WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters. 595 func (e *connectionedEndpoint) WakeupWriters() {} 596 597 // SetBoundSocketFD implement HostBountEndpoint.SetBoundSocketFD. 598 func (e *connectionedEndpoint) SetBoundSocketFD(ctx context.Context, bsFD BoundSocketFD) error { 599 e.Lock() 600 defer e.Unlock() 601 if e.path != "" || e.boundSocketFD != nil { 602 bsFD.Close(ctx) 603 return syserr.ErrAlreadyBound.ToError() 604 } 605 e.boundSocketFD = bsFD 606 fdnotifier.AddFD(bsFD.NotificationFD(), e.Queue) 607 return nil 608 } 609 610 // SetBoundSocketFD implement HostBountEndpoint.ResetBoundSocketFD. 611 func (e *connectionedEndpoint) ResetBoundSocketFD(ctx context.Context) { 612 e.Lock() 613 defer e.Unlock() 614 if e.boundSocketFD == nil { 615 return 616 } 617 fdnotifier.RemoveFD(e.boundSocketFD.NotificationFD()) 618 e.boundSocketFD.Close(ctx) 619 e.boundSocketFD = nil 620 } 621 622 // EventRegister implements waiter.Waitable.EventRegister. 623 func (e *connectionedEndpoint) EventRegister(we *waiter.Entry) error { 624 if err := e.baseEndpoint.EventRegister(we); err != nil { 625 return err 626 } 627 628 e.Lock() 629 bsFD := e.boundSocketFD 630 e.Unlock() 631 if bsFD != nil { 632 fdnotifier.UpdateFD(bsFD.NotificationFD()) 633 } 634 return nil 635 } 636 637 // EventUnregister implements waiter.Waitable.EventUnregister. 638 func (e *connectionedEndpoint) EventUnregister(we *waiter.Entry) { 639 e.baseEndpoint.EventUnregister(we) 640 641 e.Lock() 642 bsFD := e.boundSocketFD 643 e.Unlock() 644 if bsFD != nil { 645 fdnotifier.UpdateFD(bsFD.NotificationFD()) 646 } 647 }