github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/vfs2/socket.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs2 16 17 import ( 18 "time" 19 20 "github.com/SagerNet/gvisor/pkg/abi/linux" 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/marshal" 23 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 24 "github.com/SagerNet/gvisor/pkg/sentry/arch" 25 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 26 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 27 "github.com/SagerNet/gvisor/pkg/sentry/socket" 28 "github.com/SagerNet/gvisor/pkg/sentry/socket/control" 29 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 30 slinux "github.com/SagerNet/gvisor/pkg/sentry/syscalls/linux" 31 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 32 "github.com/SagerNet/gvisor/pkg/syserr" 33 "github.com/SagerNet/gvisor/pkg/syserror" 34 "github.com/SagerNet/gvisor/pkg/usermem" 35 36 "github.com/SagerNet/gvisor/pkg/hostarch" 37 ) 38 39 // maxAddrLen is the maximum socket address length we're willing to accept. 40 const maxAddrLen = 200 41 42 // maxOptLen is the maximum sockopt parameter length we're willing to accept. 43 const maxOptLen = 1024 * 8 44 45 // maxControlLen is the maximum length of the msghdr.msg_control buffer we're 46 // willing to accept. Note that this limit is smaller than Linux, which allows 47 // buffers upto INT_MAX. 48 const maxControlLen = 10 * 1024 * 1024 49 50 // maxListenBacklog is the maximum limit of listen backlog supported. 51 const maxListenBacklog = 1024 52 53 // nameLenOffset is the offset from the start of the MessageHeader64 struct to 54 // the NameLen field. 55 const nameLenOffset = 8 56 57 // controlLenOffset is the offset form the start of the MessageHeader64 struct 58 // to the ControlLen field. 59 const controlLenOffset = 40 60 61 // flagsOffset is the offset form the start of the MessageHeader64 struct 62 // to the Flags field. 63 const flagsOffset = 48 64 65 const sizeOfInt32 = 4 66 67 // messageHeader64Len is the length of a MessageHeader64 struct. 68 var messageHeader64Len = uint64((*MessageHeader64)(nil).SizeBytes()) 69 70 // multipleMessageHeader64Len is the length of a multipeMessageHeader64 struct. 71 var multipleMessageHeader64Len = uint64((*multipleMessageHeader64)(nil).SizeBytes()) 72 73 // baseRecvFlags are the flags that are accepted across recvmsg(2), 74 // recvmmsg(2), and recvfrom(2). 75 const baseRecvFlags = linux.MSG_OOB | linux.MSG_DONTROUTE | linux.MSG_DONTWAIT | linux.MSG_NOSIGNAL | linux.MSG_WAITALL | linux.MSG_TRUNC | linux.MSG_CTRUNC 76 77 // MessageHeader64 is the 64-bit representation of the msghdr struct used in 78 // the recvmsg and sendmsg syscalls. 79 // 80 // +marshal 81 type MessageHeader64 struct { 82 // Name is the optional pointer to a network address buffer. 83 Name uint64 84 85 // NameLen is the length of the buffer pointed to by Name. 86 NameLen uint32 87 _ uint32 88 89 // Iov is a pointer to an array of io vectors that describe the memory 90 // locations involved in the io operation. 91 Iov uint64 92 93 // IovLen is the length of the array pointed to by Iov. 94 IovLen uint64 95 96 // Control is the optional pointer to ancillary control data. 97 Control uint64 98 99 // ControlLen is the length of the data pointed to by Control. 100 ControlLen uint64 101 102 // Flags on the sent/received message. 103 Flags int32 104 _ int32 105 } 106 107 // multipleMessageHeader64 is the 64-bit representation of the mmsghdr struct used in 108 // the recvmmsg and sendmmsg syscalls. 109 // 110 // +marshal 111 type multipleMessageHeader64 struct { 112 msgHdr MessageHeader64 113 msgLen uint32 114 _ int32 115 } 116 117 // CaptureAddress allocates memory for and copies a socket address structure 118 // from the untrusted address space range. 119 func CaptureAddress(t *kernel.Task, addr hostarch.Addr, addrlen uint32) ([]byte, error) { 120 if addrlen > maxAddrLen { 121 return nil, linuxerr.EINVAL 122 } 123 124 addrBuf := make([]byte, addrlen) 125 if _, err := t.CopyInBytes(addr, addrBuf); err != nil { 126 return nil, err 127 } 128 129 return addrBuf, nil 130 } 131 132 // writeAddress writes a sockaddr structure and its length to an output buffer 133 // in the unstrusted address space range. If the address is bigger than the 134 // buffer, it is truncated. 135 func writeAddress(t *kernel.Task, addr linux.SockAddr, addrLen uint32, addrPtr hostarch.Addr, addrLenPtr hostarch.Addr) error { 136 // Get the buffer length. 137 var bufLen uint32 138 if _, err := primitive.CopyUint32In(t, addrLenPtr, &bufLen); err != nil { 139 return err 140 } 141 142 if int32(bufLen) < 0 { 143 return linuxerr.EINVAL 144 } 145 146 // Write the length unconditionally. 147 if _, err := primitive.CopyUint32Out(t, addrLenPtr, addrLen); err != nil { 148 return err 149 } 150 151 if addr == nil { 152 return nil 153 } 154 155 if bufLen > addrLen { 156 bufLen = addrLen 157 } 158 159 // Copy as much of the address as will fit in the buffer. 160 encodedAddr := t.CopyScratchBuffer(addr.SizeBytes()) 161 addr.MarshalUnsafe(encodedAddr) 162 if bufLen > uint32(len(encodedAddr)) { 163 bufLen = uint32(len(encodedAddr)) 164 } 165 _, err := t.CopyOutBytes(addrPtr, encodedAddr[:int(bufLen)]) 166 return err 167 } 168 169 // Socket implements the linux syscall socket(2). 170 func Socket(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 171 domain := int(args[0].Int()) 172 stype := args[1].Int() 173 protocol := int(args[2].Int()) 174 175 // Check and initialize the flags. 176 if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { 177 return 0, nil, linuxerr.EINVAL 178 } 179 180 // Create the new socket. 181 s, e := socket.NewVFS2(t, domain, linux.SockType(stype&0xf), protocol) 182 if e != nil { 183 return 0, nil, e.ToError() 184 } 185 defer s.DecRef(t) 186 187 if err := s.SetStatusFlags(t, t.Credentials(), uint32(stype&linux.SOCK_NONBLOCK)); err != nil { 188 return 0, nil, err 189 } 190 191 fd, err := t.NewFDFromVFS2(0, s, kernel.FDFlags{ 192 CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, 193 }) 194 if err != nil { 195 return 0, nil, err 196 } 197 198 return uintptr(fd), nil, nil 199 } 200 201 // SocketPair implements the linux syscall socketpair(2). 202 func SocketPair(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 203 domain := int(args[0].Int()) 204 stype := args[1].Int() 205 protocol := int(args[2].Int()) 206 addr := args[3].Pointer() 207 208 // Check and initialize the flags. 209 if stype & ^(0xf|linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { 210 return 0, nil, linuxerr.EINVAL 211 } 212 213 // Create the socket pair. 214 s1, s2, e := socket.PairVFS2(t, domain, linux.SockType(stype&0xf), protocol) 215 if e != nil { 216 return 0, nil, e.ToError() 217 } 218 // Adding to the FD table will cause an extra reference to be acquired. 219 defer s1.DecRef(t) 220 defer s2.DecRef(t) 221 222 nonblocking := uint32(stype & linux.SOCK_NONBLOCK) 223 if err := s1.SetStatusFlags(t, t.Credentials(), nonblocking); err != nil { 224 return 0, nil, err 225 } 226 if err := s2.SetStatusFlags(t, t.Credentials(), nonblocking); err != nil { 227 return 0, nil, err 228 } 229 230 // Create the FDs for the sockets. 231 flags := kernel.FDFlags{ 232 CloseOnExec: stype&linux.SOCK_CLOEXEC != 0, 233 } 234 fds, err := t.NewFDsVFS2(0, []*vfs.FileDescription{s1, s2}, flags) 235 if err != nil { 236 return 0, nil, err 237 } 238 239 if _, err := primitive.CopyInt32SliceOut(t, addr, fds); err != nil { 240 for _, fd := range fds { 241 if _, file := t.FDTable().Remove(t, fd); file != nil { 242 file.DecRef(t) 243 } 244 } 245 return 0, nil, err 246 } 247 248 return 0, nil, nil 249 } 250 251 // Connect implements the linux syscall connect(2). 252 func Connect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 253 fd := args[0].Int() 254 addr := args[1].Pointer() 255 addrlen := args[2].Uint() 256 257 // Get socket from the file descriptor. 258 file := t.GetFileVFS2(fd) 259 if file == nil { 260 return 0, nil, linuxerr.EBADF 261 } 262 defer file.DecRef(t) 263 264 // Extract the socket. 265 s, ok := file.Impl().(socket.SocketVFS2) 266 if !ok { 267 return 0, nil, syserror.ENOTSOCK 268 } 269 270 // Capture address and call syscall implementation. 271 a, err := CaptureAddress(t, addr, addrlen) 272 if err != nil { 273 return 0, nil, err 274 } 275 276 blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0 277 return 0, nil, syserror.ConvertIntr(s.Connect(t, a, blocking).ToError(), syserror.ERESTARTSYS) 278 } 279 280 // accept is the implementation of the accept syscall. It is called by accept 281 // and accept4 syscall handlers. 282 func accept(t *kernel.Task, fd int32, addr hostarch.Addr, addrLen hostarch.Addr, flags int) (uintptr, error) { 283 // Check that no unsupported flags are passed in. 284 if flags & ^(linux.SOCK_NONBLOCK|linux.SOCK_CLOEXEC) != 0 { 285 return 0, linuxerr.EINVAL 286 } 287 288 // Get socket from the file descriptor. 289 file := t.GetFileVFS2(fd) 290 if file == nil { 291 return 0, linuxerr.EBADF 292 } 293 defer file.DecRef(t) 294 295 // Extract the socket. 296 s, ok := file.Impl().(socket.SocketVFS2) 297 if !ok { 298 return 0, syserror.ENOTSOCK 299 } 300 301 // Call the syscall implementation for this socket, then copy the 302 // output address if one is specified. 303 blocking := (file.StatusFlags() & linux.SOCK_NONBLOCK) == 0 304 305 peerRequested := addrLen != 0 306 nfd, peer, peerLen, e := s.Accept(t, peerRequested, flags, blocking) 307 if e != nil { 308 return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) 309 } 310 if peerRequested { 311 // NOTE(magi): Linux does not give you an error if it can't 312 // write the data back out so neither do we. 313 if err := writeAddress(t, peer, peerLen, addr, addrLen); linuxerr.Equals(linuxerr.EINVAL, err) { 314 return 0, err 315 } 316 } 317 return uintptr(nfd), nil 318 } 319 320 // Accept4 implements the linux syscall accept4(2). 321 func Accept4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 322 fd := args[0].Int() 323 addr := args[1].Pointer() 324 addrlen := args[2].Pointer() 325 flags := int(args[3].Int()) 326 327 n, err := accept(t, fd, addr, addrlen, flags) 328 return n, nil, err 329 } 330 331 // Accept implements the linux syscall accept(2). 332 func Accept(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 333 fd := args[0].Int() 334 addr := args[1].Pointer() 335 addrlen := args[2].Pointer() 336 337 n, err := accept(t, fd, addr, addrlen, 0) 338 return n, nil, err 339 } 340 341 // Bind implements the linux syscall bind(2). 342 func Bind(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 343 fd := args[0].Int() 344 addr := args[1].Pointer() 345 addrlen := args[2].Uint() 346 347 // Get socket from the file descriptor. 348 file := t.GetFileVFS2(fd) 349 if file == nil { 350 return 0, nil, linuxerr.EBADF 351 } 352 defer file.DecRef(t) 353 354 // Extract the socket. 355 s, ok := file.Impl().(socket.SocketVFS2) 356 if !ok { 357 return 0, nil, syserror.ENOTSOCK 358 } 359 360 // Capture address and call syscall implementation. 361 a, err := CaptureAddress(t, addr, addrlen) 362 if err != nil { 363 return 0, nil, err 364 } 365 366 return 0, nil, s.Bind(t, a).ToError() 367 } 368 369 // Listen implements the linux syscall listen(2). 370 func Listen(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 371 fd := args[0].Int() 372 backlog := args[1].Uint() 373 374 // Get socket from the file descriptor. 375 file := t.GetFileVFS2(fd) 376 if file == nil { 377 return 0, nil, linuxerr.EBADF 378 } 379 defer file.DecRef(t) 380 381 // Extract the socket. 382 s, ok := file.Impl().(socket.SocketVFS2) 383 if !ok { 384 return 0, nil, syserror.ENOTSOCK 385 } 386 387 if backlog > maxListenBacklog { 388 // Linux treats incoming backlog as uint with a limit defined by 389 // sysctl_somaxconn. 390 // https://github.com/torvalds/linux/blob/7acac4b3196/net/socket.c#L1666 391 backlog = maxListenBacklog 392 } 393 394 // Accept one more than the configured listen backlog to keep in parity with 395 // Linux. Ref, because of missing equality check here: 396 // https://github.com/torvalds/linux/blob/7acac4b3196/include/net/sock.h#L937 397 // 398 // In case of unix domain sockets, the following check 399 // https://github.com/torvalds/linux/blob/7d6beb71da3/net/unix/af_unix.c#L1293 400 // will allow 1 connect through since it checks for a receive queue len > 401 // backlog and not >=. 402 backlog++ 403 404 return 0, nil, s.Listen(t, int(backlog)).ToError() 405 } 406 407 // Shutdown implements the linux syscall shutdown(2). 408 func Shutdown(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 409 fd := args[0].Int() 410 how := args[1].Int() 411 412 // Get socket from the file descriptor. 413 file := t.GetFileVFS2(fd) 414 if file == nil { 415 return 0, nil, linuxerr.EBADF 416 } 417 defer file.DecRef(t) 418 419 // Extract the socket. 420 s, ok := file.Impl().(socket.SocketVFS2) 421 if !ok { 422 return 0, nil, syserror.ENOTSOCK 423 } 424 425 // Validate how, then call syscall implementation. 426 switch how { 427 case linux.SHUT_RD, linux.SHUT_WR, linux.SHUT_RDWR: 428 default: 429 return 0, nil, linuxerr.EINVAL 430 } 431 432 return 0, nil, s.Shutdown(t, int(how)).ToError() 433 } 434 435 // GetSockOpt implements the linux syscall getsockopt(2). 436 func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 437 fd := args[0].Int() 438 level := args[1].Int() 439 name := args[2].Int() 440 optValAddr := args[3].Pointer() 441 optLenAddr := args[4].Pointer() 442 443 // Get socket from the file descriptor. 444 file := t.GetFileVFS2(fd) 445 if file == nil { 446 return 0, nil, linuxerr.EBADF 447 } 448 defer file.DecRef(t) 449 450 // Extract the socket. 451 s, ok := file.Impl().(socket.SocketVFS2) 452 if !ok { 453 return 0, nil, syserror.ENOTSOCK 454 } 455 456 // Read the length. Reject negative values. 457 var optLen int32 458 if _, err := primitive.CopyInt32In(t, optLenAddr, &optLen); err != nil { 459 return 0, nil, err 460 } 461 if optLen < 0 { 462 return 0, nil, linuxerr.EINVAL 463 } 464 465 // Call syscall implementation then copy both value and value len out. 466 v, e := getSockOpt(t, s, int(level), int(name), optValAddr, int(optLen)) 467 if e != nil { 468 return 0, nil, e.ToError() 469 } 470 471 if _, err := primitive.CopyInt32Out(t, optLenAddr, int32(v.SizeBytes())); err != nil { 472 return 0, nil, err 473 } 474 475 if v != nil { 476 if _, err := v.CopyOut(t, optValAddr); err != nil { 477 return 0, nil, err 478 } 479 } 480 481 return 0, nil, nil 482 } 483 484 // getSockOpt tries to handle common socket options, or dispatches to a specific 485 // socket implementation. 486 func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr hostarch.Addr, len int) (marshal.Marshallable, *syserr.Error) { 487 if level == linux.SOL_SOCKET { 488 switch name { 489 case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL: 490 if len < sizeOfInt32 { 491 return nil, syserr.ErrInvalidArgument 492 } 493 } 494 495 switch name { 496 case linux.SO_TYPE: 497 _, skType, _ := s.Type() 498 v := primitive.Int32(skType) 499 return &v, nil 500 case linux.SO_DOMAIN: 501 family, _, _ := s.Type() 502 v := primitive.Int32(family) 503 return &v, nil 504 case linux.SO_PROTOCOL: 505 _, _, protocol := s.Type() 506 v := primitive.Int32(protocol) 507 return &v, nil 508 } 509 } 510 511 return s.GetSockOpt(t, level, name, optValAddr, len) 512 } 513 514 // SetSockOpt implements the linux syscall setsockopt(2). 515 // 516 // Note that unlike Linux, enabling SO_PASSCRED does not autobind the socket. 517 func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 518 fd := args[0].Int() 519 level := args[1].Int() 520 name := args[2].Int() 521 optValAddr := args[3].Pointer() 522 optLen := args[4].Int() 523 524 // Get socket from the file descriptor. 525 file := t.GetFileVFS2(fd) 526 if file == nil { 527 return 0, nil, linuxerr.EBADF 528 } 529 defer file.DecRef(t) 530 531 // Extract the socket. 532 s, ok := file.Impl().(socket.SocketVFS2) 533 if !ok { 534 return 0, nil, syserror.ENOTSOCK 535 } 536 537 if optLen < 0 { 538 return 0, nil, linuxerr.EINVAL 539 } 540 if optLen > maxOptLen { 541 return 0, nil, linuxerr.EINVAL 542 } 543 buf := t.CopyScratchBuffer(int(optLen)) 544 if _, err := t.CopyInBytes(optValAddr, buf); err != nil { 545 return 0, nil, err 546 } 547 548 // Call syscall implementation. 549 if err := s.SetSockOpt(t, int(level), int(name), buf); err != nil { 550 return 0, nil, err.ToError() 551 } 552 553 return 0, nil, nil 554 } 555 556 // GetSockName implements the linux syscall getsockname(2). 557 func GetSockName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 558 fd := args[0].Int() 559 addr := args[1].Pointer() 560 addrlen := args[2].Pointer() 561 562 // Get socket from the file descriptor. 563 file := t.GetFileVFS2(fd) 564 if file == nil { 565 return 0, nil, linuxerr.EBADF 566 } 567 defer file.DecRef(t) 568 569 // Extract the socket. 570 s, ok := file.Impl().(socket.SocketVFS2) 571 if !ok { 572 return 0, nil, syserror.ENOTSOCK 573 } 574 575 // Get the socket name and copy it to the caller. 576 v, vl, err := s.GetSockName(t) 577 if err != nil { 578 return 0, nil, err.ToError() 579 } 580 581 return 0, nil, writeAddress(t, v, vl, addr, addrlen) 582 } 583 584 // GetPeerName implements the linux syscall getpeername(2). 585 func GetPeerName(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 586 fd := args[0].Int() 587 addr := args[1].Pointer() 588 addrlen := args[2].Pointer() 589 590 // Get socket from the file descriptor. 591 file := t.GetFileVFS2(fd) 592 if file == nil { 593 return 0, nil, linuxerr.EBADF 594 } 595 defer file.DecRef(t) 596 597 // Extract the socket. 598 s, ok := file.Impl().(socket.SocketVFS2) 599 if !ok { 600 return 0, nil, syserror.ENOTSOCK 601 } 602 603 // Get the socket peer name and copy it to the caller. 604 v, vl, err := s.GetPeerName(t) 605 if err != nil { 606 return 0, nil, err.ToError() 607 } 608 609 return 0, nil, writeAddress(t, v, vl, addr, addrlen) 610 } 611 612 // RecvMsg implements the linux syscall recvmsg(2). 613 func RecvMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 614 fd := args[0].Int() 615 msgPtr := args[1].Pointer() 616 flags := args[2].Int() 617 618 if t.Arch().Width() != 8 { 619 // We only handle 64-bit for now. 620 return 0, nil, linuxerr.EINVAL 621 } 622 623 // Get socket from the file descriptor. 624 file := t.GetFileVFS2(fd) 625 if file == nil { 626 return 0, nil, linuxerr.EBADF 627 } 628 defer file.DecRef(t) 629 630 // Extract the socket. 631 s, ok := file.Impl().(socket.SocketVFS2) 632 if !ok { 633 return 0, nil, syserror.ENOTSOCK 634 } 635 636 // Reject flags that we don't handle yet. 637 if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { 638 return 0, nil, linuxerr.EINVAL 639 } 640 641 if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { 642 flags |= linux.MSG_DONTWAIT 643 } 644 645 var haveDeadline bool 646 var deadline ktime.Time 647 if dl := s.RecvTimeout(); dl > 0 { 648 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 649 haveDeadline = true 650 } else if dl < 0 { 651 flags |= linux.MSG_DONTWAIT 652 } 653 654 n, err := recvSingleMsg(t, s, msgPtr, flags, haveDeadline, deadline) 655 return n, nil, err 656 } 657 658 // RecvMMsg implements the linux syscall recvmmsg(2). 659 func RecvMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 660 fd := args[0].Int() 661 msgPtr := args[1].Pointer() 662 vlen := args[2].Uint() 663 flags := args[3].Int() 664 toPtr := args[4].Pointer() 665 666 if t.Arch().Width() != 8 { 667 // We only handle 64-bit for now. 668 return 0, nil, linuxerr.EINVAL 669 } 670 671 if vlen > linux.UIO_MAXIOV { 672 vlen = linux.UIO_MAXIOV 673 } 674 675 // Reject flags that we don't handle yet. 676 if flags & ^(baseRecvFlags|linux.MSG_CMSG_CLOEXEC|linux.MSG_ERRQUEUE) != 0 { 677 return 0, nil, linuxerr.EINVAL 678 } 679 680 // Get socket from the file descriptor. 681 file := t.GetFileVFS2(fd) 682 if file == nil { 683 return 0, nil, linuxerr.EBADF 684 } 685 defer file.DecRef(t) 686 687 // Extract the socket. 688 s, ok := file.Impl().(socket.SocketVFS2) 689 if !ok { 690 return 0, nil, syserror.ENOTSOCK 691 } 692 693 if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { 694 flags |= linux.MSG_DONTWAIT 695 } 696 697 var haveDeadline bool 698 var deadline ktime.Time 699 if toPtr != 0 { 700 var ts linux.Timespec 701 if _, err := ts.CopyIn(t, toPtr); err != nil { 702 return 0, nil, err 703 } 704 if !ts.Valid() { 705 return 0, nil, linuxerr.EINVAL 706 } 707 deadline = t.Kernel().MonotonicClock().Now().Add(ts.ToDuration()) 708 haveDeadline = true 709 } 710 711 if !haveDeadline { 712 if dl := s.RecvTimeout(); dl > 0 { 713 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 714 haveDeadline = true 715 } else if dl < 0 { 716 flags |= linux.MSG_DONTWAIT 717 } 718 } 719 720 var count uint32 721 var err error 722 for i := uint64(0); i < uint64(vlen); i++ { 723 mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) 724 if !ok { 725 return 0, nil, syserror.EFAULT 726 } 727 var n uintptr 728 if n, err = recvSingleMsg(t, s, mp, flags, haveDeadline, deadline); err != nil { 729 break 730 } 731 732 // Copy the received length to the caller. 733 lp, ok := mp.AddLength(messageHeader64Len) 734 if !ok { 735 return 0, nil, syserror.EFAULT 736 } 737 if _, err = primitive.CopyUint32Out(t, lp, uint32(n)); err != nil { 738 break 739 } 740 count++ 741 } 742 743 if count == 0 { 744 return 0, nil, err 745 } 746 return uintptr(count), nil, nil 747 } 748 749 func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr hostarch.Addr, flags int32, haveDeadline bool, deadline ktime.Time) (uintptr, error) { 750 // Capture the message header and io vectors. 751 var msg MessageHeader64 752 if _, err := msg.CopyIn(t, msgPtr); err != nil { 753 return 0, err 754 } 755 756 if msg.IovLen > linux.UIO_MAXIOV { 757 return 0, linuxerr.EMSGSIZE 758 } 759 dst, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ 760 AddressSpaceActive: true, 761 }) 762 if err != nil { 763 return 0, err 764 } 765 766 // Fast path when no control message nor name buffers are provided. 767 if msg.ControlLen == 0 && msg.NameLen == 0 { 768 n, mflags, _, _, cms, err := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, false, 0) 769 if err != nil { 770 return 0, syserror.ConvertIntr(err.ToError(), syserror.ERESTARTSYS) 771 } 772 if !cms.Unix.Empty() { 773 mflags |= linux.MSG_CTRUNC 774 cms.Release(t) 775 } 776 777 if int(msg.Flags) != mflags { 778 // Copy out the flags to the caller. 779 if _, err := primitive.CopyInt32Out(t, msgPtr+flagsOffset, int32(mflags)); err != nil { 780 return 0, err 781 } 782 } 783 784 return uintptr(n), nil 785 } 786 787 if msg.ControlLen > maxControlLen { 788 return 0, linuxerr.ENOBUFS 789 } 790 n, mflags, sender, senderLen, cms, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, msg.NameLen != 0, msg.ControlLen) 791 if e != nil { 792 return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) 793 } 794 defer cms.Release(t) 795 796 controlData := make([]byte, 0, msg.ControlLen) 797 controlData = control.PackControlMessages(t, cms, controlData) 798 799 if cr, ok := s.(transport.Credentialer); ok && cr.Passcred() { 800 creds, _ := cms.Unix.Credentials.(control.SCMCredentials) 801 controlData, mflags = control.PackCredentials(t, creds, controlData, mflags) 802 } 803 804 if cms.Unix.Rights != nil { 805 controlData, mflags = control.PackRightsVFS2(t, cms.Unix.Rights.(control.SCMRightsVFS2), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags) 806 } 807 808 // Copy the address to the caller. 809 if msg.NameLen != 0 { 810 if err := writeAddress(t, sender, senderLen, hostarch.Addr(msg.Name), hostarch.Addr(msgPtr+nameLenOffset)); err != nil { 811 return 0, err 812 } 813 } 814 815 // Copy the control data to the caller. 816 if _, err := primitive.CopyUint64Out(t, msgPtr+controlLenOffset, uint64(len(controlData))); err != nil { 817 return 0, err 818 } 819 if len(controlData) > 0 { 820 if _, err := t.CopyOutBytes(hostarch.Addr(msg.Control), controlData); err != nil { 821 return 0, err 822 } 823 } 824 825 // Copy out the flags to the caller. 826 if _, err := primitive.CopyInt32Out(t, msgPtr+flagsOffset, int32(mflags)); err != nil { 827 return 0, err 828 } 829 830 return uintptr(n), nil 831 } 832 833 // recvFrom is the implementation of the recvfrom syscall. It is called by 834 // recvfrom and recv syscall handlers. 835 func recvFrom(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLenPtr hostarch.Addr) (uintptr, error) { 836 if int(bufLen) < 0 { 837 return 0, linuxerr.EINVAL 838 } 839 840 // Reject flags that we don't handle yet. 841 if flags & ^(baseRecvFlags|linux.MSG_PEEK|linux.MSG_CONFIRM) != 0 { 842 return 0, linuxerr.EINVAL 843 } 844 845 // Get socket from the file descriptor. 846 file := t.GetFileVFS2(fd) 847 if file == nil { 848 return 0, linuxerr.EBADF 849 } 850 defer file.DecRef(t) 851 852 // Extract the socket. 853 s, ok := file.Impl().(socket.SocketVFS2) 854 if !ok { 855 return 0, syserror.ENOTSOCK 856 } 857 858 if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { 859 flags |= linux.MSG_DONTWAIT 860 } 861 862 dst, err := t.SingleIOSequence(bufPtr, int(bufLen), usermem.IOOpts{ 863 AddressSpaceActive: true, 864 }) 865 if err != nil { 866 return 0, err 867 } 868 869 var haveDeadline bool 870 var deadline ktime.Time 871 if dl := s.RecvTimeout(); dl > 0 { 872 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 873 haveDeadline = true 874 } else if dl < 0 { 875 flags |= linux.MSG_DONTWAIT 876 } 877 878 n, _, sender, senderLen, cm, e := s.RecvMsg(t, dst, int(flags), haveDeadline, deadline, nameLenPtr != 0, 0) 879 cm.Release(t) 880 if e != nil { 881 return 0, syserror.ConvertIntr(e.ToError(), syserror.ERESTARTSYS) 882 } 883 884 // Copy the address to the caller. 885 if nameLenPtr != 0 { 886 if err := writeAddress(t, sender, senderLen, namePtr, nameLenPtr); err != nil { 887 return 0, err 888 } 889 } 890 891 return uintptr(n), nil 892 } 893 894 // RecvFrom implements the linux syscall recvfrom(2). 895 func RecvFrom(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 896 fd := args[0].Int() 897 bufPtr := args[1].Pointer() 898 bufLen := args[2].Uint64() 899 flags := args[3].Int() 900 namePtr := args[4].Pointer() 901 nameLenPtr := args[5].Pointer() 902 903 n, err := recvFrom(t, fd, bufPtr, bufLen, flags, namePtr, nameLenPtr) 904 return n, nil, err 905 } 906 907 // SendMsg implements the linux syscall sendmsg(2). 908 func SendMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 909 fd := args[0].Int() 910 msgPtr := args[1].Pointer() 911 flags := args[2].Int() 912 913 if t.Arch().Width() != 8 { 914 // We only handle 64-bit for now. 915 return 0, nil, linuxerr.EINVAL 916 } 917 918 // Get socket from the file descriptor. 919 file := t.GetFileVFS2(fd) 920 if file == nil { 921 return 0, nil, linuxerr.EBADF 922 } 923 defer file.DecRef(t) 924 925 // Extract the socket. 926 s, ok := file.Impl().(socket.SocketVFS2) 927 if !ok { 928 return 0, nil, syserror.ENOTSOCK 929 } 930 931 // Reject flags that we don't handle yet. 932 if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { 933 return 0, nil, linuxerr.EINVAL 934 } 935 936 if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { 937 flags |= linux.MSG_DONTWAIT 938 } 939 940 n, err := sendSingleMsg(t, s, file, msgPtr, flags) 941 return n, nil, err 942 } 943 944 // SendMMsg implements the linux syscall sendmmsg(2). 945 func SendMMsg(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 946 fd := args[0].Int() 947 msgPtr := args[1].Pointer() 948 vlen := args[2].Uint() 949 flags := args[3].Int() 950 951 if t.Arch().Width() != 8 { 952 // We only handle 64-bit for now. 953 return 0, nil, linuxerr.EINVAL 954 } 955 956 if vlen > linux.UIO_MAXIOV { 957 vlen = linux.UIO_MAXIOV 958 } 959 960 // Get socket from the file descriptor. 961 file := t.GetFileVFS2(fd) 962 if file == nil { 963 return 0, nil, linuxerr.EBADF 964 } 965 defer file.DecRef(t) 966 967 // Extract the socket. 968 s, ok := file.Impl().(socket.SocketVFS2) 969 if !ok { 970 return 0, nil, syserror.ENOTSOCK 971 } 972 973 // Reject flags that we don't handle yet. 974 if flags & ^(linux.MSG_DONTWAIT|linux.MSG_EOR|linux.MSG_MORE|linux.MSG_NOSIGNAL) != 0 { 975 return 0, nil, linuxerr.EINVAL 976 } 977 978 if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { 979 flags |= linux.MSG_DONTWAIT 980 } 981 982 var count uint32 983 var err error 984 for i := uint64(0); i < uint64(vlen); i++ { 985 mp, ok := msgPtr.AddLength(i * multipleMessageHeader64Len) 986 if !ok { 987 return 0, nil, syserror.EFAULT 988 } 989 var n uintptr 990 if n, err = sendSingleMsg(t, s, file, mp, flags); err != nil { 991 break 992 } 993 994 // Copy the received length to the caller. 995 lp, ok := mp.AddLength(messageHeader64Len) 996 if !ok { 997 return 0, nil, syserror.EFAULT 998 } 999 if _, err = primitive.CopyUint32Out(t, lp, uint32(n)); err != nil { 1000 break 1001 } 1002 count++ 1003 } 1004 1005 if count == 0 { 1006 return 0, nil, err 1007 } 1008 return uintptr(count), nil, nil 1009 } 1010 1011 func sendSingleMsg(t *kernel.Task, s socket.SocketVFS2, file *vfs.FileDescription, msgPtr hostarch.Addr, flags int32) (uintptr, error) { 1012 // Capture the message header. 1013 var msg MessageHeader64 1014 if _, err := msg.CopyIn(t, msgPtr); err != nil { 1015 return 0, err 1016 } 1017 1018 var controlData []byte 1019 if msg.ControlLen > 0 { 1020 // Put an upper bound to prevent large allocations. 1021 if msg.ControlLen > maxControlLen { 1022 return 0, linuxerr.ENOBUFS 1023 } 1024 controlData = make([]byte, msg.ControlLen) 1025 if _, err := t.CopyInBytes(hostarch.Addr(msg.Control), controlData); err != nil { 1026 return 0, err 1027 } 1028 } 1029 1030 // Read the destination address if one is specified. 1031 var to []byte 1032 if msg.NameLen != 0 { 1033 var err error 1034 to, err = CaptureAddress(t, hostarch.Addr(msg.Name), msg.NameLen) 1035 if err != nil { 1036 return 0, err 1037 } 1038 } 1039 1040 // Read data then call the sendmsg implementation. 1041 if msg.IovLen > linux.UIO_MAXIOV { 1042 return 0, linuxerr.EMSGSIZE 1043 } 1044 src, err := t.IovecsIOSequence(hostarch.Addr(msg.Iov), int(msg.IovLen), usermem.IOOpts{ 1045 AddressSpaceActive: true, 1046 }) 1047 if err != nil { 1048 return 0, err 1049 } 1050 1051 controlMessages, err := control.Parse(t, s, controlData, t.Arch().Width()) 1052 if err != nil { 1053 return 0, err 1054 } 1055 1056 var haveDeadline bool 1057 var deadline ktime.Time 1058 if dl := s.SendTimeout(); dl > 0 { 1059 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 1060 haveDeadline = true 1061 } else if dl < 0 { 1062 flags |= linux.MSG_DONTWAIT 1063 } 1064 1065 // Call the syscall implementation. 1066 n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, controlMessages) 1067 err = slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendmsg", file) 1068 // Control messages should be released on error as well as for zero-length 1069 // messages, which are discarded by the receiver. 1070 if n == 0 || err != nil { 1071 controlMessages.Release(t) 1072 } 1073 return uintptr(n), err 1074 } 1075 1076 // sendTo is the implementation of the sendto syscall. It is called by sendto 1077 // and send syscall handlers. 1078 func sendTo(t *kernel.Task, fd int32, bufPtr hostarch.Addr, bufLen uint64, flags int32, namePtr hostarch.Addr, nameLen uint32) (uintptr, error) { 1079 bl := int(bufLen) 1080 if bl < 0 { 1081 return 0, linuxerr.EINVAL 1082 } 1083 1084 // Get socket from the file descriptor. 1085 file := t.GetFileVFS2(fd) 1086 if file == nil { 1087 return 0, linuxerr.EBADF 1088 } 1089 defer file.DecRef(t) 1090 1091 // Extract the socket. 1092 s, ok := file.Impl().(socket.SocketVFS2) 1093 if !ok { 1094 return 0, syserror.ENOTSOCK 1095 } 1096 1097 if (file.StatusFlags() & linux.SOCK_NONBLOCK) != 0 { 1098 flags |= linux.MSG_DONTWAIT 1099 } 1100 1101 // Read the destination address if one is specified. 1102 var to []byte 1103 var err error 1104 if namePtr != 0 { 1105 to, err = CaptureAddress(t, namePtr, nameLen) 1106 if err != nil { 1107 return 0, err 1108 } 1109 } 1110 1111 src, err := t.SingleIOSequence(bufPtr, bl, usermem.IOOpts{ 1112 AddressSpaceActive: true, 1113 }) 1114 if err != nil { 1115 return 0, err 1116 } 1117 1118 var haveDeadline bool 1119 var deadline ktime.Time 1120 if dl := s.SendTimeout(); dl > 0 { 1121 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 1122 haveDeadline = true 1123 } else if dl < 0 { 1124 flags |= linux.MSG_DONTWAIT 1125 } 1126 1127 // Call the syscall implementation. 1128 n, e := s.SendMsg(t, src, to, int(flags), haveDeadline, deadline, socket.ControlMessages{Unix: control.New(t, s, nil)}) 1129 return uintptr(n), slinux.HandleIOErrorVFS2(t, n != 0, e.ToError(), syserror.ERESTARTSYS, "sendto", file) 1130 } 1131 1132 // SendTo implements the linux syscall sendto(2). 1133 func SendTo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 1134 fd := args[0].Int() 1135 bufPtr := args[1].Pointer() 1136 bufLen := args[2].Uint64() 1137 flags := args[3].Int() 1138 namePtr := args[4].Pointer() 1139 nameLen := args[5].Uint() 1140 1141 n, err := sendTo(t, fd, bufPtr, bufLen, flags, namePtr, nameLen) 1142 return n, nil, err 1143 }