github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/socket/socket.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package socket provides the interfaces that need to be provided by socket 16 // implementations and providers, as well as per family demultiplexing of socket 17 // creation. 18 package socket 19 20 import ( 21 "bytes" 22 "fmt" 23 "sync/atomic" 24 25 "golang.org/x/sys/unix" 26 "github.com/SagerNet/gvisor/pkg/abi/linux" 27 "github.com/SagerNet/gvisor/pkg/context" 28 "github.com/SagerNet/gvisor/pkg/hostarch" 29 "github.com/SagerNet/gvisor/pkg/marshal" 30 "github.com/SagerNet/gvisor/pkg/sentry/device" 31 "github.com/SagerNet/gvisor/pkg/sentry/fs" 32 "github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil" 33 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 34 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 35 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 36 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 37 "github.com/SagerNet/gvisor/pkg/syserr" 38 "github.com/SagerNet/gvisor/pkg/tcpip" 39 "github.com/SagerNet/gvisor/pkg/tcpip/header" 40 "github.com/SagerNet/gvisor/pkg/usermem" 41 ) 42 43 // ControlMessages represents the union of unix control messages and tcpip 44 // control messages. 
45 type ControlMessages struct { 46 Unix transport.ControlMessages 47 IP IPControlMessages 48 } 49 50 // packetInfoToLinux converts IPPacketInfo from tcpip format to Linux format. 51 func packetInfoToLinux(packetInfo tcpip.IPPacketInfo) linux.ControlMessageIPPacketInfo { 52 var p linux.ControlMessageIPPacketInfo 53 p.NIC = int32(packetInfo.NIC) 54 copy(p.LocalAddr[:], []byte(packetInfo.LocalAddr)) 55 copy(p.DestinationAddr[:], []byte(packetInfo.DestinationAddr)) 56 return p 57 } 58 59 // errOriginToLinux maps tcpip socket origin to Linux socket origin constants. 60 func errOriginToLinux(origin tcpip.SockErrOrigin) uint8 { 61 switch origin { 62 case tcpip.SockExtErrorOriginNone: 63 return linux.SO_EE_ORIGIN_NONE 64 case tcpip.SockExtErrorOriginLocal: 65 return linux.SO_EE_ORIGIN_LOCAL 66 case tcpip.SockExtErrorOriginICMP: 67 return linux.SO_EE_ORIGIN_ICMP 68 case tcpip.SockExtErrorOriginICMP6: 69 return linux.SO_EE_ORIGIN_ICMP6 70 default: 71 panic(fmt.Sprintf("unknown socket origin: %d", origin)) 72 } 73 } 74 75 // sockErrCmsgToLinux converts SockError control message from tcpip format to 76 // Linux format. 
77 func sockErrCmsgToLinux(sockErr *tcpip.SockError) linux.SockErrCMsg { 78 if sockErr == nil { 79 return nil 80 } 81 82 ee := linux.SockExtendedErr{ 83 Errno: uint32(syserr.TranslateNetstackError(sockErr.Err).ToLinux()), 84 Origin: errOriginToLinux(sockErr.Cause.Origin()), 85 Type: sockErr.Cause.Type(), 86 Code: sockErr.Cause.Code(), 87 Info: sockErr.Cause.Info(), 88 } 89 90 switch sockErr.NetProto { 91 case header.IPv4ProtocolNumber: 92 errMsg := &linux.SockErrCMsgIPv4{SockExtendedErr: ee} 93 if len(sockErr.Offender.Addr) > 0 { 94 addr, _ := ConvertAddress(linux.AF_INET, sockErr.Offender) 95 errMsg.Offender = *addr.(*linux.SockAddrInet) 96 } 97 return errMsg 98 case header.IPv6ProtocolNumber: 99 errMsg := &linux.SockErrCMsgIPv6{SockExtendedErr: ee} 100 if len(sockErr.Offender.Addr) > 0 { 101 addr, _ := ConvertAddress(linux.AF_INET6, sockErr.Offender) 102 errMsg.Offender = *addr.(*linux.SockAddrInet6) 103 } 104 return errMsg 105 default: 106 panic(fmt.Sprintf("invalid net proto for creating SockErrCMsg: %d", sockErr.NetProto)) 107 } 108 } 109 110 // NewIPControlMessages converts the tcpip ControlMessgaes (which does not 111 // have Linux specific format) to Linux format. 112 func NewIPControlMessages(family int, cmgs tcpip.ControlMessages) IPControlMessages { 113 var orgDstAddr linux.SockAddr 114 if cmgs.HasOriginalDstAddress { 115 orgDstAddr, _ = ConvertAddress(family, cmgs.OriginalDstAddress) 116 } 117 return IPControlMessages{ 118 HasTimestamp: cmgs.HasTimestamp, 119 Timestamp: cmgs.Timestamp, 120 HasInq: cmgs.HasInq, 121 Inq: cmgs.Inq, 122 HasTOS: cmgs.HasTOS, 123 TOS: cmgs.TOS, 124 HasTClass: cmgs.HasTClass, 125 TClass: cmgs.TClass, 126 HasIPPacketInfo: cmgs.HasIPPacketInfo, 127 PacketInfo: packetInfoToLinux(cmgs.PacketInfo), 128 OriginalDstAddress: orgDstAddr, 129 SockErr: sockErrCmsgToLinux(cmgs.SockErr), 130 } 131 } 132 133 // IPControlMessages contains socket control messages for IP sockets. 
// This can contain Linux specific structures unlike tcpip.ControlMessages.
//
// +stateify savable
type IPControlMessages struct {
	// HasTimestamp indicates whether Timestamp is valid/set.
	HasTimestamp bool

	// Timestamp is the time (in ns) that the last packet used to create
	// the read data was received.
	Timestamp int64

	// HasInq indicates whether Inq is valid/set.
	HasInq bool

	// Inq is the number of bytes ready to be received.
	Inq int32

	// HasTOS indicates whether TOS is valid/set.
	HasTOS bool

	// TOS is the IPv4 type of service of the associated packet.
	TOS uint8

	// HasTClass indicates whether TClass is valid/set.
	HasTClass bool

	// TClass is the IPv6 traffic class of the associated packet.
	TClass uint32

	// HasIPPacketInfo indicates whether PacketInfo is set.
	HasIPPacketInfo bool

	// PacketInfo holds interface and address data on an incoming packet.
	PacketInfo linux.ControlMessageIPPacketInfo

	// OriginalDstAddress holds the original destination address
	// and port of the incoming packet.
	OriginalDstAddress linux.SockAddr

	// SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE).
	SockErr linux.SockErrCMsg
}

// Release releases Unix domain socket credentials and rights by delegating to
// the Unix control messages. The IP control messages are not touched.
func (c *ControlMessages) Release(ctx context.Context) {
	c.Unix.Release(ctx)
}

// Socket is an interface combining fs.FileOperations and SocketOps,
// representing a VFS1 socket file.
type Socket interface {
	fs.FileOperations
	SocketOps
}

// SocketVFS2 is an interface combining vfs.FileDescriptionImpl and SocketOps,
// representing a VFS2 socket file.
type SocketVFS2 interface {
	vfs.FileDescriptionImpl
	SocketOps
}

// SocketOps is the interface containing socket syscalls used by the syscall
// layer to redirect them to the appropriate implementation.
//
// It is implemented by both Socket and SocketVFS2.
type SocketOps interface {
	// Connect implements the connect(2) Linux syscall.
	Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error

	// Accept implements the accept4(2) Linux syscall.
	// Returns fd, peer address, real peer address length and error. Real
	// peer address length is only set if len(peer) > 0.
	//
	// NOTE(review): the signature has no peer parameter; the condition above
	// presumably corresponds to peerRequested being true — confirm against
	// the implementations.
	Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error)

	// Bind implements the bind(2) Linux syscall.
	Bind(t *kernel.Task, sockaddr []byte) *syserr.Error

	// Listen implements the listen(2) Linux syscall.
	Listen(t *kernel.Task, backlog int) *syserr.Error

	// Shutdown implements the shutdown(2) Linux syscall.
	Shutdown(t *kernel.Task, how int) *syserr.Error

	// GetSockOpt implements the getsockopt(2) Linux syscall.
	GetSockOpt(t *kernel.Task, level int, name int, outPtr hostarch.Addr, outLen int) (marshal.Marshallable, *syserr.Error)

	// SetSockOpt implements the setsockopt(2) Linux syscall.
	SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error

	// GetSockName implements the getsockname(2) Linux syscall.
	//
	// addrLen is the address length to be returned to the application, not
	// necessarily the actual length of the address.
	GetSockName(t *kernel.Task) (addr linux.SockAddr, addrLen uint32, err *syserr.Error)

	// GetPeerName implements the getpeername(2) Linux syscall.
	//
	// addrLen is the address length to be returned to the application, not
	// necessarily the actual length of the address.
	GetPeerName(t *kernel.Task) (addr linux.SockAddr, addrLen uint32, err *syserr.Error)

	// RecvMsg implements the recvmsg(2) Linux syscall.
	//
	// senderAddrLen is the address length to be returned to the application,
	// not necessarily the actual length of the address.
	//
	// flags control how RecvMsg should be completed. msgFlags indicate how
	// the RecvMsg call was completed. Note that control message truncation
	// may still be required even if the MSG_CTRUNC bit is not set in
	// msgFlags. In that case, the caller should set MSG_CTRUNC appropriately.
	//
	// If err != nil, the recv was not successful.
	RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages ControlMessages, err *syserr.Error)

	// SendMsg implements the sendmsg(2) Linux syscall. SendMsg does not take
	// ownership of the ControlMessage on error.
	//
	// If n > 0, err will either be nil or an error from t.Block.
	SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages ControlMessages) (n int, err *syserr.Error)

	// SetRecvTimeout sets the timeout (in ns) for recv operations. Zero means
	// no timeout, and negative means DONTWAIT.
	SetRecvTimeout(nanoseconds int64)

	// RecvTimeout gets the current timeout (in ns) for recv operations. Zero
	// means no timeout, and negative means DONTWAIT.
	RecvTimeout() int64

	// SetSendTimeout sets the timeout (in ns) for send operations. Zero means
	// no timeout, and negative means DONTWAIT.
	SetSendTimeout(nanoseconds int64)

	// SendTimeout gets the current timeout (in ns) for send operations. Zero
	// means no timeout, and negative means DONTWAIT.
	SendTimeout() int64

	// State returns the current state of the socket, as represented by Linux in
	// procfs. The returned state value is protocol-specific.
	State() uint32

	// Type returns the family, socket type and protocol of the socket.
	Type() (family int, skType linux.SockType, protocol int)
}

// Provider is the interface implemented by providers of sockets for specific
// address families (e.g., AF_INET).
type Provider interface {
	// Socket creates a new socket.
	//
	// If a nil Socket _and_ a nil error is returned, it means that the
	// protocol is not supported. A non-nil error should only be returned
	// if the protocol is supported, but an error occurs during creation.
	Socket(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *syserr.Error)

	// Pair creates a pair of connected sockets.
	//
	// See Socket for error information.
	Pair(t *kernel.Task, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error)
}

// families holds a map of all known address families and their providers.
// Providers for a family are kept in registration order.
var families = make(map[int][]Provider)

// RegisterProvider registers the provider of a given address family so that
// sockets of that type can be created via socket() and/or socketpair()
// syscalls. The provider is appended after any providers already registered
// for the same family.
//
// This should only be called during the initialization of the address family.
func RegisterProvider(family int, provider Provider) {
	families[family] = append(families[family], provider)
}

// New creates a new socket with the given family, type and protocol.
308 func New(t *kernel.Task, family int, stype linux.SockType, protocol int) (*fs.File, *syserr.Error) { 309 for _, p := range families[family] { 310 s, err := p.Socket(t, stype, protocol) 311 if err != nil { 312 return nil, err 313 } 314 if s != nil { 315 t.Kernel().RecordSocket(s) 316 return s, nil 317 } 318 } 319 320 return nil, syserr.ErrAddressFamilyNotSupported 321 } 322 323 // Pair creates a new connected socket pair with the given family, type and 324 // protocol. 325 func Pair(t *kernel.Task, family int, stype linux.SockType, protocol int) (*fs.File, *fs.File, *syserr.Error) { 326 providers, ok := families[family] 327 if !ok { 328 return nil, nil, syserr.ErrAddressFamilyNotSupported 329 } 330 331 for _, p := range providers { 332 s1, s2, err := p.Pair(t, stype, protocol) 333 if err != nil { 334 return nil, nil, err 335 } 336 if s1 != nil && s2 != nil { 337 k := t.Kernel() 338 k.RecordSocket(s1) 339 k.RecordSocket(s2) 340 return s1, s2, nil 341 } 342 } 343 344 return nil, nil, syserr.ErrSocketNotSupported 345 } 346 347 // NewDirent returns a sockfs fs.Dirent that resides on device d. 348 func NewDirent(ctx context.Context, d *device.Device) *fs.Dirent { 349 ino := d.NextIno() 350 iops := &fsutil.SimpleFileInode{ 351 InodeSimpleAttributes: fsutil.NewInodeSimpleAttributes(ctx, fs.FileOwnerFromContext(ctx), fs.FilePermissions{ 352 User: fs.PermMask{Read: true, Write: true}, 353 }, linux.SOCKFS_MAGIC), 354 } 355 inode := fs.NewInode(ctx, iops, fs.NewPseudoMountSource(ctx), fs.StableAttr{ 356 Type: fs.Socket, 357 DeviceID: d.DeviceID(), 358 InodeID: ino, 359 BlockSize: hostarch.PageSize, 360 }) 361 362 // Dirent name matches net/socket.c:sockfs_dname. 363 return fs.NewDirent(ctx, inode, fmt.Sprintf("socket:[%d]", ino)) 364 } 365 366 // ProviderVFS2 is the vfs2 interface implemented by providers of sockets for 367 // specific address families (e.g., AF_INET). 368 type ProviderVFS2 interface { 369 // Socket creates a new socket. 
370 // 371 // If a nil Socket _and_ a nil error is returned, it means that the 372 // protocol is not supported. A non-nil error should only be returned 373 // if the protocol is supported, but an error occurs during creation. 374 Socket(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) 375 376 // Pair creates a pair of connected sockets. 377 // 378 // See Socket for error information. 379 Pair(t *kernel.Task, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) 380 } 381 382 // familiesVFS2 holds a map of all known address families and their providers. 383 var familiesVFS2 = make(map[int][]ProviderVFS2) 384 385 // RegisterProviderVFS2 registers the provider of a given address family so that 386 // sockets of that type can be created via socket() and/or socketpair() 387 // syscalls. 388 // 389 // This should only be called during the initialization of the address family. 390 func RegisterProviderVFS2(family int, provider ProviderVFS2) { 391 familiesVFS2[family] = append(familiesVFS2[family], provider) 392 } 393 394 // NewVFS2 creates a new socket with the given family, type and protocol. 395 func NewVFS2(t *kernel.Task, family int, stype linux.SockType, protocol int) (*vfs.FileDescription, *syserr.Error) { 396 for _, p := range familiesVFS2[family] { 397 s, err := p.Socket(t, stype, protocol) 398 if err != nil { 399 return nil, err 400 } 401 if s != nil { 402 t.Kernel().RecordSocketVFS2(s) 403 return s, nil 404 } 405 } 406 407 return nil, syserr.ErrAddressFamilyNotSupported 408 } 409 410 // PairVFS2 creates a new connected socket pair with the given family, type and 411 // protocol. 
412 func PairVFS2(t *kernel.Task, family int, stype linux.SockType, protocol int) (*vfs.FileDescription, *vfs.FileDescription, *syserr.Error) { 413 providers, ok := familiesVFS2[family] 414 if !ok { 415 return nil, nil, syserr.ErrAddressFamilyNotSupported 416 } 417 418 for _, p := range providers { 419 s1, s2, err := p.Pair(t, stype, protocol) 420 if err != nil { 421 return nil, nil, err 422 } 423 if s1 != nil && s2 != nil { 424 k := t.Kernel() 425 k.RecordSocketVFS2(s1) 426 k.RecordSocketVFS2(s2) 427 return s1, s2, nil 428 } 429 } 430 431 return nil, nil, syserr.ErrSocketNotSupported 432 } 433 434 // SendReceiveTimeout stores timeouts for send and receive calls. 435 // 436 // It is meant to be embedded into Socket implementations to help satisfy the 437 // interface. 438 // 439 // Care must be taken when copying SendReceiveTimeout as it contains atomic 440 // variables. 441 // 442 // +stateify savable 443 type SendReceiveTimeout struct { 444 // send is length of the send timeout in nanoseconds. 445 // 446 // send must be accessed atomically. 447 send int64 448 449 // recv is length of the receive timeout in nanoseconds. 450 // 451 // recv must be accessed atomically. 452 recv int64 453 } 454 455 // SetRecvTimeout implements Socket.SetRecvTimeout. 456 func (to *SendReceiveTimeout) SetRecvTimeout(nanoseconds int64) { 457 atomic.StoreInt64(&to.recv, nanoseconds) 458 } 459 460 // RecvTimeout implements Socket.RecvTimeout. 461 func (to *SendReceiveTimeout) RecvTimeout() int64 { 462 return atomic.LoadInt64(&to.recv) 463 } 464 465 // SetSendTimeout implements Socket.SetSendTimeout. 466 func (to *SendReceiveTimeout) SetSendTimeout(nanoseconds int64) { 467 atomic.StoreInt64(&to.send, nanoseconds) 468 } 469 470 // SendTimeout implements Socket.SendTimeout. 471 func (to *SendReceiveTimeout) SendTimeout() int64 { 472 return atomic.LoadInt64(&to.send) 473 } 474 475 // GetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid. 
476 // It contains names that are valid for GetSockOpt when level is SOL_SOCKET. 477 func GetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) { 478 switch name { 479 case linux.SO_ACCEPTCONN, 480 linux.SO_BPF_EXTENSIONS, 481 linux.SO_COOKIE, 482 linux.SO_DOMAIN, 483 linux.SO_ERROR, 484 linux.SO_GET_FILTER, 485 linux.SO_INCOMING_NAPI_ID, 486 linux.SO_MEMINFO, 487 linux.SO_PEERCRED, 488 linux.SO_PEERGROUPS, 489 linux.SO_PEERNAME, 490 linux.SO_PEERSEC, 491 linux.SO_PROTOCOL, 492 linux.SO_SNDLOWAT, 493 linux.SO_TYPE: 494 495 t.Kernel().EmitUnimplementedEvent(t) 496 497 default: 498 emitUnimplementedEvent(t, name) 499 } 500 } 501 502 // SetSockOptEmitUnimplementedEvent emits unimplemented event if name is valid. 503 // It contains names that are valid for SetSockOpt when level is SOL_SOCKET. 504 func SetSockOptEmitUnimplementedEvent(t *kernel.Task, name int) { 505 switch name { 506 case linux.SO_ATTACH_BPF, 507 linux.SO_ATTACH_FILTER, 508 linux.SO_ATTACH_REUSEPORT_CBPF, 509 linux.SO_ATTACH_REUSEPORT_EBPF, 510 linux.SO_CNX_ADVICE, 511 linux.SO_DETACH_FILTER, 512 linux.SO_RCVBUFFORCE, 513 linux.SO_SNDBUFFORCE: 514 515 t.Kernel().EmitUnimplementedEvent(t) 516 517 default: 518 emitUnimplementedEvent(t, name) 519 } 520 } 521 522 // emitUnimplementedEvent emits unimplemented event if name is valid. It 523 // contains names that are common between Get and SetSocketOpt when level is 524 // SOL_SOCKET. 
525 func emitUnimplementedEvent(t *kernel.Task, name int) { 526 switch name { 527 case linux.SO_BINDTODEVICE, 528 linux.SO_BROADCAST, 529 linux.SO_BSDCOMPAT, 530 linux.SO_BUSY_POLL, 531 linux.SO_DEBUG, 532 linux.SO_DONTROUTE, 533 linux.SO_INCOMING_CPU, 534 linux.SO_KEEPALIVE, 535 linux.SO_LINGER, 536 linux.SO_LOCK_FILTER, 537 linux.SO_MARK, 538 linux.SO_MAX_PACING_RATE, 539 linux.SO_NOFCS, 540 linux.SO_OOBINLINE, 541 linux.SO_PASSCRED, 542 linux.SO_PASSSEC, 543 linux.SO_PEEK_OFF, 544 linux.SO_PRIORITY, 545 linux.SO_RCVBUF, 546 linux.SO_RCVLOWAT, 547 linux.SO_RCVTIMEO, 548 linux.SO_REUSEADDR, 549 linux.SO_REUSEPORT, 550 linux.SO_RXQ_OVFL, 551 linux.SO_SELECT_ERR_QUEUE, 552 linux.SO_SNDBUF, 553 linux.SO_SNDTIMEO, 554 linux.SO_TIMESTAMP, 555 linux.SO_TIMESTAMPING, 556 linux.SO_TIMESTAMPNS, 557 linux.SO_TXTIME, 558 linux.SO_WIFI_STATUS, 559 linux.SO_ZEROCOPY: 560 561 t.Kernel().EmitUnimplementedEvent(t) 562 } 563 } 564 565 // UnmarshalSockAddr unmarshals memory representing a struct sockaddr to one of 566 // the ABI socket address types. 567 // 568 // Precondition: data must be long enough to represent a socket address of the 569 // given family. 
570 func UnmarshalSockAddr(family int, data []byte) linux.SockAddr { 571 switch family { 572 case unix.AF_INET: 573 var addr linux.SockAddrInet 574 addr.UnmarshalUnsafe(data[:addr.SizeBytes()]) 575 return &addr 576 case unix.AF_INET6: 577 var addr linux.SockAddrInet6 578 addr.UnmarshalUnsafe(data[:addr.SizeBytes()]) 579 return &addr 580 case unix.AF_UNIX: 581 var addr linux.SockAddrUnix 582 addr.UnmarshalUnsafe(data[:addr.SizeBytes()]) 583 return &addr 584 case unix.AF_NETLINK: 585 var addr linux.SockAddrNetlink 586 addr.UnmarshalUnsafe(data[:addr.SizeBytes()]) 587 return &addr 588 default: 589 panic(fmt.Sprintf("Unsupported socket family %v", family)) 590 } 591 } 592 593 var sockAddrLinkSize = (&linux.SockAddrLink{}).SizeBytes() 594 var sockAddrInetSize = (&linux.SockAddrInet{}).SizeBytes() 595 var sockAddrInet6Size = (&linux.SockAddrInet6{}).SizeBytes() 596 597 // Ntohs converts a 16-bit number from network byte order to host byte order. It 598 // assumes that the host is little endian. 599 func Ntohs(v uint16) uint16 { 600 return v<<8 | v>>8 601 } 602 603 // Htons converts a 16-bit number from host byte order to network byte order. It 604 // assumes that the host is little endian. 605 func Htons(v uint16) uint16 { 606 return Ntohs(v) 607 } 608 609 // isLinkLocal determines if the given IPv6 address is link-local. This is the 610 // case when it has the fe80::/10 prefix. This check is used to determine when 611 // the NICID is relevant for a given IPv6 address. 612 func isLinkLocal(addr tcpip.Address) bool { 613 return len(addr) >= 2 && addr[0] == 0xfe && addr[1]&0xc0 == 0x80 614 } 615 616 // ConvertAddress converts the given address to a native format. 
617 func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) { 618 switch family { 619 case linux.AF_UNIX: 620 var out linux.SockAddrUnix 621 out.Family = linux.AF_UNIX 622 l := len([]byte(addr.Addr)) 623 for i := 0; i < l; i++ { 624 out.Path[i] = int8(addr.Addr[i]) 625 } 626 627 // Linux returns the used length of the address struct (including the 628 // null terminator) for filesystem paths. The Family field is 2 bytes. 629 // It is sometimes allowed to exclude the null terminator if the 630 // address length is the max. Abstract and empty paths always return 631 // the full exact length. 632 if l == 0 || out.Path[0] == 0 || l == len(out.Path) { 633 return &out, uint32(2 + l) 634 } 635 return &out, uint32(3 + l) 636 637 case linux.AF_INET: 638 var out linux.SockAddrInet 639 copy(out.Addr[:], addr.Addr) 640 out.Family = linux.AF_INET 641 out.Port = Htons(addr.Port) 642 return &out, uint32(sockAddrInetSize) 643 644 case linux.AF_INET6: 645 var out linux.SockAddrInet6 646 if len(addr.Addr) == header.IPv4AddressSize { 647 // Copy address in v4-mapped format. 648 copy(out.Addr[12:], addr.Addr) 649 out.Addr[10] = 0xff 650 out.Addr[11] = 0xff 651 } else { 652 copy(out.Addr[:], addr.Addr) 653 } 654 out.Family = linux.AF_INET6 655 out.Port = Htons(addr.Port) 656 if isLinkLocal(addr.Addr) { 657 out.Scope_id = uint32(addr.NIC) 658 } 659 return &out, uint32(sockAddrInet6Size) 660 661 case linux.AF_PACKET: 662 var out linux.SockAddrLink 663 out.Family = linux.AF_PACKET 664 out.InterfaceIndex = int32(addr.NIC) 665 out.HardwareAddrLen = header.EthernetAddressSize 666 copy(out.HardwareAddr[:], addr.Addr) 667 return &out, uint32(sockAddrLinkSize) 668 669 default: 670 return nil, 0 671 } 672 } 673 674 // BytesToIPAddress converts an IPv4 or IPv6 address from the user to the 675 // netstack representation taking any addresses into account. 
676 func BytesToIPAddress(addr []byte) tcpip.Address { 677 if bytes.Equal(addr, make([]byte, 4)) || bytes.Equal(addr, make([]byte, 16)) { 678 return "" 679 } 680 return tcpip.Address(addr) 681 } 682 683 // AddressAndFamily reads an sockaddr struct from the given address and 684 // converts it to the FullAddress format. It supports AF_UNIX, AF_INET, 685 // AF_INET6, and AF_PACKET addresses. 686 // 687 // AddressAndFamily returns an address and its family. 688 func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) { 689 // Make sure we have at least 2 bytes for the address family. 690 if len(addr) < 2 { 691 return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument 692 } 693 694 // Get the rest of the fields based on the address family. 695 switch family := hostarch.ByteOrder.Uint16(addr); family { 696 case linux.AF_UNIX: 697 path := addr[2:] 698 if len(path) > linux.UnixPathMax { 699 return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument 700 } 701 // Drop the terminating NUL (if one exists) and everything after 702 // it for filesystem (non-abstract) addresses. 
703 if len(path) > 0 && path[0] != 0 { 704 if n := bytes.IndexByte(path[1:], 0); n >= 0 { 705 path = path[:n+1] 706 } 707 } 708 return tcpip.FullAddress{ 709 Addr: tcpip.Address(path), 710 }, family, nil 711 712 case linux.AF_INET: 713 var a linux.SockAddrInet 714 if len(addr) < sockAddrInetSize { 715 return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument 716 } 717 a.UnmarshalUnsafe(addr[:sockAddrInetSize]) 718 719 out := tcpip.FullAddress{ 720 Addr: BytesToIPAddress(a.Addr[:]), 721 Port: Ntohs(a.Port), 722 } 723 return out, family, nil 724 725 case linux.AF_INET6: 726 var a linux.SockAddrInet6 727 if len(addr) < sockAddrInet6Size { 728 return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument 729 } 730 a.UnmarshalUnsafe(addr[:sockAddrInet6Size]) 731 732 out := tcpip.FullAddress{ 733 Addr: BytesToIPAddress(a.Addr[:]), 734 Port: Ntohs(a.Port), 735 } 736 if isLinkLocal(out.Addr) { 737 out.NIC = tcpip.NICID(a.Scope_id) 738 } 739 return out, family, nil 740 741 case linux.AF_PACKET: 742 var a linux.SockAddrLink 743 if len(addr) < sockAddrLinkSize { 744 return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument 745 } 746 a.UnmarshalUnsafe(addr[:sockAddrLinkSize]) 747 if a.Family != linux.AF_PACKET || a.HardwareAddrLen != header.EthernetAddressSize { 748 return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument 749 } 750 751 return tcpip.FullAddress{ 752 NIC: tcpip.NICID(a.InterfaceIndex), 753 Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]), 754 }, family, nil 755 756 case linux.AF_UNSPEC: 757 return tcpip.FullAddress{}, family, nil 758 759 default: 760 return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported 761 } 762 }