github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/transport/udp/endpoint.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package udp
    16  
    17  import (
    18  	"io"
    19  	"sync/atomic"
    20  	"time"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/sync"
    23  	"github.com/SagerNet/gvisor/pkg/tcpip"
    24  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    25  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    26  	"github.com/SagerNet/gvisor/pkg/tcpip/ports"
    27  	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
    28  	"github.com/SagerNet/gvisor/pkg/waiter"
    29  )
    30  
// udpPacket is one received datagram held on an endpoint's receive list,
// together with the metadata that Read hands back to the caller:
//
//   - senderAddress is reported as the read result's remote address.
//   - destinationAddress is reported as the original-destination-address
//     control message.
//   - packetInfo is reported as the IP packet info control message.
//   - data is the payload copied out to the reader.
//   - receivedAt is reported, in Unix nanoseconds, as the timestamp control
//     message.
//
// +stateify savable
type udpPacket struct {
	udpPacketEntry
	senderAddress      tcpip.FullAddress
	destinationAddress tcpip.FullAddress
	packetInfo         tcpip.IPPacketInfo
	data               buffer.VectorisedView `state:".(buffer.VectorisedView)"`
	receivedAt         time.Time             `state:".(int64)"`
	// tos stores either the receiveTOS or receiveTClass value.
	tos uint8
}
    42  
// EndpointState represents the state of a UDP endpoint.
type EndpointState tcpip.EndpointState

// Endpoint states. Note that these are represented in a netstack-specific
// manner and may not be meaningful externally. Specifically, they need to be
// translated to Linux's representation for these states if presented to
// userspace.
//
// The zero value is assigned to the blank identifier, so StateInitial is 1.
const (
	_ EndpointState = iota
	StateInitial
	StateBound
	StateConnected
	StateClosed
)
    56  
    57  // String implements fmt.Stringer.
    58  func (s EndpointState) String() string {
    59  	switch s {
    60  	case StateInitial:
    61  		return "INITIAL"
    62  	case StateBound:
    63  		return "BOUND"
    64  	case StateConnected:
    65  		return "CONNECTING"
    66  	case StateClosed:
    67  		return "CLOSED"
    68  	default:
    69  		return "UNKNOWN"
    70  	}
    71  }
    72  
    73  // endpoint represents a UDP endpoint. This struct serves as the interface
    74  // between users of the endpoint and the protocol implementation; it is legal to
    75  // have concurrent goroutines make calls into the endpoint, they are properly
    76  // synchronized.
    77  //
    78  // It implements tcpip.Endpoint.
    79  //
    80  // +stateify savable
type endpoint struct {
	stack.TransportEndpointInfo
	tcpip.DefaultSocketOptionsHandler

	// The following fields are initialized at creation time and do not
	// change throughout the lifetime of the endpoint.
	stack       *stack.Stack `state:"manual"`
	waiterQueue *waiter.Queue
	uniqueID    uint64

	// The following fields are used to manage the receive queue, and are
	// protected by rcvMu.
	rcvMu      sync.Mutex `state:"nosave"`
	rcvReady   bool
	rcvList    udpPacketList
	rcvBufSize int
	rcvClosed  bool

	// The following fields are protected by the mu mutex.
	mu sync.RWMutex `state:"nosave"`
	// state must be read/set using the EndpointState()/setEndpointState()
	// methods.
	state          uint32
	route          *stack.Route `state:"manual"`
	dstPort        uint16
	ttl            uint8
	multicastTTL   uint8
	multicastAddr  tcpip.Address
	multicastNICID tcpip.NICID
	portFlags      ports.Flags

	// lastError is the most recently recorded error; it is protected by
	// lastErrorMu and is cleared when read through LastError.
	lastErrorMu sync.Mutex `state:"nosave"`
	lastError   tcpip.Error

	// Values used to reserve a port or register a transport endpoint
	// (whichever happens first).
	boundBindToDevice tcpip.NICID
	boundPortFlags    ports.Flags

	// sendTOS represents IPv4 TOS or IPv6 TrafficClass,
	// applied while sending packets. Defaults to 0 as on Linux.
	sendTOS uint8

	// shutdownFlags represent the current shutdown state of the endpoint.
	shutdownFlags tcpip.ShutdownFlags

	// multicastMemberships that need to be removed when the endpoint is
	// closed. Protected by the mu mutex.
	multicastMemberships map[multicastMembership]struct{}

	// effectiveNetProtos contains the network protocols actually in use. In
	// most cases it will only contain "netProto", but in cases like IPv6
	// endpoints with v6only set to false, this could include multiple
	// protocols (e.g., IPv6 and IPv4) or a single different protocol (e.g.,
	// IPv4 when IPv6 endpoint is bound or connected to an IPv4 mapped
	// address).
	effectiveNetProtos []tcpip.NetworkProtocolNumber

	// TODO(b/142022063): Add ability to save and restore per endpoint stats.
	stats tcpip.TransportEndpointStats `state:"nosave"`

	// owner is used to get uid and gid of the packet.
	owner tcpip.PacketOwner

	// ops is used to get socket level options.
	ops tcpip.SocketOptions

	// frozen indicates if the packets should be delivered to the endpoint
	// during restore.
	frozen bool
}
   152  
// multicastMembership identifies one multicast group joined by an endpoint:
// the group address together with the NIC it was joined on. It is used as the
// key of endpoint.multicastMemberships.
//
// +stateify savable
type multicastMembership struct {
	nicID         tcpip.NICID
	multicastAddr tcpip.Address
}
   158  
   159  func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
   160  	e := &endpoint{
   161  		stack: s,
   162  		TransportEndpointInfo: stack.TransportEndpointInfo{
   163  			NetProto:   netProto,
   164  			TransProto: header.UDPProtocolNumber,
   165  		},
   166  		waiterQueue: waiterQueue,
   167  		// RFC 1075 section 5.4 recommends a TTL of 1 for membership
   168  		// requests.
   169  		//
   170  		// RFC 5135 4.2.1 appears to assume that IGMP messages have a
   171  		// TTL of 1.
   172  		//
   173  		// RFC 5135 Appendix A defines TTL=1: A multicast source that
   174  		// wants its traffic to not traverse a router (e.g., leave a
   175  		// home network) may find it useful to send traffic with IP
   176  		// TTL=1.
   177  		//
   178  		// Linux defaults to TTL=1.
   179  		multicastTTL:         1,
   180  		multicastMemberships: make(map[multicastMembership]struct{}),
   181  		state:                uint32(StateInitial),
   182  		uniqueID:             s.UniqueID(),
   183  	}
   184  	e.ops.InitHandler(e, e.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits)
   185  	e.ops.SetMulticastLoop(true)
   186  	e.ops.SetSendBufferSize(32*1024, false /* notify */)
   187  	e.ops.SetReceiveBufferSize(32*1024, false /* notify */)
   188  
   189  	// Override with stack defaults.
   190  	var ss tcpip.SendBufferSizeOption
   191  	if err := s.Option(&ss); err == nil {
   192  		e.ops.SetSendBufferSize(int64(ss.Default), false /* notify */)
   193  	}
   194  
   195  	var rs tcpip.ReceiveBufferSizeOption
   196  	if err := s.Option(&rs); err == nil {
   197  		e.ops.SetReceiveBufferSize(int64(rs.Default), false /* notify */)
   198  	}
   199  
   200  	return e
   201  }
   202  
   203  // setEndpointState updates the state of the endpoint to state atomically. This
   204  // method is unexported as the only place we should update the state is in this
   205  // package but we allow the state to be read freely without holding e.mu.
   206  //
   207  // Precondition: e.mu must be held to call this method.
   208  func (e *endpoint) setEndpointState(state EndpointState) {
   209  	atomic.StoreUint32(&e.state, uint32(state))
   210  }
   211  
   212  // EndpointState() returns the current state of the endpoint.
   213  func (e *endpoint) EndpointState() EndpointState {
   214  	return EndpointState(atomic.LoadUint32(&e.state))
   215  }
   216  
// UniqueID implements stack.TransportEndpoint.
//
// It returns the identifier obtained from the stack when the endpoint was
// created.
func (e *endpoint) UniqueID() uint64 {
	return e.uniqueID
}
   221  
   222  func (e *endpoint) LastError() tcpip.Error {
   223  	e.lastErrorMu.Lock()
   224  	defer e.lastErrorMu.Unlock()
   225  
   226  	err := e.lastError
   227  	e.lastError = nil
   228  	return err
   229  }
   230  
   231  // UpdateLastError implements tcpip.SocketOptionsHandler.
   232  func (e *endpoint) UpdateLastError(err tcpip.Error) {
   233  	e.lastErrorMu.Lock()
   234  	e.lastError = err
   235  	e.lastErrorMu.Unlock()
   236  }
   237  
// Abort implements stack.TransportEndpoint.
//
// For UDP it is equivalent to Close.
func (e *endpoint) Abort() {
	e.Close()
}
   242  
   243  // Close puts the endpoint in a closed state and frees all resources
   244  // associated with it.
   245  func (e *endpoint) Close() {
   246  	e.mu.Lock()
   247  	e.shutdownFlags = tcpip.ShutdownRead | tcpip.ShutdownWrite
   248  
   249  	switch e.EndpointState() {
   250  	case StateBound, StateConnected:
   251  		e.stack.UnregisterTransportEndpoint(e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice)
   252  		portRes := ports.Reservation{
   253  			Networks:     e.effectiveNetProtos,
   254  			Transport:    ProtocolNumber,
   255  			Addr:         e.ID.LocalAddress,
   256  			Port:         e.ID.LocalPort,
   257  			Flags:        e.boundPortFlags,
   258  			BindToDevice: e.boundBindToDevice,
   259  			Dest:         tcpip.FullAddress{},
   260  		}
   261  		e.stack.ReleasePort(portRes)
   262  		e.boundBindToDevice = 0
   263  		e.boundPortFlags = ports.Flags{}
   264  	}
   265  
   266  	for mem := range e.multicastMemberships {
   267  		e.stack.LeaveGroup(e.NetProto, mem.nicID, mem.multicastAddr)
   268  	}
   269  	e.multicastMemberships = make(map[multicastMembership]struct{})
   270  
   271  	// Close the receive list and drain it.
   272  	e.rcvMu.Lock()
   273  	e.rcvClosed = true
   274  	e.rcvBufSize = 0
   275  	for !e.rcvList.Empty() {
   276  		p := e.rcvList.Front()
   277  		e.rcvList.Remove(p)
   278  	}
   279  	e.rcvMu.Unlock()
   280  
   281  	if e.route != nil {
   282  		e.route.Release()
   283  		e.route = nil
   284  	}
   285  
   286  	// Update the state.
   287  	e.setEndpointState(StateClosed)
   288  
   289  	e.mu.Unlock()
   290  
   291  	e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
   292  }
   293  
// ModerateRecvBuf implements tcpip.Endpoint.
//
// It is a no-op for UDP endpoints.
func (*endpoint) ModerateRecvBuf(int) {}
   296  
   297  // Read implements tcpip.Endpoint.
   298  func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) {
   299  	if err := e.LastError(); err != nil {
   300  		return tcpip.ReadResult{}, err
   301  	}
   302  
   303  	e.rcvMu.Lock()
   304  
   305  	if e.rcvList.Empty() {
   306  		var err tcpip.Error = &tcpip.ErrWouldBlock{}
   307  		if e.rcvClosed {
   308  			e.stats.ReadErrors.ReadClosed.Increment()
   309  			err = &tcpip.ErrClosedForReceive{}
   310  		}
   311  		e.rcvMu.Unlock()
   312  		return tcpip.ReadResult{}, err
   313  	}
   314  
   315  	p := e.rcvList.Front()
   316  	if !opts.Peek {
   317  		e.rcvList.Remove(p)
   318  		e.rcvBufSize -= p.data.Size()
   319  	}
   320  	e.rcvMu.Unlock()
   321  
   322  	// Control Messages
   323  	cm := tcpip.ControlMessages{
   324  		HasTimestamp: true,
   325  		Timestamp:    p.receivedAt.UnixNano(),
   326  	}
   327  	if e.ops.GetReceiveTOS() {
   328  		cm.HasTOS = true
   329  		cm.TOS = p.tos
   330  	}
   331  	if e.ops.GetReceiveTClass() {
   332  		cm.HasTClass = true
   333  		// Although TClass is an 8-bit value it's read in the CMsg as a uint32.
   334  		cm.TClass = uint32(p.tos)
   335  	}
   336  	if e.ops.GetReceivePacketInfo() {
   337  		cm.HasIPPacketInfo = true
   338  		cm.PacketInfo = p.packetInfo
   339  	}
   340  	if e.ops.GetReceiveOriginalDstAddress() {
   341  		cm.HasOriginalDstAddress = true
   342  		cm.OriginalDstAddress = p.destinationAddress
   343  	}
   344  
   345  	// Read Result
   346  	res := tcpip.ReadResult{
   347  		Total:           p.data.Size(),
   348  		ControlMessages: cm,
   349  	}
   350  	if opts.NeedRemoteAddr {
   351  		res.RemoteAddr = p.senderAddress
   352  	}
   353  
   354  	n, err := p.data.ReadTo(dst, opts.Peek)
   355  	if n == 0 && err != nil {
   356  		return res, &tcpip.ErrBadBuffer{}
   357  	}
   358  	res.Count = n
   359  	return res, nil
   360  }
   361  
   362  // prepareForWrite prepares the endpoint for sending data. In particular, it
   363  // binds it if it's still in the initial state. To do so, it must first
   364  // reacquire the mutex in exclusive mode.
   365  //
   366  // Returns true for retry if preparation should be retried.
   367  // +checklocks:e.mu
   368  func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) {
   369  	switch e.EndpointState() {
   370  	case StateInitial:
   371  	case StateConnected:
   372  		return false, nil
   373  
   374  	case StateBound:
   375  		if to == nil {
   376  			return false, &tcpip.ErrDestinationRequired{}
   377  		}
   378  		return false, nil
   379  	default:
   380  		return false, &tcpip.ErrInvalidEndpointState{}
   381  	}
   382  
   383  	e.mu.RUnlock()
   384  	e.mu.Lock()
   385  	defer e.mu.DowngradeLock()
   386  
   387  	// The state changed when we released the shared locked and re-acquired
   388  	// it in exclusive mode. Try again.
   389  	if e.EndpointState() != StateInitial {
   390  		return true, nil
   391  	}
   392  
   393  	// The state is still 'initial', so try to bind the endpoint.
   394  	if err := e.bindLocked(tcpip.FullAddress{}); err != nil {
   395  		return false, err
   396  	}
   397  
   398  	return true, nil
   399  }
   400  
   401  // connectRoute establishes a route to the specified interface or the
   402  // configured multicast interface if no interface is specified and the
   403  // specified address is a multicast address.
   404  func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netProto tcpip.NetworkProtocolNumber) (*stack.Route, tcpip.NICID, tcpip.Error) {
   405  	localAddr := e.ID.LocalAddress
   406  	if e.isBroadcastOrMulticast(nicID, netProto, localAddr) {
   407  		// A packet can only originate from a unicast address (i.e., an interface).
   408  		localAddr = ""
   409  	}
   410  
   411  	if header.IsV4MulticastAddress(addr.Addr) || header.IsV6MulticastAddress(addr.Addr) {
   412  		if nicID == 0 {
   413  			nicID = e.multicastNICID
   414  		}
   415  		if localAddr == "" && nicID == 0 {
   416  			localAddr = e.multicastAddr
   417  		}
   418  	}
   419  
   420  	// Find a route to the desired destination.
   421  	r, err := e.stack.FindRoute(nicID, localAddr, addr.Addr, netProto, e.ops.GetMulticastLoop())
   422  	if err != nil {
   423  		return nil, 0, err
   424  	}
   425  	return r, nicID, nil
   426  }
   427  
   428  // Write writes data to the endpoint's peer. This method does not block
   429  // if the data cannot be written.
   430  func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
   431  	n, err := e.write(p, opts)
   432  	switch err.(type) {
   433  	case nil:
   434  		e.stats.PacketsSent.Increment()
   435  	case *tcpip.ErrMessageTooLong, *tcpip.ErrInvalidOptionValue:
   436  		e.stats.WriteErrors.InvalidArgs.Increment()
   437  	case *tcpip.ErrClosedForSend:
   438  		e.stats.WriteErrors.WriteClosed.Increment()
   439  	case *tcpip.ErrInvalidEndpointState:
   440  		e.stats.WriteErrors.InvalidEndpointState.Increment()
   441  	case *tcpip.ErrNoRoute, *tcpip.ErrBroadcastDisabled, *tcpip.ErrNetworkUnreachable:
   442  		// Errors indicating any problem with IP routing of the packet.
   443  		e.stats.SendErrors.NoRoute.Increment()
   444  	default:
   445  		// For all other errors when writing to the network layer.
   446  		e.stats.SendErrors.SendToNetworkFailed.Increment()
   447  	}
   448  	return n, err
   449  }
   450  
// buildUDPPacketInfo collects, while holding e.mu for reading, everything
// needed to send a single datagram: the route, ports, TTL, TOS, owner,
// checksum setting and a private copy of the payload. The result is returned
// by value so that the caller can send after e.mu has been released.
//
// It fails with ErrClosedForSend when the write side is shut down, with
// ErrNoRoute when opts.To names a NIC other than the one the endpoint is bound
// to, with ErrInvalidEndpointState when opts.To has port 0, with
// ErrBroadcastDisabled when the route is an outbound broadcast and the
// broadcast option is off, with ErrBadBuffer when the payload cannot be read
// in full, and with ErrMessageTooLong when the payload exceeds
// header.UDPMaximumPacketSize.
func (e *endpoint) buildUDPPacketInfo(p tcpip.Payloader, opts tcpip.WriteOptions) (udpPacketInfo, tcpip.Error) {
	e.mu.RLock()
	defer e.mu.RUnlock()

	// If we've shutdown with SHUT_WR we are in an invalid state for sending.
	if e.shutdownFlags&tcpip.ShutdownWrite != 0 {
		return udpPacketInfo{}, &tcpip.ErrClosedForSend{}
	}

	// Prepare for write. prepareForWrite may temporarily trade the read lock
	// for the write lock, in which case it asks to be called again.
	for {
		retry, err := e.prepareForWrite(opts.To)
		if err != nil {
			return udpPacketInfo{}, err
		}

		if !retry {
			break
		}
	}

	// Default to the connected route and port; an explicit destination
	// below overrides both.
	route := e.route
	dstPort := e.dstPort
	if opts.To != nil {
		// Reject destination address if it goes through a different
		// NIC than the endpoint was bound to.
		nicID := opts.To.NIC
		if nicID == 0 {
			nicID = tcpip.NICID(e.ops.GetBindToDevice())
		}
		if e.BindNICID != 0 {
			if nicID != 0 && nicID != e.BindNICID {
				return udpPacketInfo{}, &tcpip.ErrNoRoute{}
			}

			nicID = e.BindNICID
		}

		if opts.To.Port == 0 {
			// Port 0 is an invalid port to send to.
			return udpPacketInfo{}, &tcpip.ErrInvalidEndpointState{}
		}

		dst, netProto, err := e.checkV4MappedLocked(*opts.To)
		if err != nil {
			return udpPacketInfo{}, err
		}

		r, _, err := e.connectRoute(nicID, dst, netProto)
		if err != nil {
			return udpPacketInfo{}, err
		}
		// NOTE(review): r is released when this function returns, yet the
		// returned udpPacketInfo still refers to it and the caller sends on
		// it afterwards. Confirm that a Route remains safe to use after
		// Release.
		defer r.Release()

		route = r
		dstPort = dst.Port
	}

	if !e.ops.GetBroadcast() && route.IsOutboundBroadcast() {
		return udpPacketInfo{}, &tcpip.ErrBroadcastDisabled{}
	}

	// Copy the whole payload into a private buffer.
	v := make([]byte, p.Len())
	if _, err := io.ReadFull(p, v); err != nil {
		return udpPacketInfo{}, &tcpip.ErrBadBuffer{}
	}
	if len(v) > header.UDPMaximumPacketSize {
		// Payload can't possibly fit in a packet.
		so := e.SocketOptions()
		if so.GetRecvError() {
			// Also queue the failure as a local error when the RecvError
			// option is on.
			so.QueueLocalErr(
				&tcpip.ErrMessageTooLong{},
				route.NetProto(),
				header.UDPMaximumPacketSize,
				tcpip.FullAddress{
					NIC:  route.NICID(),
					Addr: route.RemoteAddress(),
					Port: dstPort,
				},
				v,
			)
		}
		return udpPacketInfo{}, &tcpip.ErrMessageTooLong{}
	}

	// A TTL of 0 means "use the route's default" for unicast traffic.
	ttl := e.ttl
	useDefaultTTL := ttl == 0
	if header.IsV4MulticastAddress(route.RemoteAddress()) || header.IsV6MulticastAddress(route.RemoteAddress()) {
		ttl = e.multicastTTL
		// Multicast allows a 0 TTL.
		useDefaultTTL = false
	}

	return udpPacketInfo{
		route:         route,
		data:          buffer.View(v),
		localPort:     e.ID.LocalPort,
		remotePort:    dstPort,
		ttl:           ttl,
		useDefaultTTL: useDefaultTTL,
		tos:           e.sendTOS,
		owner:         e.owner,
		noChecksum:    e.SocketOptions().GetNoChecksum(),
	}, nil
}
   556  
   557  func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
   558  	if err := e.LastError(); err != nil {
   559  		return 0, err
   560  	}
   561  
   562  	// MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
   563  	if opts.More {
   564  		return 0, &tcpip.ErrInvalidOptionValue{}
   565  	}
   566  
   567  	// Do not hold lock when sending as loopback is synchronous and if the UDP
   568  	// datagram ends up generating an ICMP response then it can result in a
   569  	// deadlock where the ICMP response handling ends up acquiring this endpoint's
   570  	// mutex using e.mu.RLock() in endpoint.HandleControlPacket which can cause a
   571  	// deadlock if another caller is trying to acquire e.mu in exclusive mode w/
   572  	// e.mu.Lock(). Since e.mu.Lock() prevents any new read locks to ensure the
   573  	// lock can be eventually acquired.
   574  	//
   575  	// See: https://golang.org/pkg/sync/#RWMutex for details on why recursive read
   576  	// locking is prohibited.
   577  	u, err := e.buildUDPPacketInfo(p, opts)
   578  	if err != nil {
   579  		return 0, err
   580  	}
   581  	n, err := u.send()
   582  	if err != nil {
   583  		return 0, err
   584  	}
   585  	return int64(n), nil
   586  }
   587  
   588  // OnReuseAddressSet implements tcpip.SocketOptionsHandler.
   589  func (e *endpoint) OnReuseAddressSet(v bool) {
   590  	e.mu.Lock()
   591  	e.portFlags.MostRecent = v
   592  	e.mu.Unlock()
   593  }
   594  
   595  // OnReusePortSet implements tcpip.SocketOptionsHandler.
   596  func (e *endpoint) OnReusePortSet(v bool) {
   597  	e.mu.Lock()
   598  	e.portFlags.LoadBalanced = v
   599  	e.mu.Unlock()
   600  }
   601  
   602  // SetSockOptInt implements tcpip.Endpoint.
   603  func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
   604  	switch opt {
   605  	case tcpip.MTUDiscoverOption:
   606  		// Return not supported if the value is not disabling path
   607  		// MTU discovery.
   608  		if v != tcpip.PMTUDiscoveryDont {
   609  			return &tcpip.ErrNotSupported{}
   610  		}
   611  
   612  	case tcpip.MulticastTTLOption:
   613  		e.mu.Lock()
   614  		e.multicastTTL = uint8(v)
   615  		e.mu.Unlock()
   616  
   617  	case tcpip.TTLOption:
   618  		e.mu.Lock()
   619  		e.ttl = uint8(v)
   620  		e.mu.Unlock()
   621  
   622  	case tcpip.IPv4TOSOption:
   623  		e.mu.Lock()
   624  		e.sendTOS = uint8(v)
   625  		e.mu.Unlock()
   626  
   627  	case tcpip.IPv6TrafficClassOption:
   628  		e.mu.Lock()
   629  		e.sendTOS = uint8(v)
   630  		e.mu.Unlock()
   631  	}
   632  
   633  	return nil
   634  }
   635  
   636  var _ tcpip.SocketOptionsHandler = (*endpoint)(nil)
   637  
// HasNIC implements tcpip.SocketOptionsHandler.
//
// It delegates to the stack's HasNIC after converting id to a tcpip.NICID.
func (e *endpoint) HasNIC(id int32) bool {
	return e.stack.HasNIC(tcpip.NICID(id))
}
   642  
   643  // SetSockOpt implements tcpip.Endpoint.
   644  func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
   645  	switch v := opt.(type) {
   646  	case *tcpip.MulticastInterfaceOption:
   647  		e.mu.Lock()
   648  		defer e.mu.Unlock()
   649  
   650  		fa := tcpip.FullAddress{Addr: v.InterfaceAddr}
   651  		fa, netProto, err := e.checkV4MappedLocked(fa)
   652  		if err != nil {
   653  			return err
   654  		}
   655  		nic := v.NIC
   656  		addr := fa.Addr
   657  
   658  		if nic == 0 && addr == "" {
   659  			e.multicastAddr = ""
   660  			e.multicastNICID = 0
   661  			break
   662  		}
   663  
   664  		if nic != 0 {
   665  			if !e.stack.CheckNIC(nic) {
   666  				return &tcpip.ErrBadLocalAddress{}
   667  			}
   668  		} else {
   669  			nic = e.stack.CheckLocalAddress(0, netProto, addr)
   670  			if nic == 0 {
   671  				return &tcpip.ErrBadLocalAddress{}
   672  			}
   673  		}
   674  
   675  		if e.BindNICID != 0 && e.BindNICID != nic {
   676  			return &tcpip.ErrInvalidEndpointState{}
   677  		}
   678  
   679  		e.multicastNICID = nic
   680  		e.multicastAddr = addr
   681  
   682  	case *tcpip.AddMembershipOption:
   683  		if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) {
   684  			return &tcpip.ErrInvalidOptionValue{}
   685  		}
   686  
   687  		nicID := v.NIC
   688  
   689  		if v.InterfaceAddr.Unspecified() {
   690  			if nicID == 0 {
   691  				if r, err := e.stack.FindRoute(0, "", v.MulticastAddr, e.NetProto, false /* multicastLoop */); err == nil {
   692  					nicID = r.NICID()
   693  					r.Release()
   694  				}
   695  			}
   696  		} else {
   697  			nicID = e.stack.CheckLocalAddress(nicID, e.NetProto, v.InterfaceAddr)
   698  		}
   699  		if nicID == 0 {
   700  			return &tcpip.ErrUnknownDevice{}
   701  		}
   702  
   703  		memToInsert := multicastMembership{nicID: nicID, multicastAddr: v.MulticastAddr}
   704  
   705  		e.mu.Lock()
   706  		defer e.mu.Unlock()
   707  
   708  		if _, ok := e.multicastMemberships[memToInsert]; ok {
   709  			return &tcpip.ErrPortInUse{}
   710  		}
   711  
   712  		if err := e.stack.JoinGroup(e.NetProto, nicID, v.MulticastAddr); err != nil {
   713  			return err
   714  		}
   715  
   716  		e.multicastMemberships[memToInsert] = struct{}{}
   717  
   718  	case *tcpip.RemoveMembershipOption:
   719  		if !header.IsV4MulticastAddress(v.MulticastAddr) && !header.IsV6MulticastAddress(v.MulticastAddr) {
   720  			return &tcpip.ErrInvalidOptionValue{}
   721  		}
   722  
   723  		nicID := v.NIC
   724  		if v.InterfaceAddr.Unspecified() {
   725  			if nicID == 0 {
   726  				if r, err := e.stack.FindRoute(0, "", v.MulticastAddr, e.NetProto, false /* multicastLoop */); err == nil {
   727  					nicID = r.NICID()
   728  					r.Release()
   729  				}
   730  			}
   731  		} else {
   732  			nicID = e.stack.CheckLocalAddress(nicID, e.NetProto, v.InterfaceAddr)
   733  		}
   734  		if nicID == 0 {
   735  			return &tcpip.ErrUnknownDevice{}
   736  		}
   737  
   738  		memToRemove := multicastMembership{nicID: nicID, multicastAddr: v.MulticastAddr}
   739  
   740  		e.mu.Lock()
   741  		defer e.mu.Unlock()
   742  
   743  		if _, ok := e.multicastMemberships[memToRemove]; !ok {
   744  			return &tcpip.ErrBadLocalAddress{}
   745  		}
   746  
   747  		if err := e.stack.LeaveGroup(e.NetProto, nicID, v.MulticastAddr); err != nil {
   748  			return err
   749  		}
   750  
   751  		delete(e.multicastMemberships, memToRemove)
   752  
   753  	case *tcpip.SocketDetachFilterOption:
   754  		return nil
   755  	}
   756  	return nil
   757  }
   758  
   759  // GetSockOptInt implements tcpip.Endpoint.
   760  func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
   761  	switch opt {
   762  	case tcpip.IPv4TOSOption:
   763  		e.mu.RLock()
   764  		v := int(e.sendTOS)
   765  		e.mu.RUnlock()
   766  		return v, nil
   767  
   768  	case tcpip.IPv6TrafficClassOption:
   769  		e.mu.RLock()
   770  		v := int(e.sendTOS)
   771  		e.mu.RUnlock()
   772  		return v, nil
   773  
   774  	case tcpip.MTUDiscoverOption:
   775  		// The only supported setting is path MTU discovery disabled.
   776  		return tcpip.PMTUDiscoveryDont, nil
   777  
   778  	case tcpip.MulticastTTLOption:
   779  		e.mu.Lock()
   780  		v := int(e.multicastTTL)
   781  		e.mu.Unlock()
   782  		return v, nil
   783  
   784  	case tcpip.ReceiveQueueSizeOption:
   785  		v := 0
   786  		e.rcvMu.Lock()
   787  		if !e.rcvList.Empty() {
   788  			p := e.rcvList.Front()
   789  			v = p.data.Size()
   790  		}
   791  		e.rcvMu.Unlock()
   792  		return v, nil
   793  
   794  	case tcpip.TTLOption:
   795  		e.mu.Lock()
   796  		v := int(e.ttl)
   797  		e.mu.Unlock()
   798  		return v, nil
   799  
   800  	default:
   801  		return -1, &tcpip.ErrUnknownProtocolOption{}
   802  	}
   803  }
   804  
   805  // GetSockOpt implements tcpip.Endpoint.
   806  func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error {
   807  	switch o := opt.(type) {
   808  	case *tcpip.MulticastInterfaceOption:
   809  		e.mu.Lock()
   810  		*o = tcpip.MulticastInterfaceOption{
   811  			NIC:           e.multicastNICID,
   812  			InterfaceAddr: e.multicastAddr,
   813  		}
   814  		e.mu.Unlock()
   815  
   816  	default:
   817  		return &tcpip.ErrUnknownProtocolOption{}
   818  	}
   819  	return nil
   820  }
   821  
// udpPacketInfo contains all information required to send a UDP packet.
//
// This should be used as a value-only type, which exists in order to simplify
// return value syntax. It should not be exported or extended.
//
// Fields, as consumed by send:
//
//   - route is the route the packet is written to.
//   - data is the payload.
//   - localPort and remotePort become the UDP source and destination ports.
//   - ttl and tos are passed to the network layer; when useDefaultTTL is set,
//     ttl is replaced by the route's default TTL.
//   - owner is attached to the packet buffer.
//   - noChecksum requests that checksum generation be skipped; it is not
//     honoured on IPv6 routes.
type udpPacketInfo struct {
	route         *stack.Route
	data          buffer.View
	localPort     uint16
	remotePort    uint16
	ttl           uint8
	useDefaultTTL bool
	tos           uint8
	owner         tcpip.PacketOwner
	noChecksum    bool
}
   837  
   838  // send sends the given packet.
   839  func (u *udpPacketInfo) send() (int, tcpip.Error) {
   840  	vv := u.data.ToVectorisedView()
   841  	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
   842  		ReserveHeaderBytes: header.UDPMinimumSize + int(u.route.MaxHeaderLength()),
   843  		Data:               vv,
   844  	})
   845  	pkt.Owner = u.owner
   846  
   847  	// Initialize the UDP header.
   848  	udp := header.UDP(pkt.TransportHeader().Push(header.UDPMinimumSize))
   849  	pkt.TransportProtocolNumber = ProtocolNumber
   850  
   851  	length := uint16(pkt.Size())
   852  	udp.Encode(&header.UDPFields{
   853  		SrcPort: u.localPort,
   854  		DstPort: u.remotePort,
   855  		Length:  length,
   856  	})
   857  
   858  	// Set the checksum field unless TX checksum offload is enabled.
   859  	// On IPv4, UDP checksum is optional, and a zero value indicates the
   860  	// transmitter skipped the checksum generation (RFC768).
   861  	// On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
   862  	if u.route.RequiresTXTransportChecksum() &&
   863  		(!u.noChecksum || u.route.NetProto() == header.IPv6ProtocolNumber) {
   864  		xsum := u.route.PseudoHeaderChecksum(ProtocolNumber, length)
   865  		for _, v := range vv.Views() {
   866  			xsum = header.Checksum(v, xsum)
   867  		}
   868  		udp.SetChecksum(^udp.CalculateChecksum(xsum))
   869  	}
   870  
   871  	if u.useDefaultTTL {
   872  		u.ttl = u.route.DefaultTTL()
   873  	}
   874  	if err := u.route.WritePacket(stack.NetworkHeaderParams{
   875  		Protocol: ProtocolNumber,
   876  		TTL:      u.ttl,
   877  		TOS:      u.tos,
   878  	}, pkt); err != nil {
   879  		u.route.Stats().UDP.PacketSendErrors.Increment()
   880  		return 0, err
   881  	}
   882  
   883  	// Track count of packets sent.
   884  	u.route.Stats().UDP.PacketsSent.Increment()
   885  	return len(u.data), nil
   886  }
   887  
   888  // checkV4MappedLocked determines the effective network protocol and converts
   889  // addr to its canonical form.
   890  func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) {
   891  	unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.ops.GetV6Only())
   892  	if err != nil {
   893  		return tcpip.FullAddress{}, 0, err
   894  	}
   895  	return unwrapped, netProto, nil
   896  }
   897  
// Disconnect implements tcpip.Endpoint.
//
// It is a no-op unless the endpoint is in StateConnected. A connected
// endpoint that is bound to a NIC, or that has no local address, is
// re-registered with the stack under its local address and port only and
// returns to StateBound. Otherwise its port (if any) is released and it
// returns to StateInitial. In both cases the previous registration is removed
// and the cached route and destination port are dropped.
//
// If re-registering fails, the error is returned and the endpoint is left
// connected.
func (e *endpoint) Disconnect() tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	if e.EndpointState() != StateConnected {
		return nil
	}
	// id and btd describe the registration the endpoint will have after
	// disconnecting; they stay zero when it falls back to StateInitial.
	var (
		id  stack.TransportEndpointID
		btd tcpip.NICID
	)

	// We change this value below and we need the old value to unregister
	// the endpoint.
	boundPortFlags := e.boundPortFlags

	// Exclude ephemerally bound endpoints.
	if e.BindNICID != 0 || e.ID.LocalAddress == "" {
		var err tcpip.Error
		// Register again with the remote half of the ID left empty.
		id = stack.TransportEndpointID{
			LocalPort:    e.ID.LocalPort,
			LocalAddress: e.ID.LocalAddress,
		}
		id, btd, err = e.registerWithStack(e.effectiveNetProtos, id)
		if err != nil {
			return err
		}
		e.setEndpointState(StateBound)
		boundPortFlags = e.boundPortFlags
	} else {
		if e.ID.LocalPort != 0 {
			// Release the ephemeral port.
			portRes := ports.Reservation{
				Networks:     e.effectiveNetProtos,
				Transport:    ProtocolNumber,
				Addr:         e.ID.LocalAddress,
				Port:         e.ID.LocalPort,
				Flags:        boundPortFlags,
				BindToDevice: e.boundBindToDevice,
				Dest:         tcpip.FullAddress{},
			}
			e.stack.ReleasePort(portRes)
			e.boundPortFlags = ports.Flags{}
		}
		e.setEndpointState(StateInitial)
	}

	// Drop the connected registration (still keyed by the old e.ID), then
	// switch over to the new identity and forget the peer.
	e.stack.UnregisterTransportEndpoint(e.effectiveNetProtos, ProtocolNumber, e.ID, e, boundPortFlags, e.boundBindToDevice)
	e.ID = id
	e.boundBindToDevice = btd
	e.route.Release()
	e.route = nil
	e.dstPort = 0

	return nil
}
   955  
// Connect connects the endpoint to its peer. Specifying a NIC is optional.
//
// The endpoint may be in StateInitial, StateBound or StateConnected; any other
// state yields ErrInvalidEndpointState. When the endpoint was bound to a NIC
// (BindNICID != 0), addr.NIC must be zero or equal to that NIC, otherwise
// ErrInvalidEndpointState is returned. On success the endpoint is registered
// under the new connected ID, any previous registration and route are dropped,
// and the endpoint is marked StateConnected and ready to receive.
func (e *endpoint) Connect(addr tcpip.FullAddress) tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()

	nicID := addr.NIC
	var localPort uint16
	switch e.EndpointState() {
	case StateInitial:
		// Nothing to carry over: localPort stays zero.
	case StateBound, StateConnected:
		// Keep the local port that is already in use.
		localPort = e.ID.LocalPort
		if e.BindNICID == 0 {
			break
		}

		// The endpoint was bound to a specific NIC; the caller may not
		// pick a different one.
		if nicID != 0 && nicID != e.BindNICID {
			return &tcpip.ErrInvalidEndpointState{}
		}

		nicID = e.BindNICID
	default:
		return &tcpip.ErrInvalidEndpointState{}
	}

	addr, netProto, err := e.checkV4MappedLocked(addr)
	if err != nil {
		return err
	}

	r, nicID, err := e.connectRoute(nicID, addr, netProto)
	if err != nil {
		return err
	}

	id := stack.TransportEndpointID{
		LocalAddress:  e.ID.LocalAddress,
		LocalPort:     localPort,
		RemotePort:    addr.Port,
		RemoteAddress: r.RemoteAddress(),
	}

	// An endpoint that was never bound takes its local address from the
	// route.
	if e.EndpointState() == StateInitial {
		id.LocalAddress = r.LocalAddress()
	}

	// Even if we're connected, this endpoint can still be used to send
	// packets on a different network protocol, so both IPv4 and IPv6 are
	// registered when this is an IPv6 endpoint and v6only is not set.
	netProtos := []tcpip.NetworkProtocolNumber{netProto}
	if netProto == header.IPv6ProtocolNumber && !e.ops.GetV6Only() {
		netProtos = []tcpip.NetworkProtocolNumber{
			header.IPv4ProtocolNumber,
			header.IPv6ProtocolNumber,
		}
	}

	// registerWithStack overwrites e.boundPortFlags, so capture the flags the
	// old registration was made with before calling it.
	oldPortFlags := e.boundPortFlags

	id, btd, err := e.registerWithStack(netProtos, id)
	if err != nil {
		// The new route is not stored on the endpoint, so release it here.
		r.Release()
		return err
	}

	// Remove the old registration.
	if e.ID.LocalPort != 0 {
		e.stack.UnregisterTransportEndpoint(e.effectiveNetProtos, ProtocolNumber, e.ID, e, oldPortFlags, e.boundBindToDevice)
	}

	e.ID = id
	e.boundBindToDevice = btd
	if e.route != nil {
		// If the endpoint was already connected then make sure we release the
		// previous route.
		e.route.Release()
	}
	e.route = r
	e.dstPort = addr.Port
	e.RegisterNICID = nicID
	e.effectiveNetProtos = netProtos

	e.setEndpointState(StateConnected)

	// Allow HandlePacket to start queueing incoming packets.
	e.rcvMu.Lock()
	e.rcvReady = true
	e.rcvMu.Unlock()

	return nil
}
  1045  
// ConnectEndpoint is not supported. It ignores its argument and always returns
// ErrInvalidEndpointState.
func (*endpoint) ConnectEndpoint(tcpip.Endpoint) tcpip.Error {
	return &tcpip.ErrInvalidEndpointState{}
}
  1050  
  1051  // Shutdown closes the read and/or write end of the endpoint connection
  1052  // to its peer.
  1053  func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error {
  1054  	e.mu.Lock()
  1055  	defer e.mu.Unlock()
  1056  
  1057  	// A socket in the bound state can still receive multicast messages,
  1058  	// so we need to notify waiters on shutdown.
  1059  	if state := e.EndpointState(); state != StateBound && state != StateConnected {
  1060  		return &tcpip.ErrNotConnected{}
  1061  	}
  1062  
  1063  	e.shutdownFlags |= flags
  1064  
  1065  	if flags&tcpip.ShutdownRead != 0 {
  1066  		e.rcvMu.Lock()
  1067  		wasClosed := e.rcvClosed
  1068  		e.rcvClosed = true
  1069  		e.rcvMu.Unlock()
  1070  
  1071  		if !wasClosed {
  1072  			e.waiterQueue.Notify(waiter.ReadableEvents)
  1073  		}
  1074  	}
  1075  
  1076  	return nil
  1077  }
  1078  
// Listen is not supported by UDP. The backlog argument is ignored and the call
// always fails with ErrNotSupported.
func (*endpoint) Listen(int) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
  1083  
// Accept is not supported by UDP. The peer address argument is ignored and the
// call always fails with ErrNotSupported, returning a nil endpoint and a nil
// waiter queue.
func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) {
	return nil, nil, &tcpip.ErrNotSupported{}
}
  1088  
  1089  func (e *endpoint) registerWithStack(netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, tcpip.Error) {
  1090  	bindToDevice := tcpip.NICID(e.ops.GetBindToDevice())
  1091  	if e.ID.LocalPort == 0 {
  1092  		portRes := ports.Reservation{
  1093  			Networks:     netProtos,
  1094  			Transport:    ProtocolNumber,
  1095  			Addr:         id.LocalAddress,
  1096  			Port:         id.LocalPort,
  1097  			Flags:        e.portFlags,
  1098  			BindToDevice: bindToDevice,
  1099  			Dest:         tcpip.FullAddress{},
  1100  		}
  1101  		port, err := e.stack.ReservePort(e.stack.Rand(), portRes, nil /* testPort */)
  1102  		if err != nil {
  1103  			return id, bindToDevice, err
  1104  		}
  1105  		id.LocalPort = port
  1106  	}
  1107  	e.boundPortFlags = e.portFlags
  1108  
  1109  	err := e.stack.RegisterTransportEndpoint(netProtos, ProtocolNumber, id, e, e.boundPortFlags, bindToDevice)
  1110  	if err != nil {
  1111  		portRes := ports.Reservation{
  1112  			Networks:     netProtos,
  1113  			Transport:    ProtocolNumber,
  1114  			Addr:         id.LocalAddress,
  1115  			Port:         id.LocalPort,
  1116  			Flags:        e.boundPortFlags,
  1117  			BindToDevice: bindToDevice,
  1118  			Dest:         tcpip.FullAddress{},
  1119  		}
  1120  		e.stack.ReleasePort(portRes)
  1121  		e.boundPortFlags = ports.Flags{}
  1122  	}
  1123  	return id, bindToDevice, err
  1124  }
  1125  
  1126  func (e *endpoint) bindLocked(addr tcpip.FullAddress) tcpip.Error {
  1127  	// Don't allow binding once endpoint is not in the initial state
  1128  	// anymore.
  1129  	if e.EndpointState() != StateInitial {
  1130  		return &tcpip.ErrInvalidEndpointState{}
  1131  	}
  1132  
  1133  	addr, netProto, err := e.checkV4MappedLocked(addr)
  1134  	if err != nil {
  1135  		return err
  1136  	}
  1137  
  1138  	// Expand netProtos to include v4 and v6 if the caller is binding to a
  1139  	// wildcard (empty) address, and this is an IPv6 endpoint with v6only
  1140  	// set to false.
  1141  	netProtos := []tcpip.NetworkProtocolNumber{netProto}
  1142  	if netProto == header.IPv6ProtocolNumber && !e.ops.GetV6Only() && addr.Addr == "" {
  1143  		netProtos = []tcpip.NetworkProtocolNumber{
  1144  			header.IPv6ProtocolNumber,
  1145  			header.IPv4ProtocolNumber,
  1146  		}
  1147  	}
  1148  
  1149  	nicID := addr.NIC
  1150  	if len(addr.Addr) != 0 && !e.isBroadcastOrMulticast(addr.NIC, netProto, addr.Addr) {
  1151  		// A local unicast address was specified, verify that it's valid.
  1152  		nicID = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr)
  1153  		if nicID == 0 {
  1154  			return &tcpip.ErrBadLocalAddress{}
  1155  		}
  1156  	}
  1157  
  1158  	id := stack.TransportEndpointID{
  1159  		LocalPort:    addr.Port,
  1160  		LocalAddress: addr.Addr,
  1161  	}
  1162  	id, btd, err := e.registerWithStack(netProtos, id)
  1163  	if err != nil {
  1164  		return err
  1165  	}
  1166  
  1167  	e.ID = id
  1168  	e.boundBindToDevice = btd
  1169  	e.RegisterNICID = nicID
  1170  	e.effectiveNetProtos = netProtos
  1171  
  1172  	// Mark endpoint as bound.
  1173  	e.setEndpointState(StateBound)
  1174  
  1175  	e.rcvMu.Lock()
  1176  	e.rcvReady = true
  1177  	e.rcvMu.Unlock()
  1178  
  1179  	return nil
  1180  }
  1181  
  1182  // Bind binds the endpoint to a specific local address and port.
  1183  // Specifying a NIC is optional.
  1184  func (e *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error {
  1185  	e.mu.Lock()
  1186  	defer e.mu.Unlock()
  1187  
  1188  	err := e.bindLocked(addr)
  1189  	if err != nil {
  1190  		return err
  1191  	}
  1192  
  1193  	// Save the effective NICID generated by bindLocked.
  1194  	e.BindNICID = e.RegisterNICID
  1195  
  1196  	return nil
  1197  }
  1198  
  1199  // GetLocalAddress returns the address to which the endpoint is bound.
  1200  func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
  1201  	e.mu.RLock()
  1202  	defer e.mu.RUnlock()
  1203  
  1204  	addr := e.ID.LocalAddress
  1205  	if e.EndpointState() == StateConnected {
  1206  		addr = e.route.LocalAddress()
  1207  	}
  1208  
  1209  	return tcpip.FullAddress{
  1210  		NIC:  e.RegisterNICID,
  1211  		Addr: addr,
  1212  		Port: e.ID.LocalPort,
  1213  	}, nil
  1214  }
  1215  
  1216  // GetRemoteAddress returns the address to which the endpoint is connected.
  1217  func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
  1218  	e.mu.RLock()
  1219  	defer e.mu.RUnlock()
  1220  
  1221  	if e.EndpointState() != StateConnected || e.dstPort == 0 {
  1222  		return tcpip.FullAddress{}, &tcpip.ErrNotConnected{}
  1223  	}
  1224  
  1225  	return tcpip.FullAddress{
  1226  		NIC:  e.RegisterNICID,
  1227  		Addr: e.ID.RemoteAddress,
  1228  		Port: e.ID.RemotePort,
  1229  	}, nil
  1230  }
  1231  
  1232  // Readiness returns the current readiness of the endpoint. For example, if
  1233  // waiter.EventIn is set, the endpoint is immediately readable.
  1234  func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
  1235  	// The endpoint is always writable.
  1236  	result := waiter.WritableEvents & mask
  1237  
  1238  	// Determine if the endpoint is readable if requested.
  1239  	if mask&waiter.ReadableEvents != 0 {
  1240  		e.rcvMu.Lock()
  1241  		if !e.rcvList.Empty() || e.rcvClosed {
  1242  			result |= waiter.ReadableEvents
  1243  		}
  1244  		e.rcvMu.Unlock()
  1245  	}
  1246  
  1247  	e.lastErrorMu.Lock()
  1248  	hasError := e.lastError != nil
  1249  	e.lastErrorMu.Unlock()
  1250  	if hasError {
  1251  		result |= waiter.EventErr
  1252  	}
  1253  	return result
  1254  }
  1255  
  1256  // verifyChecksum verifies the checksum unless RX checksum offload is enabled.
  1257  func verifyChecksum(hdr header.UDP, pkt *stack.PacketBuffer) bool {
  1258  	if pkt.RXTransportChecksumValidated {
  1259  		return true
  1260  	}
  1261  
  1262  	// On IPv4, UDP checksum is optional, and a zero value means the transmitter
  1263  	// omitted the checksum generation, as per RFC 768:
  1264  	//
  1265  	//   An all zero transmitted checksum value means that the transmitter
  1266  	//   generated  no checksum  (for debugging or for higher level protocols that
  1267  	//   don't care).
  1268  	//
  1269  	// On IPv6, UDP checksum is not optional, as per RFC 2460 Section 8.1:
  1270  	//
  1271  	//   Unlike IPv4, when UDP packets are originated by an IPv6 node, the UDP
  1272  	//   checksum is not optional.
  1273  	if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber && hdr.Checksum() == 0 {
  1274  		return true
  1275  	}
  1276  
  1277  	netHdr := pkt.Network()
  1278  	payloadChecksum := pkt.Data().AsRange().Checksum()
  1279  	return hdr.IsChecksumValid(netHdr.SourceAddress(), netHdr.DestinationAddress(), payloadChecksum)
  1280  }
  1281  
// HandlePacket is called by the stack when new packets arrive to this transport
// endpoint.
//
// The packet is dropped (and the matching stack and endpoint counters are
// incremented) when its UDP length field is larger than the data present, when
// checksum verification fails, when the endpoint is not ready to receive or
// its receive side is closed, or when the endpoint is frozen or its receive
// buffer is full. Otherwise the payload is appended to the receive list and
// readers are notified if the buffer was empty beforehand.
func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
	// Get the header then trim it from the view.
	hdr := header.UDP(pkt.TransportHeader().View())
	// The length field covers header plus payload, so it may not exceed the
	// payload present in pkt plus the UDP header size.
	if int(hdr.Length()) > pkt.Data().Size()+header.UDPMinimumSize {
		// Malformed packet.
		e.stack.Stats().UDP.MalformedPacketsReceived.Increment()
		e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
		return
	}

	if !verifyChecksum(hdr, pkt) {
		e.stack.Stats().UDP.ChecksumErrors.Increment()
		e.stats.ReceiveErrors.ChecksumErrors.Increment()
		return
	}

	e.stack.Stats().UDP.PacketsReceived.Increment()
	e.stats.PacketsReceived.Increment()

	e.rcvMu.Lock()
	// Drop the packet if the endpoint is not yet ready to receive or its
	// receive side has been closed.
	if !e.rcvReady || e.rcvClosed {
		e.rcvMu.Unlock()
		e.stack.Stats().UDP.ReceiveBufferErrors.Increment()
		e.stats.ReceiveErrors.ClosedReceiver.Increment()
		return
	}

	// Drop the packet if the endpoint is frozen or the receive buffer is
	// currently full.
	rcvBufSize := e.ops.GetReceiveBufferSize()
	if e.frozen || e.rcvBufSize >= int(rcvBufSize) {
		e.rcvMu.Unlock()
		e.stack.Stats().UDP.ReceiveBufferErrors.Increment()
		e.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
		return
	}

	// Remember whether readers need waking; this must be sampled before the
	// buffer size is increased below.
	wasEmpty := e.rcvBufSize == 0

	// Push new packet into receive list and increment the buffer size.
	packet := &udpPacket{
		senderAddress: tcpip.FullAddress{
			NIC:  pkt.NICID,
			Addr: id.RemoteAddress,
			Port: hdr.SourcePort(),
		},
		destinationAddress: tcpip.FullAddress{
			NIC:  pkt.NICID,
			Addr: id.LocalAddress,
			Port: hdr.DestinationPort(),
		},
		data: pkt.Data().ExtractVV(),
	}
	e.rcvList.PushBack(packet)
	e.rcvBufSize += packet.data.Size()

	// Save any useful information from the network header to the packet.
	// The remaining fields are filled in while rcvMu is still held.
	switch pkt.NetworkProtocolNumber {
	case header.IPv4ProtocolNumber:
		packet.tos, _ = header.IPv4(pkt.NetworkHeader().View()).TOS()
	case header.IPv6ProtocolNumber:
		packet.tos, _ = header.IPv6(pkt.NetworkHeader().View()).TOS()
	}

	// TODO(github.com/SagerNet/issue/3556): r.LocalAddress may be a multicast or broadcast
	// address. packetInfo.LocalAddr should hold a unicast address that can be
	// used to respond to the incoming packet.
	localAddr := pkt.Network().DestinationAddress()
	packet.packetInfo.LocalAddr = localAddr
	packet.packetInfo.DestinationAddr = localAddr
	packet.packetInfo.NIC = pkt.NICID
	packet.receivedAt = e.stack.Clock().Now()

	e.rcvMu.Unlock()

	// Notify any waiters that there's data to be read now.
	if wasEmpty {
		e.waiterQueue.Notify(waiter.ReadableEvents)
	}
}
  1363  
  1364  func (e *endpoint) onICMPError(err tcpip.Error, transErr stack.TransportError, pkt *stack.PacketBuffer) {
  1365  	// Update last error first.
  1366  	e.lastErrorMu.Lock()
  1367  	e.lastError = err
  1368  	e.lastErrorMu.Unlock()
  1369  
  1370  	// Update the error queue if IP_RECVERR is enabled.
  1371  	if e.SocketOptions().GetRecvError() {
  1372  		// Linux passes the payload without the UDP header.
  1373  		var payload []byte
  1374  		udp := header.UDP(pkt.Data().AsRange().ToOwnedView())
  1375  		if len(udp) >= header.UDPMinimumSize {
  1376  			payload = udp.Payload()
  1377  		}
  1378  
  1379  		e.SocketOptions().QueueErr(&tcpip.SockError{
  1380  			Err:     err,
  1381  			Cause:   transErr,
  1382  			Payload: payload,
  1383  			Dst: tcpip.FullAddress{
  1384  				NIC:  pkt.NICID,
  1385  				Addr: e.ID.RemoteAddress,
  1386  				Port: e.ID.RemotePort,
  1387  			},
  1388  			Offender: tcpip.FullAddress{
  1389  				NIC:  pkt.NICID,
  1390  				Addr: e.ID.LocalAddress,
  1391  				Port: e.ID.LocalPort,
  1392  			},
  1393  			NetProto: pkt.NetworkProtocolNumber,
  1394  		})
  1395  	}
  1396  
  1397  	// Notify of the error.
  1398  	e.waiterQueue.Notify(waiter.EventErr)
  1399  }
  1400  
  1401  // HandleError implements stack.TransportEndpoint.
  1402  func (e *endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer) {
  1403  	// TODO(github.com/SagerNet/issues/5270): Handle all transport errors.
  1404  	switch transErr.Kind() {
  1405  	case stack.DestinationPortUnreachableTransportError:
  1406  		if e.EndpointState() == StateConnected {
  1407  			e.onICMPError(&tcpip.ErrConnectionRefused{}, transErr, pkt)
  1408  		}
  1409  	}
  1410  }
  1411  
  1412  // State implements tcpip.Endpoint.
  1413  func (e *endpoint) State() uint32 {
  1414  	return uint32(e.EndpointState())
  1415  }
  1416  
  1417  // Info returns a copy of the endpoint info.
  1418  func (e *endpoint) Info() tcpip.EndpointInfo {
  1419  	e.mu.RLock()
  1420  	// Make a copy of the endpoint info.
  1421  	ret := e.TransportEndpointInfo
  1422  	e.mu.RUnlock()
  1423  	return &ret
  1424  }
  1425  
// Stats returns a pointer to the endpoint stats. The pointer refers to the
// endpoint's own stats field, not to a copy.
func (e *endpoint) Stats() tcpip.EndpointStats {
	return &e.stats
}
  1430  
// Wait implements tcpip.Endpoint. It is a no-op for this endpoint: the body is
// empty and the call returns immediately.
func (*endpoint) Wait() {}
  1433  
  1434  func (e *endpoint) isBroadcastOrMulticast(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
  1435  	return addr == header.IPv4Broadcast || header.IsV4MulticastAddress(addr) || header.IsV6MulticastAddress(addr) || e.stack.IsSubnetBroadcast(nicID, netProto, addr)
  1436  }
  1437  
// SetOwner implements tcpip.Endpoint. It stores owner on the endpoint.
//
// NOTE(review): the field is written without taking e.mu -- confirm callers
// set the owner before the endpoint is used concurrently.
func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
	e.owner = owner
}
  1442  
// SocketOptions implements tcpip.Endpoint. It returns a pointer to the
// endpoint's own socket options (e.ops), not a copy.
func (e *endpoint) SocketOptions() *tcpip.SocketOptions {
	return &e.ops
}
  1447  
  1448  // freeze prevents any more packets from being delivered to the endpoint.
  1449  func (e *endpoint) freeze() {
  1450  	e.mu.Lock()
  1451  	e.frozen = true
  1452  	e.mu.Unlock()
  1453  }
  1454  
  1455  // thaw unfreezes a previously frozen endpoint using endpoint.freeze() allows
  1456  // new packets to be delivered again.
  1457  func (e *endpoint) thaw() {
  1458  	e.mu.Lock()
  1459  	e.frozen = false
  1460  	e.mu.Unlock()
  1461  }