github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/transport/packet/endpoint.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packet provides the implementation of packet sockets (see
    16  // packet(7)). Packet sockets allow applications to:
    17  //
    18  //   * manually write and inspect link, network, and transport headers
    19  //   * receive all traffic of a given network protocol, or all protocols
    20  //
    21  // Packet sockets are similar to raw sockets, but provide even more power to
    22  // users, letting them effectively talk directly to the network device.
    23  //
    24  // Packet sockets skip the input and output iptables chains.
    25  package packet
    26  
    27  import (
    28  	"fmt"
    29  	"io"
    30  	"time"
    31  
    32  	"github.com/SagerNet/gvisor/pkg/sync"
    33  	"github.com/SagerNet/gvisor/pkg/tcpip"
    34  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    35  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    36  	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
    37  	"github.com/SagerNet/gvisor/pkg/waiter"
    38  )
    39  
// packet is one received frame queued on an endpoint's receive list, together
// with the metadata that Read reports alongside the data.
//
// +stateify savable
type packet struct {
	// packetEntry is embedded; presumably it provides the list linkage
	// required by packetList (TODO confirm against the generated list code).
	packetEntry
	// data holds the actual packet data, including any headers and
	// payload.
	data       buffer.VectorisedView `state:".(buffer.VectorisedView)"`
	// receivedAt is the stack clock reading taken when the packet was queued
	// by HandlePacket. Read reports it as the packet timestamp.
	receivedAt time.Time             `state:".(int64)"`
	// senderAddr is the network address of the sender.
	senderAddr tcpip.FullAddress
	// packetInfo holds additional information like the protocol
	// of the packet etc.
	packetInfo tcpip.LinkPacketInfo
}
    53  
// endpoint is the packet socket implementation of tcpip.Endpoint. It is legal
// to have goroutines make concurrent calls into the endpoint.
//
// Lock order:
//   endpoint.mu
//     endpoint.rcvMu
//
// +stateify savable
type endpoint struct {
	stack.TransportEndpointInfo
	tcpip.DefaultSocketOptionsHandler

	// The following fields are initialized at creation time and are
	// immutable.
	//
	// stack is the stack the endpoint is registered with.
	stack       *stack.Stack `state:"manual"`
	// netProto is the network protocol number used when registering and
	// unregistering the endpoint with the stack.
	netProto    tcpip.NetworkProtocolNumber
	// waiterQueue is notified when the endpoint becomes readable and when it
	// is closed.
	waiterQueue *waiter.Queue
	// cooked selects how HandlePacket builds the queued data: when true the
	// link header is left out, when false a link header (real or faked) is
	// included for incoming packets.
	cooked      bool

	// The following fields are used to manage the receive queue and are
	// protected by rcvMu.
	rcvMu      sync.Mutex `state:"nosave"`
	// rcvList is the queue of received packets, oldest first.
	rcvList    packetList
	// rcvBufSize is the total size in bytes of the data queued in rcvList.
	rcvBufSize int
	// rcvClosed is set by Close; once set, HandlePacket drops all packets.
	rcvClosed  bool

	// The following fields are protected by mu.
	mu       sync.RWMutex `state:"nosave"`
	// closed is set once Close has run, making later Close calls no-ops.
	closed   bool
	stats    tcpip.TransportEndpointStats `state:"nosave"`
	// bound reports whether Bind has attached the endpoint to boundNIC.
	bound    bool
	// boundNIC is the NIC passed to the last successful Bind.
	boundNIC tcpip.NICID

	// lastErrorMu protects lastError.
	lastErrorMu sync.Mutex `state:"nosave"`
	lastError   tcpip.Error

	// ops is used to get socket level options.
	ops tcpip.SocketOptions

	// frozen indicates if the packets should be delivered to the endpoint
	// during restore. While it is true HandlePacket drops every packet.
	// freeze and thaw write it with mu held; HandlePacket reads it with
	// rcvMu held.
	frozen bool
}
    98  
    99  // NewEndpoint returns a new packet endpoint.
   100  func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
   101  	ep := &endpoint{
   102  		stack: s,
   103  		TransportEndpointInfo: stack.TransportEndpointInfo{
   104  			NetProto: netProto,
   105  		},
   106  		cooked:      cooked,
   107  		netProto:    netProto,
   108  		waiterQueue: waiterQueue,
   109  	}
   110  	ep.ops.InitHandler(ep, ep.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits)
   111  	ep.ops.SetReceiveBufferSize(32*1024, false /* notify */)
   112  
   113  	// Override with stack defaults.
   114  	var ss tcpip.SendBufferSizeOption
   115  	if err := s.Option(&ss); err == nil {
   116  		ep.ops.SetSendBufferSize(int64(ss.Default), false /* notify */)
   117  	}
   118  
   119  	var rs tcpip.ReceiveBufferSizeOption
   120  	if err := s.Option(&rs); err == nil {
   121  		ep.ops.SetReceiveBufferSize(int64(rs.Default), false /* notify */)
   122  	}
   123  
   124  	if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil {
   125  		return nil, err
   126  	}
   127  	return ep, nil
   128  }
   129  
// Abort implements stack.TransportEndpoint.Abort. It has no abort-specific
// behavior and simply closes the endpoint.
func (ep *endpoint) Abort() {
	ep.Close()
}
   134  
   135  // Close implements tcpip.Endpoint.Close.
   136  func (ep *endpoint) Close() {
   137  	ep.mu.Lock()
   138  	defer ep.mu.Unlock()
   139  
   140  	if ep.closed {
   141  		return
   142  	}
   143  
   144  	ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep)
   145  
   146  	ep.rcvMu.Lock()
   147  	defer ep.rcvMu.Unlock()
   148  
   149  	// Clear the receive list.
   150  	ep.rcvClosed = true
   151  	ep.rcvBufSize = 0
   152  	for !ep.rcvList.Empty() {
   153  		ep.rcvList.Remove(ep.rcvList.Front())
   154  	}
   155  
   156  	ep.closed = true
   157  	ep.bound = false
   158  	ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
   159  }
   160  
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. It is a no-op
// for packet endpoints.
func (*endpoint) ModerateRecvBuf(int) {}
   163  
   164  // Read implements tcpip.Endpoint.Read.
   165  func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) {
   166  	ep.rcvMu.Lock()
   167  
   168  	// If there's no data to read, return that read would block or that the
   169  	// endpoint is closed.
   170  	if ep.rcvList.Empty() {
   171  		var err tcpip.Error = &tcpip.ErrWouldBlock{}
   172  		if ep.rcvClosed {
   173  			ep.stats.ReadErrors.ReadClosed.Increment()
   174  			err = &tcpip.ErrClosedForReceive{}
   175  		}
   176  		ep.rcvMu.Unlock()
   177  		return tcpip.ReadResult{}, err
   178  	}
   179  
   180  	packet := ep.rcvList.Front()
   181  	if !opts.Peek {
   182  		ep.rcvList.Remove(packet)
   183  		ep.rcvBufSize -= packet.data.Size()
   184  	}
   185  
   186  	ep.rcvMu.Unlock()
   187  
   188  	res := tcpip.ReadResult{
   189  		Total: packet.data.Size(),
   190  		ControlMessages: tcpip.ControlMessages{
   191  			HasTimestamp: true,
   192  			Timestamp:    packet.receivedAt.UnixNano(),
   193  		},
   194  	}
   195  	if opts.NeedRemoteAddr {
   196  		res.RemoteAddr = packet.senderAddr
   197  	}
   198  	if opts.NeedLinkPacketInfo {
   199  		res.LinkPacketInfo = packet.packetInfo
   200  	}
   201  
   202  	n, err := packet.data.ReadTo(dst, opts.Peek)
   203  	if n == 0 && err != nil {
   204  		return res, &tcpip.ErrBadBuffer{}
   205  	}
   206  	res.Count = n
   207  	return res, nil
   208  }
   209  
// Write implements tcpip.Endpoint.Write. This implementation never sends
// anything: it ignores its arguments and always returns
// *tcpip.ErrInvalidOptionValue.
func (*endpoint) Write(tcpip.Payloader, tcpip.WriteOptions) (int64, tcpip.Error) {
	return 0, &tcpip.ErrInvalidOptionValue{}
}
   213  
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
// disconnected, and this function always returns *tcpip.ErrNotSupported.
func (*endpoint) Disconnect() tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   219  
// Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
// connected, and this function always returns *tcpip.ErrNotSupported.
func (*endpoint) Connect(tcpip.FullAddress) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   225  
// Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
// with Shutdown, and this function always returns *tcpip.ErrNotSupported
// regardless of the flags passed.
func (*endpoint) Shutdown(tcpip.ShutdownFlags) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   231  
// Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
// Listen, and this function always returns *tcpip.ErrNotSupported regardless
// of the backlog passed.
func (*endpoint) Listen(int) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   237  
// Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
// Accept, and this function always returns a nil endpoint, a nil waiter queue
// and *tcpip.ErrNotSupported.
func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) {
	return nil, nil, &tcpip.ErrNotSupported{}
}
   243  
   244  // Bind implements tcpip.Endpoint.Bind.
   245  func (ep *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error {
   246  	// "By default, all packets of the specified protocol type are passed
   247  	// to a packet socket.  To get packets only from a specific interface
   248  	// use bind(2) specifying an address in a struct sockaddr_ll to bind
   249  	// the packet socket  to  an interface.  Fields used for binding are
   250  	// sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex."
   251  	// - packet(7).
   252  
   253  	ep.mu.Lock()
   254  	defer ep.mu.Unlock()
   255  
   256  	if ep.bound && ep.boundNIC == addr.NIC {
   257  		// If the NIC being bound is the same then just return success.
   258  		return nil
   259  	}
   260  
   261  	// Unregister endpoint with all the nics.
   262  	ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep)
   263  	ep.bound = false
   264  
   265  	// Bind endpoint to receive packets from specific interface.
   266  	if err := ep.stack.RegisterPacketEndpoint(addr.NIC, ep.netProto, ep); err != nil {
   267  		return err
   268  	}
   269  
   270  	ep.bound = true
   271  	ep.boundNIC = addr.NIC
   272  
   273  	return nil
   274  }
   275  
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. This
// implementation always returns a zero address and *tcpip.ErrNotSupported.
func (*endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
	return tcpip.FullAddress{}, &tcpip.ErrNotSupported{}
}
   280  
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. It always
// returns a zero address and *tcpip.ErrNotConnected.
func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
	// Even a connected socket doesn't return a remote address.
	return tcpip.FullAddress{}, &tcpip.ErrNotConnected{}
}
   286  
   287  // Readiness implements tcpip.Endpoint.Readiness.
   288  func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
   289  	// The endpoint is always writable.
   290  	result := waiter.WritableEvents & mask
   291  
   292  	// Determine whether the endpoint is readable.
   293  	if (mask & waiter.ReadableEvents) != 0 {
   294  		ep.rcvMu.Lock()
   295  		if !ep.rcvList.Empty() || ep.rcvClosed {
   296  			result |= waiter.ReadableEvents
   297  		}
   298  		ep.rcvMu.Unlock()
   299  	}
   300  
   301  	return result
   302  }
   303  
   304  // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be
   305  // used with SetSockOpt, and this function always returns
   306  // *tcpip.ErrNotSupported.
   307  func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
   308  	switch opt.(type) {
   309  	case *tcpip.SocketDetachFilterOption:
   310  		return nil
   311  
   312  	default:
   313  		return &tcpip.ErrUnknownProtocolOption{}
   314  	}
   315  }
   316  
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. No integer options
// are handled here: it always returns *tcpip.ErrUnknownProtocolOption.
func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error {
	return &tcpip.ErrUnknownProtocolOption{}
}
   321  
   322  func (ep *endpoint) LastError() tcpip.Error {
   323  	ep.lastErrorMu.Lock()
   324  	defer ep.lastErrorMu.Unlock()
   325  
   326  	err := ep.lastError
   327  	ep.lastError = nil
   328  	return err
   329  }
   330  
   331  // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError.
   332  func (ep *endpoint) UpdateLastError(err tcpip.Error) {
   333  	ep.lastErrorMu.Lock()
   334  	ep.lastError = err
   335  	ep.lastErrorMu.Unlock()
   336  }
   337  
// GetSockOpt implements tcpip.Endpoint.GetSockOpt. No options can be read
// through it: it always returns *tcpip.ErrNotSupported.
func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   342  
   343  // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
   344  func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
   345  	switch opt {
   346  	case tcpip.ReceiveQueueSizeOption:
   347  		v := 0
   348  		ep.rcvMu.Lock()
   349  		if !ep.rcvList.Empty() {
   350  			p := ep.rcvList.Front()
   351  			v = p.data.Size()
   352  		}
   353  		ep.rcvMu.Unlock()
   354  		return v, nil
   355  
   356  	default:
   357  		return -1, &tcpip.ErrUnknownProtocolOption{}
   358  	}
   359  }
   360  
   361  // HandlePacket implements stack.PacketEndpoint.HandlePacket.
   362  func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
   363  	ep.rcvMu.Lock()
   364  
   365  	// Drop the packet if our buffer is currently full.
   366  	if ep.rcvClosed {
   367  		ep.rcvMu.Unlock()
   368  		ep.stack.Stats().DroppedPackets.Increment()
   369  		ep.stats.ReceiveErrors.ClosedReceiver.Increment()
   370  		return
   371  	}
   372  
   373  	rcvBufSize := ep.ops.GetReceiveBufferSize()
   374  	if ep.frozen || ep.rcvBufSize >= int(rcvBufSize) {
   375  		ep.rcvMu.Unlock()
   376  		ep.stack.Stats().DroppedPackets.Increment()
   377  		ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
   378  		return
   379  	}
   380  
   381  	wasEmpty := ep.rcvBufSize == 0
   382  
   383  	// Push new packet into receive list and increment the buffer size.
   384  	var packet packet
   385  	if !pkt.LinkHeader().View().IsEmpty() {
   386  		// Get info directly from the ethernet header.
   387  		hdr := header.Ethernet(pkt.LinkHeader().View())
   388  		packet.senderAddr = tcpip.FullAddress{
   389  			NIC:  nicID,
   390  			Addr: tcpip.Address(hdr.SourceAddress()),
   391  		}
   392  		packet.packetInfo.Protocol = netProto
   393  		packet.packetInfo.PktType = pkt.PktType
   394  	} else {
   395  		// Guess the would-be ethernet header.
   396  		packet.senderAddr = tcpip.FullAddress{
   397  			NIC:  nicID,
   398  			Addr: tcpip.Address(localAddr),
   399  		}
   400  		packet.packetInfo.Protocol = netProto
   401  		packet.packetInfo.PktType = pkt.PktType
   402  	}
   403  
   404  	if ep.cooked {
   405  		// Cooked packets can simply be queued.
   406  		switch pkt.PktType {
   407  		case tcpip.PacketHost:
   408  			packet.data = pkt.Data().ExtractVV()
   409  		case tcpip.PacketOutgoing:
   410  			// Strip Link Header.
   411  			var combinedVV buffer.VectorisedView
   412  			if v := pkt.NetworkHeader().View(); !v.IsEmpty() {
   413  				combinedVV.AppendView(v)
   414  			}
   415  			if v := pkt.TransportHeader().View(); !v.IsEmpty() {
   416  				combinedVV.AppendView(v)
   417  			}
   418  			combinedVV.Append(pkt.Data().ExtractVV())
   419  			packet.data = combinedVV
   420  		default:
   421  			panic(fmt.Sprintf("unexpected PktType in pkt: %+v", pkt))
   422  		}
   423  	} else {
   424  		// Raw packets need their ethernet headers prepended before
   425  		// queueing.
   426  		var linkHeader buffer.View
   427  		if pkt.PktType != tcpip.PacketOutgoing {
   428  			if pkt.LinkHeader().View().IsEmpty() {
   429  				// We weren't provided with an actual ethernet header,
   430  				// so fake one.
   431  				ethFields := header.EthernetFields{
   432  					SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
   433  					DstAddr: localAddr,
   434  					Type:    netProto,
   435  				}
   436  				fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
   437  				fakeHeader.Encode(&ethFields)
   438  				linkHeader = buffer.View(fakeHeader)
   439  			} else {
   440  				linkHeader = append(buffer.View(nil), pkt.LinkHeader().View()...)
   441  			}
   442  			combinedVV := linkHeader.ToVectorisedView()
   443  			combinedVV.Append(pkt.Data().ExtractVV())
   444  			packet.data = combinedVV
   445  		} else {
   446  			packet.data = buffer.NewVectorisedView(pkt.Size(), pkt.Views())
   447  		}
   448  	}
   449  	packet.receivedAt = ep.stack.Clock().Now()
   450  
   451  	ep.rcvList.PushBack(&packet)
   452  	ep.rcvBufSize += packet.data.Size()
   453  
   454  	ep.rcvMu.Unlock()
   455  	ep.stats.PacketsReceived.Increment()
   456  	// Notify waiters that there's data to be read.
   457  	if wasEmpty {
   458  		ep.waiterQueue.Notify(waiter.ReadableEvents)
   459  	}
   460  }
   461  
// State implements socket.Socket.State. Packet endpoints do not track a
// connection state, so this always returns 0.
func (*endpoint) State() uint32 {
	return 0
}
   466  
   467  // Info returns a copy of the endpoint info.
   468  func (ep *endpoint) Info() tcpip.EndpointInfo {
   469  	ep.mu.RLock()
   470  	// Make a copy of the endpoint info.
   471  	ret := ep.TransportEndpointInfo
   472  	ep.mu.RUnlock()
   473  	return &ret
   474  }
   475  
// Stats returns a pointer to the endpoint stats. The pointer refers to the
// endpoint's own stats field, not to a copy.
func (ep *endpoint) Stats() tcpip.EndpointStats {
	return &ep.stats
}
   480  
// SetOwner implements tcpip.Endpoint.SetOwner. The owner is ignored; this is
// a no-op for packet endpoints.
func (*endpoint) SetOwner(tcpip.PacketOwner) {}
   483  
// SocketOptions implements tcpip.Endpoint.SocketOptions. It returns a pointer
// to the endpoint's own socket options, not to a copy.
func (ep *endpoint) SocketOptions() *tcpip.SocketOptions {
	return &ep.ops
}
   488  
   489  // freeze prevents any more packets from being delivered to the endpoint.
   490  func (ep *endpoint) freeze() {
   491  	ep.mu.Lock()
   492  	ep.frozen = true
   493  	ep.mu.Unlock()
   494  }
   495  
   496  // thaw unfreezes a previously frozen endpoint using endpoint.freeze() allows
   497  // new packets to be delivered again.
   498  func (ep *endpoint) thaw() {
   499  	ep.mu.Lock()
   500  	ep.frozen = false
   501  	ep.mu.Unlock()
   502  }