github.com/vpnishe/netstack@v1.10.6/tcpip/transport/packet/endpoint.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packet provides the implementation of packet sockets (see
    16  // packet(7)). Packet sockets allow applications to:
    17  //
    18  //   * manually write and inspect link, network, and transport headers
    19  //   * receive all traffic of a given network protocol, or all protocols
    20  //
    21  // Packet sockets are similar to raw sockets, but provide even more power to
    22  // users, letting them effectively talk directly to the network device.
    23  //
    24  // Packet sockets skip the input and output iptables chains.
    25  package packet
    26  
    27  import (
    28  	"sync"
    29  
    30  	"github.com/vpnishe/netstack/tcpip"
    31  	"github.com/vpnishe/netstack/tcpip/buffer"
    32  	"github.com/vpnishe/netstack/tcpip/header"
    33  	"github.com/vpnishe/netstack/tcpip/iptables"
    34  	"github.com/vpnishe/netstack/tcpip/stack"
    35  	"github.com/vpnishe/netstack/waiter"
    36  )
    37  
// packet is a single received packet as it is held on an endpoint's receive
// list.
//
// +stateify savable
type packet struct {
	// packetEntry is embedded; presumably it carries the links that let a
	// packet be stored in a packetList (its definition is not in this
	// file).
	packetEntry
	// data holds the actual packet data, including any headers and
	// payload.
	data buffer.VectorisedView
	// timestampNS is the unix time at which the packet was received, as
	// reported by the stack's NowNanoseconds when the packet is queued.
	timestampNS int64
	// senderAddr is the address of the sender. HandlePacket stores a
	// link-layer address in Addr: the source address of the ethernet
	// header when one was supplied, and otherwise the localAddr it was
	// called with.
	senderAddr tcpip.FullAddress
}
    49  
// endpoint is the packet socket implementation of tcpip.Endpoint. It is legal
// to have goroutines make concurrent calls into the endpoint.
//
// Lock order:
//   endpoint.mu
//     endpoint.rcvMu
//
// +stateify savable
type endpoint struct {
	// TransportEndpointInfo is embedded; NewEndpoint sets its NetProto and
	// Info returns a copy of it.
	stack.TransportEndpointInfo
	// The following fields are initialized at creation time and are
	// immutable.
	//
	// stack is the stack the endpoint registers with in NewEndpoint and
	// unregisters from in Close. netProto is the protocol number used for
	// that registration. waiterQueue is notified when a packet arrives on
	// an empty queue and when the endpoint is closed. cooked selects
	// whether HandlePacket queues only the packet data (true) or prepends
	// a link header to it (false).
	stack       *stack.Stack
	netProto    tcpip.NetworkProtocolNumber
	waiterQueue *waiter.Queue
	cooked      bool

	// The following fields are used to manage the receive queue and are
	// protected by rcvMu.
	//
	// rcvList holds the queued packets in arrival order. rcvBufSize is the
	// total size of the data currently queued, and HandlePacket drops new
	// packets once it reaches rcvBufSizeMax. rcvClosed is set by Close;
	// afterwards HandlePacket drops packets and Read reports
	// tcpip.ErrClosedForReceive once the list is empty.
	rcvMu         sync.Mutex
	rcvList       packetList
	rcvBufSizeMax int
	rcvBufSize    int
	rcvClosed     bool

	// The following fields are protected by mu.
	//
	// sndBufSize is set in NewEndpoint and not otherwise used in this
	// file. closed is set by Close and makes later Close calls no-ops.
	//
	// NOTE(review): stats is updated by Read and HandlePacket without
	// holding mu; presumably its counters are safe for concurrent use -
	// confirm against tcpip.TransportEndpointStats.
	mu         sync.RWMutex
	sndBufSize int
	closed     bool
	stats      tcpip.TransportEndpointStats
}
    81  
    82  // NewEndpoint returns a new packet endpoint.
    83  func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
    84  	ep := &endpoint{
    85  		stack: s,
    86  		TransportEndpointInfo: stack.TransportEndpointInfo{
    87  			NetProto: netProto,
    88  		},
    89  		cooked:        cooked,
    90  		netProto:      netProto,
    91  		waiterQueue:   waiterQueue,
    92  		rcvBufSizeMax: 32 * 1024,
    93  		sndBufSize:    32 * 1024,
    94  	}
    95  
    96  	if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil {
    97  		return nil, err
    98  	}
    99  	return ep, nil
   100  }
   101  
   102  // Close implements tcpip.Endpoint.Close.
   103  func (ep *endpoint) Close() {
   104  	ep.mu.Lock()
   105  	defer ep.mu.Unlock()
   106  
   107  	if ep.closed {
   108  		return
   109  	}
   110  
   111  	ep.stack.UnregisterPacketEndpoint(0, ep.netProto, ep)
   112  
   113  	ep.rcvMu.Lock()
   114  	defer ep.rcvMu.Unlock()
   115  
   116  	// Clear the receive list.
   117  	ep.rcvClosed = true
   118  	ep.rcvBufSize = 0
   119  	for !ep.rcvList.Empty() {
   120  		ep.rcvList.Remove(ep.rcvList.Front())
   121  	}
   122  
   123  	ep.closed = true
   124  	ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
   125  }
   126  
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. It is a no-op
// for packet endpoints; copied is ignored.
func (ep *endpoint) ModerateRecvBuf(copied int) {}
   129  
// IPTables implements tcpip.Endpoint.IPTables. It returns the IPTables of the
// stack the endpoint belongs to; the returned error is always nil.
func (ep *endpoint) IPTables() (iptables.IPTables, error) {
	return ep.stack.IPTables(), nil
}
   134  
   135  // Read implements tcpip.Endpoint.Read.
   136  func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
   137  	ep.rcvMu.Lock()
   138  
   139  	// If there's no data to read, return that read would block or that the
   140  	// endpoint is closed.
   141  	if ep.rcvList.Empty() {
   142  		err := tcpip.ErrWouldBlock
   143  		if ep.rcvClosed {
   144  			ep.stats.ReadErrors.ReadClosed.Increment()
   145  			err = tcpip.ErrClosedForReceive
   146  		}
   147  		ep.rcvMu.Unlock()
   148  		return buffer.View{}, tcpip.ControlMessages{}, err
   149  	}
   150  
   151  	packet := ep.rcvList.Front()
   152  	ep.rcvList.Remove(packet)
   153  	ep.rcvBufSize -= packet.data.Size()
   154  
   155  	ep.rcvMu.Unlock()
   156  
   157  	if addr != nil {
   158  		*addr = packet.senderAddr
   159  	}
   160  
   161  	return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil
   162  }
   163  
// Write implements tcpip.Endpoint.Write. Writing is not implemented yet: the
// payload and options are ignored and the call always returns zero bytes
// written, a nil channel and tcpip.ErrInvalidOptionValue.
func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
	// TODO(b/129292371): Implement.
	return 0, nil, tcpip.ErrInvalidOptionValue
}
   168  
// Peek implements tcpip.Endpoint.Peek. The supplied buffers are ignored and
// the call always reports zero bytes, empty control messages and no error.
func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
	return 0, tcpip.ControlMessages{}, nil
}
   173  
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
// disconnected, and this function always returns tcpip.ErrNotSupported.
func (*endpoint) Disconnect() *tcpip.Error {
	return tcpip.ErrNotSupported
}
   179  
// Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
// connected, and this function always returns tcpip.ErrNotSupported; addr is
// ignored.
func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
	return tcpip.ErrNotSupported
}
   185  
// Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
// with Shutdown, and this function always returns tcpip.ErrNotSupported;
// flags is ignored.
func (ep *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
	return tcpip.ErrNotSupported
}
   191  
// Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
// Listen, and this function always returns tcpip.ErrNotSupported; backlog is
// ignored.
func (ep *endpoint) Listen(backlog int) *tcpip.Error {
	return tcpip.ErrNotSupported
}
   197  
// Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
// Accept, and this function always returns a nil endpoint, a nil waiter queue
// and tcpip.ErrNotSupported.
func (ep *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) {
	return nil, nil, tcpip.ErrNotSupported
}
   203  
// Bind implements tcpip.Endpoint.Bind. Binding is not implemented yet: addr
// is ignored and the call always returns tcpip.ErrNotSupported.
func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
	// TODO(gvisor.dev/issue/173): Add Bind support.

	// For reference, the behavior bind is expected to have once it is
	// supported:
	//
	// "By default, all packets of the specified protocol type are passed
	// to a packet socket.  To get packets only from a specific interface
	// use bind(2) specifying an address in a struct sockaddr_ll to bind
	// the packet socket  to  an interface.  Fields used for binding are
	// sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex."
	// - packet(7).

	return tcpip.ErrNotSupported
}
   217  
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. It always
// returns an empty address and tcpip.ErrNotSupported.
func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
	return tcpip.FullAddress{}, tcpip.ErrNotSupported
}
   222  
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. It always
// returns an empty address and tcpip.ErrNotConnected.
func (ep *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
	// Even a connected socket doesn't return a remote address.
	return tcpip.FullAddress{}, tcpip.ErrNotConnected
}
   228  
   229  // Readiness implements tcpip.Endpoint.Readiness.
   230  func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
   231  	// The endpoint is always writable.
   232  	result := waiter.EventOut & mask
   233  
   234  	// Determine whether the endpoint is readable.
   235  	if (mask & waiter.EventIn) != 0 {
   236  		ep.rcvMu.Lock()
   237  		if !ep.rcvList.Empty() || ep.rcvClosed {
   238  			result |= waiter.EventIn
   239  		}
   240  		ep.rcvMu.Unlock()
   241  	}
   242  
   243  	return result
   244  }
   245  
// SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be
// used with SetSockOpt, and this function always returns
// tcpip.ErrNotSupported; opt is ignored.
func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
	return tcpip.ErrNotSupported
}
   252  
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. No integer options
// are handled: opt and v are ignored and the call always returns
// tcpip.ErrUnknownProtocolOption.
func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
	return tcpip.ErrUnknownProtocolOption
}
   257  
// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. No integer options
// are handled: opt is ignored and the call always returns 0 and
// tcpip.ErrNotSupported.
func (ep *endpoint) GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error) {
	return 0, tcpip.ErrNotSupported
}
   262  
// GetSockOpt implements tcpip.Endpoint.GetSockOpt. No options are handled:
// opt is ignored and the call always returns tcpip.ErrNotSupported.
func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
	return tcpip.ErrNotSupported
}
   267  
   268  // HandlePacket implements stack.PacketEndpoint.HandlePacket.
   269  func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) {
   270  	ep.rcvMu.Lock()
   271  
   272  	// Drop the packet if our buffer is currently full.
   273  	if ep.rcvClosed {
   274  		ep.rcvMu.Unlock()
   275  		ep.stack.Stats().DroppedPackets.Increment()
   276  		ep.stats.ReceiveErrors.ClosedReceiver.Increment()
   277  		return
   278  	}
   279  
   280  	if ep.rcvBufSize >= ep.rcvBufSizeMax {
   281  		ep.rcvMu.Unlock()
   282  		ep.stack.Stats().DroppedPackets.Increment()
   283  		ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
   284  		return
   285  	}
   286  
   287  	wasEmpty := ep.rcvBufSize == 0
   288  
   289  	// Push new packet into receive list and increment the buffer size.
   290  	var packet packet
   291  	// TODO(b/129292371): Return network protocol.
   292  	if len(pkt.LinkHeader) > 0 {
   293  		// Get info directly from the ethernet header.
   294  		hdr := header.Ethernet(pkt.LinkHeader)
   295  		packet.senderAddr = tcpip.FullAddress{
   296  			NIC:  nicID,
   297  			Addr: tcpip.Address(hdr.SourceAddress()),
   298  		}
   299  	} else {
   300  		// Guess the would-be ethernet header.
   301  		packet.senderAddr = tcpip.FullAddress{
   302  			NIC:  nicID,
   303  			Addr: tcpip.Address(localAddr),
   304  		}
   305  	}
   306  
   307  	if ep.cooked {
   308  		// Cooked packets can simply be queued.
   309  		packet.data = pkt.Data
   310  	} else {
   311  		// Raw packets need their ethernet headers prepended before
   312  		// queueing.
   313  		var linkHeader buffer.View
   314  		if len(pkt.LinkHeader) == 0 {
   315  			// We weren't provided with an actual ethernet header,
   316  			// so fake one.
   317  			ethFields := header.EthernetFields{
   318  				SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
   319  				DstAddr: localAddr,
   320  				Type:    netProto,
   321  			}
   322  			fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
   323  			fakeHeader.Encode(&ethFields)
   324  			linkHeader = buffer.View(fakeHeader)
   325  		} else {
   326  			linkHeader = append(buffer.View(nil), pkt.LinkHeader...)
   327  		}
   328  		combinedVV := linkHeader.ToVectorisedView()
   329  		combinedVV.Append(pkt.Data)
   330  		packet.data = combinedVV
   331  	}
   332  	packet.timestampNS = ep.stack.NowNanoseconds()
   333  
   334  	ep.rcvList.PushBack(&packet)
   335  	ep.rcvBufSize += packet.data.Size()
   336  
   337  	ep.rcvMu.Unlock()
   338  	ep.stats.PacketsReceived.Increment()
   339  	// Notify waiters that there's data to be read.
   340  	if wasEmpty {
   341  		ep.waiterQueue.Notify(waiter.EventIn)
   342  	}
   343  }
   344  
// State implements socket.Socket.State. Packet endpoints do not track a
// state here; the call always returns 0.
func (ep *endpoint) State() uint32 {
	return 0
}
   349  
   350  // Info returns a copy of the endpoint info.
   351  func (ep *endpoint) Info() tcpip.EndpointInfo {
   352  	ep.mu.RLock()
   353  	// Make a copy of the endpoint info.
   354  	ret := ep.TransportEndpointInfo
   355  	ep.mu.RUnlock()
   356  	return &ret
   357  }
   358  
// Stats returns a pointer to the endpoint stats. The pointer refers to the
// endpoint's own stats field, not to a copy.
func (ep *endpoint) Stats() tcpip.EndpointStats {
	return &ep.stats
}