inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/transport/packet/endpoint.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packet provides the implementation of packet sockets (see
    16  // packet(7)). Packet sockets allow applications to:
    17  //
    18  //   * manually write and inspect link, network, and transport headers
    19  //   * receive all traffic of a given network protocol, or all protocols
    20  //
    21  // Packet sockets are similar to raw sockets, but provide even more power to
    22  // users, letting them effectively talk directly to the network device.
    23  //
    24  // Packet sockets skip the input and output iptables chains.
    25  package packet
    26  
    27  import (
    28  	"io"
    29  	"time"
    30  
    31  	"inet.af/netstack/sync"
    32  	"inet.af/netstack/tcpip"
    33  	"inet.af/netstack/tcpip/buffer"
    34  	"inet.af/netstack/tcpip/header"
    35  	"inet.af/netstack/tcpip/stack"
    36  	"inet.af/netstack/waiter"
    37  )
    38  
// packet is one received packet together with the metadata handed back to
// readers. HandlePacket builds these and queues them on endpoint.rcvList.
//
// +stateify savable
type packet struct {
	// packetEntry is embedded so the packet can be linked into a packetList
	// (presumably the list-entry type; it is defined elsewhere).
	packetEntry
	// data holds the actual packet data, including any headers and
	// payload. receivedAt is the stack clock reading taken when
	// HandlePacket queued the packet; Read reports it as the timestamp
	// control message.
	data       buffer.VectorisedView `state:".(buffer.VectorisedView)"`
	receivedAt time.Time             `state:".(int64)"`
	// senderAddr identifies the sender: the NIC the packet arrived on and,
	// when the packet carries a link header, the Ethernet source address.
	senderAddr tcpip.FullAddress
	// packetInfo holds additional information like the protocol
	// of the packet etc.
	packetInfo tcpip.LinkPacketInfo
}
    52  
// endpoint is the packet socket implementation of tcpip.Endpoint. It is legal
// to have goroutines make concurrent calls into the endpoint.
//
// Lock order:
//   endpoint.mu
//     endpoint.rcvMu
//
// +stateify savable
type endpoint struct {
	tcpip.DefaultSocketOptionsHandler

	// The following fields are initialized at creation time and are
	// immutable.
	//
	// stack is the stack the endpoint is registered with, waiterQueue is
	// notified on readability and on close, and cooked selects cooked mode
	// (link headers are left out of received data and writes go through
	// WritePacketToRemote) as opposed to raw mode. ops holds the socket
	// options and stats the per-endpoint counters.
	stack       *stack.Stack `state:"manual"`
	waiterQueue *waiter.Queue
	cooked      bool
	ops         tcpip.SocketOptions
	stats       tcpip.TransportEndpointStats

	// The following fields are used to manage the receive queue.
	rcvMu sync.Mutex `state:"nosave"`
	// rcvList is the queue of received packets that Read consumes.
	//
	// +checklocks:rcvMu
	rcvList packetList
	// rcvBufSize is the total data size, in bytes, of the packets currently
	// held in rcvList.
	//
	// +checklocks:rcvMu
	rcvBufSize int
	// rcvClosed is set by Close; once set, HandlePacket drops every packet.
	//
	// +checklocks:rcvMu
	rcvClosed bool
	// rcvDisabled makes HandlePacket drop incoming packets when true.
	// NOTE(review): nothing in this file sets it; confirm where it is
	// toggled (possibly save/restore code).
	//
	// +checklocks:rcvMu
	rcvDisabled bool

	mu sync.RWMutex `state:"nosave"`
	// closed is set by Close and makes Write fail with ErrClosedForSend.
	//
	// +checklocks:mu
	closed bool
	// boundNetProto is the network protocol the endpoint is currently
	// registered for with the stack.
	//
	// +checklocks:mu
	boundNetProto tcpip.NetworkProtocolNumber
	// boundNIC is the NIC the endpoint is currently registered on; it is
	// zero until Bind selects a specific NIC.
	//
	// +checklocks:mu
	boundNIC tcpip.NICID

	lastErrorMu sync.Mutex `state:"nosave"`
	// lastError is the error recorded by UpdateLastError; LastError returns
	// and clears it.
	//
	// +checklocks:lastErrorMu
	lastError tcpip.Error
}
    95  
    96  // NewEndpoint returns a new packet endpoint.
    97  func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
    98  	ep := &endpoint{
    99  		stack:         s,
   100  		cooked:        cooked,
   101  		boundNetProto: netProto,
   102  		waiterQueue:   waiterQueue,
   103  	}
   104  	ep.ops.InitHandler(ep, ep.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits)
   105  	ep.ops.SetReceiveBufferSize(32*1024, false /* notify */)
   106  
   107  	// Override with stack defaults.
   108  	var ss tcpip.SendBufferSizeOption
   109  	if err := s.Option(&ss); err == nil {
   110  		ep.ops.SetSendBufferSize(int64(ss.Default), false /* notify */)
   111  	}
   112  
   113  	var rs tcpip.ReceiveBufferSizeOption
   114  	if err := s.Option(&rs); err == nil {
   115  		ep.ops.SetReceiveBufferSize(int64(rs.Default), false /* notify */)
   116  	}
   117  
   118  	if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil {
   119  		return nil, err
   120  	}
   121  	return ep, nil
   122  }
   123  
// Abort implements stack.TransportEndpoint.Abort. For packet endpoints
// aborting is the same as closing, so it just calls Close.
func (ep *endpoint) Abort() {
	ep.Close()
}
   128  
   129  // Close implements tcpip.Endpoint.Close.
   130  func (ep *endpoint) Close() {
   131  	ep.mu.Lock()
   132  	defer ep.mu.Unlock()
   133  
   134  	if ep.closed {
   135  		return
   136  	}
   137  
   138  	ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep)
   139  
   140  	ep.rcvMu.Lock()
   141  	defer ep.rcvMu.Unlock()
   142  
   143  	// Clear the receive list.
   144  	ep.rcvClosed = true
   145  	ep.rcvBufSize = 0
   146  	for !ep.rcvList.Empty() {
   147  		ep.rcvList.Remove(ep.rcvList.Front())
   148  	}
   149  
   150  	ep.closed = true
   151  	ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
   152  }
   153  
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. It is a no-op
// for packet endpoints.
func (*endpoint) ModerateRecvBuf(int) {}
   156  
   157  // Read implements tcpip.Endpoint.Read.
   158  func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) {
   159  	ep.rcvMu.Lock()
   160  
   161  	// If there's no data to read, return that read would block or that the
   162  	// endpoint is closed.
   163  	if ep.rcvList.Empty() {
   164  		var err tcpip.Error = &tcpip.ErrWouldBlock{}
   165  		if ep.rcvClosed {
   166  			ep.stats.ReadErrors.ReadClosed.Increment()
   167  			err = &tcpip.ErrClosedForReceive{}
   168  		}
   169  		ep.rcvMu.Unlock()
   170  		return tcpip.ReadResult{}, err
   171  	}
   172  
   173  	packet := ep.rcvList.Front()
   174  	if !opts.Peek {
   175  		ep.rcvList.Remove(packet)
   176  		ep.rcvBufSize -= packet.data.Size()
   177  	}
   178  
   179  	ep.rcvMu.Unlock()
   180  
   181  	res := tcpip.ReadResult{
   182  		Total: packet.data.Size(),
   183  		ControlMessages: tcpip.ControlMessages{
   184  			HasTimestamp: true,
   185  			Timestamp:    packet.receivedAt,
   186  		},
   187  	}
   188  	if opts.NeedRemoteAddr {
   189  		res.RemoteAddr = packet.senderAddr
   190  	}
   191  	if opts.NeedLinkPacketInfo {
   192  		res.LinkPacketInfo = packet.packetInfo
   193  	}
   194  
   195  	n, err := packet.data.ReadTo(dst, opts.Peek)
   196  	if n == 0 && err != nil {
   197  		return res, &tcpip.ErrBadBuffer{}
   198  	}
   199  	res.Count = n
   200  	return res, nil
   201  }
   202  
   203  func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
   204  	if !ep.stack.PacketEndpointWriteSupported() {
   205  		return 0, &tcpip.ErrNotSupported{}
   206  	}
   207  
   208  	ep.mu.Lock()
   209  	closed := ep.closed
   210  	nicID := ep.boundNIC
   211  	proto := ep.boundNetProto
   212  	ep.mu.Unlock()
   213  	if closed {
   214  		return 0, &tcpip.ErrClosedForSend{}
   215  	}
   216  
   217  	var remote tcpip.LinkAddress
   218  	if to := opts.To; to != nil {
   219  		remote = tcpip.LinkAddress(to.Addr)
   220  
   221  		if n := to.NIC; n != 0 {
   222  			nicID = n
   223  		}
   224  
   225  		if p := to.Port; p != 0 {
   226  			proto = tcpip.NetworkProtocolNumber(p)
   227  		}
   228  	}
   229  
   230  	if nicID == 0 {
   231  		return 0, &tcpip.ErrInvalidOptionValue{}
   232  	}
   233  
   234  	// TODO(https://gvisor.dev/issue/6538): Avoid this allocation.
   235  	payloadBytes := make(buffer.View, p.Len())
   236  	if _, err := io.ReadFull(p, payloadBytes); err != nil {
   237  		return 0, &tcpip.ErrBadBuffer{}
   238  	}
   239  
   240  	if err := func() tcpip.Error {
   241  		if ep.cooked {
   242  			return ep.stack.WritePacketToRemote(nicID, remote, proto, payloadBytes.ToVectorisedView())
   243  		}
   244  		return ep.stack.WriteRawPacket(nicID, proto, payloadBytes.ToVectorisedView())
   245  	}(); err != nil {
   246  		return 0, err
   247  	}
   248  	return int64(len(payloadBytes)), nil
   249  }
   250  
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
// disconnected, and this function always returns *tcpip.ErrNotSupported.
func (*endpoint) Disconnect() tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   256  
// Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
// connected, and this function always returns *tcpip.ErrNotSupported.
func (*endpoint) Connect(tcpip.FullAddress) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   262  
// Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
// with Shutdown, and this function always returns *tcpip.ErrNotSupported
// regardless of the flags passed.
func (*endpoint) Shutdown(tcpip.ShutdownFlags) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   268  
// Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
// Listen, and this function always returns *tcpip.ErrNotSupported regardless
// of its argument.
func (*endpoint) Listen(int) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   274  
// Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
// Accept, and this function always returns a nil endpoint, a nil waiter queue
// and *tcpip.ErrNotSupported.
func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) {
	return nil, nil, &tcpip.ErrNotSupported{}
}
   280  
   281  // Bind implements tcpip.Endpoint.Bind.
   282  func (ep *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error {
   283  	// "By default, all packets of the specified protocol type are passed
   284  	// to a packet socket.  To get packets only from a specific interface
   285  	// use bind(2) specifying an address in a struct sockaddr_ll to bind
   286  	// the packet socket  to  an interface.  Fields used for binding are
   287  	// sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex."
   288  	// - packet(7).
   289  
   290  	ep.mu.Lock()
   291  	defer ep.mu.Unlock()
   292  
   293  	netProto := tcpip.NetworkProtocolNumber(addr.Port)
   294  	if netProto == 0 {
   295  		// Do not allow unbinding the network protocol.
   296  		netProto = ep.boundNetProto
   297  	}
   298  
   299  	if ep.boundNIC == addr.NIC && ep.boundNetProto == netProto {
   300  		// Already bound to the requested NIC and network protocol.
   301  		return nil
   302  	}
   303  
   304  	// TODO(https://gvisor.dev/issue/6618): Unregister after registering the new
   305  	// binding.
   306  	ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep)
   307  	ep.boundNIC = 0
   308  	ep.boundNetProto = 0
   309  
   310  	// Bind endpoint to receive packets from specific interface.
   311  	if err := ep.stack.RegisterPacketEndpoint(addr.NIC, netProto, ep); err != nil {
   312  		return err
   313  	}
   314  
   315  	ep.boundNIC = addr.NIC
   316  	ep.boundNetProto = netProto
   317  	return nil
   318  }
   319  
   320  // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress.
   321  func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
   322  	ep.mu.RLock()
   323  	defer ep.mu.RUnlock()
   324  
   325  	return tcpip.FullAddress{
   326  		NIC:  ep.boundNIC,
   327  		Port: uint16(ep.boundNetProto),
   328  	}, nil
   329  }
   330  
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. It always
// returns an empty address and *tcpip.ErrNotConnected.
func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
	// Even a connected socket doesn't return a remote address.
	return tcpip.FullAddress{}, &tcpip.ErrNotConnected{}
}
   336  
   337  // Readiness implements tcpip.Endpoint.Readiness.
   338  func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
   339  	// The endpoint is always writable.
   340  	result := waiter.WritableEvents & mask
   341  
   342  	// Determine whether the endpoint is readable.
   343  	if (mask & waiter.ReadableEvents) != 0 {
   344  		ep.rcvMu.Lock()
   345  		if !ep.rcvList.Empty() || ep.rcvClosed {
   346  			result |= waiter.ReadableEvents
   347  		}
   348  		ep.rcvMu.Unlock()
   349  	}
   350  
   351  	return result
   352  }
   353  
// SetSockOpt implements tcpip.Endpoint.SetSockOpt.
//
// *tcpip.SocketDetachFilterOption is accepted and ignored (nil is returned);
// every other option yields *tcpip.ErrUnknownProtocolOption.
func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
	switch opt.(type) {
	case *tcpip.SocketDetachFilterOption:
		return nil

	default:
		return &tcpip.ErrUnknownProtocolOption{}
	}
}
   366  
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. No integer options
// can be set on packet endpoints, so it always returns
// *tcpip.ErrUnknownProtocolOption.
func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error {
	return &tcpip.ErrUnknownProtocolOption{}
}
   371  
   372  func (ep *endpoint) LastError() tcpip.Error {
   373  	ep.lastErrorMu.Lock()
   374  	defer ep.lastErrorMu.Unlock()
   375  
   376  	err := ep.lastError
   377  	ep.lastError = nil
   378  	return err
   379  }
   380  
   381  // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError.
   382  func (ep *endpoint) UpdateLastError(err tcpip.Error) {
   383  	ep.lastErrorMu.Lock()
   384  	ep.lastError = err
   385  	ep.lastErrorMu.Unlock()
   386  }
   387  
// GetSockOpt implements tcpip.Endpoint.GetSockOpt. No options can be queried
// this way on packet endpoints; it always returns *tcpip.ErrNotSupported.
func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   392  
   393  // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
   394  func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
   395  	switch opt {
   396  	case tcpip.ReceiveQueueSizeOption:
   397  		v := 0
   398  		ep.rcvMu.Lock()
   399  		if !ep.rcvList.Empty() {
   400  			p := ep.rcvList.Front()
   401  			v = p.data.Size()
   402  		}
   403  		ep.rcvMu.Unlock()
   404  		return v, nil
   405  
   406  	default:
   407  		return -1, &tcpip.ErrUnknownProtocolOption{}
   408  	}
   409  }
   410  
   411  // HandlePacket implements stack.PacketEndpoint.HandlePacket.
   412  func (ep *endpoint) HandlePacket(nicID tcpip.NICID, _ tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
   413  	ep.rcvMu.Lock()
   414  
   415  	// Drop the packet if our buffer is currently full.
   416  	if ep.rcvClosed {
   417  		ep.rcvMu.Unlock()
   418  		ep.stack.Stats().DroppedPackets.Increment()
   419  		ep.stats.ReceiveErrors.ClosedReceiver.Increment()
   420  		return
   421  	}
   422  
   423  	rcvBufSize := ep.ops.GetReceiveBufferSize()
   424  	if ep.rcvDisabled || ep.rcvBufSize >= int(rcvBufSize) {
   425  		ep.rcvMu.Unlock()
   426  		ep.stack.Stats().DroppedPackets.Increment()
   427  		ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
   428  		return
   429  	}
   430  
   431  	wasEmpty := ep.rcvBufSize == 0
   432  
   433  	rcvdPkt := packet{
   434  		packetInfo: tcpip.LinkPacketInfo{
   435  			Protocol: netProto,
   436  			PktType:  pkt.PktType,
   437  		},
   438  		senderAddr: tcpip.FullAddress{
   439  			NIC: nicID,
   440  		},
   441  		receivedAt: ep.stack.Clock().Now(),
   442  	}
   443  
   444  	if !pkt.LinkHeader().View().IsEmpty() {
   445  		hdr := header.Ethernet(pkt.LinkHeader().View())
   446  		rcvdPkt.senderAddr.Addr = tcpip.Address(hdr.SourceAddress())
   447  	}
   448  
   449  	if ep.cooked {
   450  		// Cooked packet endpoints don't include the link-headers in received
   451  		// packets.
   452  		if v := pkt.NetworkHeader().View(); !v.IsEmpty() {
   453  			rcvdPkt.data.AppendView(v)
   454  		}
   455  		if v := pkt.TransportHeader().View(); !v.IsEmpty() {
   456  			rcvdPkt.data.AppendView(v)
   457  		}
   458  		rcvdPkt.data.Append(pkt.Data().ExtractVV())
   459  	} else {
   460  		// Raw packet endpoints include link-headers in received packets.
   461  		rcvdPkt.data = buffer.NewVectorisedView(pkt.Size(), pkt.Views())
   462  	}
   463  
   464  	ep.rcvList.PushBack(&rcvdPkt)
   465  	ep.rcvBufSize += rcvdPkt.data.Size()
   466  
   467  	ep.rcvMu.Unlock()
   468  	ep.stats.PacketsReceived.Increment()
   469  	// Notify waiters that there's data to be read.
   470  	if wasEmpty {
   471  		ep.waiterQueue.Notify(waiter.ReadableEvents)
   472  	}
   473  }
   474  
// State implements socket.Socket.State. Packet endpoints do not track a
// connection state, so it always returns 0.
func (*endpoint) State() uint32 {
	return 0
}
   479  
   480  // Info returns a copy of the endpoint info.
   481  func (ep *endpoint) Info() tcpip.EndpointInfo {
   482  	ep.mu.RLock()
   483  	defer ep.mu.RUnlock()
   484  	return &stack.TransportEndpointInfo{NetProto: ep.boundNetProto}
   485  }
   486  
// Stats returns a pointer to the endpoint stats. The pointer refers to the
// endpoint's own counters, not to a copy.
func (ep *endpoint) Stats() tcpip.EndpointStats {
	return &ep.stats
}
   491  
// SetOwner implements tcpip.Endpoint.SetOwner. It is a no-op: the owner
// passed in is not recorded.
func (*endpoint) SetOwner(tcpip.PacketOwner) {}
   494  
// SocketOptions implements tcpip.Endpoint.SocketOptions. It returns a pointer
// to the endpoint's own socket options, not a copy.
func (ep *endpoint) SocketOptions() *tcpip.SocketOptions {
	return &ep.ops
}