github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/transport/packet/endpoint.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packet provides the implementation of packet sockets (see
    16  // packet(7)). Packet sockets allow applications to:
    17  //
    18  //   - manually write and inspect link, network, and transport headers
    19  //   - receive all traffic of a given network protocol, or all protocols
    20  //
    21  // Packet sockets are similar to raw sockets, but provide even more power to
    22  // users, letting them effectively talk directly to the network device.
    23  //
    24  // Packet sockets skip the input and output iptables chains.
    25  package packet
    26  
    27  import (
    28  	"io"
    29  	"time"
    30  
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/buffer"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
    33  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip"
    34  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header"
    35  	"github.com/nicocha30/gvisor-ligolo/pkg/tcpip/stack"
    36  	"github.com/nicocha30/gvisor-ligolo/pkg/waiter"
    37  )
    38  
// packet is one received packet queued on an endpoint's receive list, together
// with the metadata that Read can hand back to the caller.
//
// +stateify savable
type packet struct {
	// packetEntry is defined elsewhere in the package; presumably it provides
	// the intrusive list linkage that lets a packet sit on a packetList.
	packetEntry
	// data holds the actual packet data, including any headers and payload.
	// For cooked endpoints HandlePacket trims the link-layer headers off
	// before the packet is queued.
	//
	// receivedAt is the stack clock reading taken when HandlePacket queued the
	// packet; Read returns it as the timestamp control message. The struct tag
	// directs stateify to save it as an int64.
	data       stack.PacketBufferPtr
	receivedAt time.Time `state:".(int64)"`
	// senderAddr identifies where the packet came from: the NIC it arrived on
	// and, when the packet carries a link header, the source link address
	// taken from that header.
	senderAddr tcpip.FullAddress
	// packetInfo holds additional information about the packet: its network
	// protocol number and its packet type.
	packetInfo tcpip.LinkPacketInfo
}
    51  
// endpoint is the packet socket implementation of tcpip.Endpoint. It is legal
// to have goroutines make concurrent calls into the endpoint.
//
// State is split across three mutexes: mu guards the binding and the closed
// flag, rcvMu guards the receive queue, and lastErrorMu guards the pending
// socket error.
//
// Lock order:
//
//	endpoint.mu
//	  endpoint.rcvMu
//
// +stateify savable
type endpoint struct {
	tcpip.DefaultSocketOptionsHandler

	// The following fields are initialized at creation time and are
	// immutable.
	//
	//   - stack is the network stack the endpoint is registered with.
	//   - waiterQueue is notified when the endpoint becomes readable and when
	//     it is closed.
	//   - cooked selects cooked mode: received packets are queued without
	//     their link-layer headers and writes go through WritePacketToRemote
	//     instead of WriteRawPacket.
	//   - ops holds the generic socket options, including the send and
	//     receive buffer sizes.
	//   - stats accumulates the per-endpoint counters returned by Stats.
	stack       *stack.Stack `state:"manual"`
	waiterQueue *waiter.Queue
	cooked      bool
	ops         tcpip.SocketOptions
	stats       tcpip.TransportEndpointStats

	// The following fields are used to manage the receive queue.
	rcvMu sync.Mutex `state:"nosave"`
	// rcvList is the FIFO of packets waiting to be read.
	// +checklocks:rcvMu
	rcvList packetList
	// rcvBufSize is the total size of the packet data currently queued on
	// rcvList.
	// +checklocks:rcvMu
	rcvBufSize int
	// rcvClosed is set by Close. Once set, HandlePacket drops every packet and
	// Read reports ErrClosedForReceive when the queue is empty.
	// +checklocks:rcvMu
	rcvClosed bool
	// rcvDisabled makes HandlePacket drop incoming packets while set. It is
	// not written anywhere in this file; presumably it is toggled by
	// save/restore code elsewhere in the package (to be confirmed).
	// +checklocks:rcvMu
	rcvDisabled bool

	mu sync.RWMutex `state:"nosave"`
	// closed is set once Close has torn the endpoint down.
	// +checklocks:mu
	closed bool
	// boundNetProto is the network protocol the endpoint is currently
	// registered for with the stack.
	// +checklocks:mu
	boundNetProto tcpip.NetworkProtocolNumber
	// boundNIC is the NIC the endpoint is currently registered on. It stays 0
	// until Bind selects a specific interface.
	// +checklocks:mu
	boundNIC tcpip.NICID

	lastErrorMu sync.Mutex `state:"nosave"`
	// lastError is the pending socket error stored by UpdateLastError; it is
	// returned and cleared by LastError.
	// +checklocks:lastErrorMu
	lastError tcpip.Error
}
    95  
    96  // NewEndpoint returns a new packet endpoint.
    97  func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
    98  	ep := &endpoint{
    99  		stack:         s,
   100  		cooked:        cooked,
   101  		boundNetProto: netProto,
   102  		waiterQueue:   waiterQueue,
   103  	}
   104  	ep.ops.InitHandler(ep, ep.stack, tcpip.GetStackSendBufferLimits, tcpip.GetStackReceiveBufferLimits)
   105  	ep.ops.SetReceiveBufferSize(32*1024, false /* notify */)
   106  
   107  	// Override with stack defaults.
   108  	var ss tcpip.SendBufferSizeOption
   109  	if err := s.Option(&ss); err == nil {
   110  		ep.ops.SetSendBufferSize(int64(ss.Default), false /* notify */)
   111  	}
   112  
   113  	var rs tcpip.ReceiveBufferSizeOption
   114  	if err := s.Option(&rs); err == nil {
   115  		ep.ops.SetReceiveBufferSize(int64(rs.Default), false /* notify */)
   116  	}
   117  
   118  	if err := s.RegisterPacketEndpoint(0, netProto, ep); err != nil {
   119  		return nil, err
   120  	}
   121  	return ep, nil
   122  }
   123  
// Abort implements stack.TransportEndpoint.Abort. For a packet endpoint it
// simply delegates to Close.
func (ep *endpoint) Abort() {
	ep.Close()
}
   128  
   129  // Close implements tcpip.Endpoint.Close.
   130  func (ep *endpoint) Close() {
   131  	ep.mu.Lock()
   132  	defer ep.mu.Unlock()
   133  
   134  	if ep.closed {
   135  		return
   136  	}
   137  
   138  	ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep)
   139  
   140  	ep.rcvMu.Lock()
   141  	defer ep.rcvMu.Unlock()
   142  
   143  	// Clear the receive list.
   144  	ep.rcvClosed = true
   145  	ep.rcvBufSize = 0
   146  	for !ep.rcvList.Empty() {
   147  		p := ep.rcvList.Front()
   148  		ep.rcvList.Remove(p)
   149  		p.data.DecRef()
   150  	}
   151  
   152  	ep.closed = true
   153  	ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
   154  }
   155  
// ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. It is a no-op for
// packet endpoints; the argument is ignored.
func (*endpoint) ModerateRecvBuf(int) {}
   158  
   159  // Read implements tcpip.Endpoint.Read.
   160  func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error) {
   161  	ep.rcvMu.Lock()
   162  
   163  	// If there's no data to read, return that read would block or that the
   164  	// endpoint is closed.
   165  	if ep.rcvList.Empty() {
   166  		var err tcpip.Error = &tcpip.ErrWouldBlock{}
   167  		if ep.rcvClosed {
   168  			ep.stats.ReadErrors.ReadClosed.Increment()
   169  			err = &tcpip.ErrClosedForReceive{}
   170  		}
   171  		ep.rcvMu.Unlock()
   172  		return tcpip.ReadResult{}, err
   173  	}
   174  
   175  	packet := ep.rcvList.Front()
   176  	if !opts.Peek {
   177  		ep.rcvList.Remove(packet)
   178  		defer packet.data.DecRef()
   179  		ep.rcvBufSize -= packet.data.Size()
   180  	}
   181  
   182  	ep.rcvMu.Unlock()
   183  
   184  	res := tcpip.ReadResult{
   185  		Total: packet.data.Size(),
   186  		ControlMessages: tcpip.ReceivableControlMessages{
   187  			HasTimestamp: true,
   188  			Timestamp:    packet.receivedAt,
   189  		},
   190  	}
   191  	if opts.NeedRemoteAddr {
   192  		res.RemoteAddr = packet.senderAddr
   193  	}
   194  	if opts.NeedLinkPacketInfo {
   195  		res.LinkPacketInfo = packet.packetInfo
   196  	}
   197  
   198  	n, err := packet.data.Data().ReadTo(dst, opts.Peek)
   199  	if n == 0 && err != nil {
   200  		return res, &tcpip.ErrBadBuffer{}
   201  	}
   202  	res.Count = n
   203  	return res, nil
   204  }
   205  
   206  func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
   207  	if !ep.stack.PacketEndpointWriteSupported() {
   208  		return 0, &tcpip.ErrNotSupported{}
   209  	}
   210  
   211  	ep.mu.Lock()
   212  	closed := ep.closed
   213  	nicID := ep.boundNIC
   214  	proto := ep.boundNetProto
   215  	ep.mu.Unlock()
   216  	if closed {
   217  		return 0, &tcpip.ErrClosedForSend{}
   218  	}
   219  
   220  	var remote tcpip.LinkAddress
   221  	if to := opts.To; to != nil {
   222  		remote = to.LinkAddr
   223  
   224  		if n := to.NIC; n != 0 {
   225  			nicID = n
   226  		}
   227  
   228  		if p := to.Port; p != 0 {
   229  			proto = tcpip.NetworkProtocolNumber(p)
   230  		}
   231  	}
   232  
   233  	if nicID == 0 {
   234  		return 0, &tcpip.ErrInvalidOptionValue{}
   235  	}
   236  
   237  	// Prevents giant buffer allocations.
   238  	if p.Len() > header.DatagramMaximumSize {
   239  		return 0, &tcpip.ErrMessageTooLong{}
   240  	}
   241  
   242  	var payload buffer.Buffer
   243  	if _, err := payload.WriteFromReader(p, int64(p.Len())); err != nil {
   244  		return 0, &tcpip.ErrBadBuffer{}
   245  	}
   246  	payloadSz := payload.Size()
   247  
   248  	if err := func() tcpip.Error {
   249  		if ep.cooked {
   250  			return ep.stack.WritePacketToRemote(nicID, remote, proto, payload)
   251  		}
   252  		return ep.stack.WriteRawPacket(nicID, proto, payload)
   253  	}(); err != nil {
   254  		return 0, err
   255  	}
   256  	return payloadSz, nil
   257  }
   258  
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
// disconnected, and this function always returns *tcpip.ErrNotSupported.
func (*endpoint) Disconnect() tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   264  
// Connect implements tcpip.Endpoint.Connect. Packet sockets cannot be
// connected, and this function always returns *tcpip.ErrNotSupported. The
// address argument is ignored.
func (*endpoint) Connect(tcpip.FullAddress) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   270  
// Shutdown implements tcpip.Endpoint.Shutdown. Packet sockets cannot be used
// with Shutdown, and this function always returns *tcpip.ErrNotSupported. The
// flags argument is ignored.
func (*endpoint) Shutdown(tcpip.ShutdownFlags) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   276  
// Listen implements tcpip.Endpoint.Listen. Packet sockets cannot be used with
// Listen, and this function always returns *tcpip.ErrNotSupported. The
// argument is ignored.
func (*endpoint) Listen(int) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   282  
// Accept implements tcpip.Endpoint.Accept. Packet sockets cannot be used with
// Accept, and this function always returns a nil endpoint, a nil waiter queue
// and *tcpip.ErrNotSupported. The address argument is ignored.
func (*endpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error) {
	return nil, nil, &tcpip.ErrNotSupported{}
}
   288  
   289  // Bind implements tcpip.Endpoint.Bind.
   290  func (ep *endpoint) Bind(addr tcpip.FullAddress) tcpip.Error {
   291  	// "By default, all packets of the specified protocol type are passed
   292  	// to a packet socket.  To get packets only from a specific interface
   293  	// use bind(2) specifying an address in a struct sockaddr_ll to bind
   294  	// the packet socket  to  an interface.  Fields used for binding are
   295  	// sll_family (should be AF_PACKET), sll_protocol, and sll_ifindex."
   296  	// - packet(7).
   297  
   298  	ep.mu.Lock()
   299  	defer ep.mu.Unlock()
   300  
   301  	netProto := tcpip.NetworkProtocolNumber(addr.Port)
   302  	if netProto == 0 {
   303  		// Do not allow unbinding the network protocol.
   304  		netProto = ep.boundNetProto
   305  	}
   306  
   307  	if ep.boundNIC == addr.NIC && ep.boundNetProto == netProto {
   308  		// Already bound to the requested NIC and network protocol.
   309  		return nil
   310  	}
   311  
   312  	// TODO(https://gvisor.dev/issue/6618): Unregister after registering the new
   313  	// binding.
   314  	ep.stack.UnregisterPacketEndpoint(ep.boundNIC, ep.boundNetProto, ep)
   315  	ep.boundNIC = 0
   316  	ep.boundNetProto = 0
   317  
   318  	// Bind endpoint to receive packets from specific interface.
   319  	if err := ep.stack.RegisterPacketEndpoint(addr.NIC, netProto, ep); err != nil {
   320  		return err
   321  	}
   322  
   323  	ep.boundNIC = addr.NIC
   324  	ep.boundNetProto = netProto
   325  	return nil
   326  }
   327  
   328  // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress.
   329  func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
   330  	ep.mu.RLock()
   331  	defer ep.mu.RUnlock()
   332  
   333  	return tcpip.FullAddress{
   334  		NIC:  ep.boundNIC,
   335  		Port: uint16(ep.boundNetProto),
   336  	}, nil
   337  }
   338  
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. It always
// returns an empty address and *tcpip.ErrNotConnected.
func (*endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
	// Even a connected socket doesn't return a remote address.
	return tcpip.FullAddress{}, &tcpip.ErrNotConnected{}
}
   344  
   345  // Readiness implements tcpip.Endpoint.Readiness.
   346  func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
   347  	// The endpoint is always writable.
   348  	result := waiter.WritableEvents & mask
   349  
   350  	// Determine whether the endpoint is readable.
   351  	if (mask & waiter.ReadableEvents) != 0 {
   352  		ep.rcvMu.Lock()
   353  		if !ep.rcvList.Empty() || ep.rcvClosed {
   354  			result |= waiter.ReadableEvents
   355  		}
   356  		ep.rcvMu.Unlock()
   357  	}
   358  
   359  	return result
   360  }
   361  
   362  // SetSockOpt implements tcpip.Endpoint.SetSockOpt. Packet sockets cannot be
   363  // used with SetSockOpt, and this function always returns
   364  // *tcpip.ErrNotSupported.
   365  func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
   366  	switch opt.(type) {
   367  	case *tcpip.SocketDetachFilterOption:
   368  		return nil
   369  
   370  	default:
   371  		return &tcpip.ErrUnknownProtocolOption{}
   372  	}
   373  }
   374  
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. No integer options
// are supported, so it always returns *tcpip.ErrUnknownProtocolOption.
func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error {
	return &tcpip.ErrUnknownProtocolOption{}
}
   379  
   380  func (ep *endpoint) LastError() tcpip.Error {
   381  	ep.lastErrorMu.Lock()
   382  	defer ep.lastErrorMu.Unlock()
   383  
   384  	err := ep.lastError
   385  	ep.lastError = nil
   386  	return err
   387  }
   388  
   389  // UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError.
   390  func (ep *endpoint) UpdateLastError(err tcpip.Error) {
   391  	ep.lastErrorMu.Lock()
   392  	ep.lastError = err
   393  	ep.lastErrorMu.Unlock()
   394  }
   395  
// GetSockOpt implements tcpip.Endpoint.GetSockOpt. No options can be read
// through it; it always returns *tcpip.ErrNotSupported.
func (*endpoint) GetSockOpt(tcpip.GettableSocketOption) tcpip.Error {
	return &tcpip.ErrNotSupported{}
}
   400  
   401  // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
   402  func (ep *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
   403  	switch opt {
   404  	case tcpip.ReceiveQueueSizeOption:
   405  		v := 0
   406  		ep.rcvMu.Lock()
   407  		if !ep.rcvList.Empty() {
   408  			p := ep.rcvList.Front()
   409  			v = p.data.Size()
   410  		}
   411  		ep.rcvMu.Unlock()
   412  		return v, nil
   413  
   414  	default:
   415  		return -1, &tcpip.ErrUnknownProtocolOption{}
   416  	}
   417  }
   418  
   419  // HandlePacket implements stack.PacketEndpoint.HandlePacket.
   420  func (ep *endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt stack.PacketBufferPtr) {
   421  	ep.rcvMu.Lock()
   422  
   423  	// Drop the packet if our buffer is currently full.
   424  	if ep.rcvClosed {
   425  		ep.rcvMu.Unlock()
   426  		ep.stack.Stats().DroppedPackets.Increment()
   427  		ep.stats.ReceiveErrors.ClosedReceiver.Increment()
   428  		return
   429  	}
   430  
   431  	rcvBufSize := ep.ops.GetReceiveBufferSize()
   432  	if ep.rcvDisabled || ep.rcvBufSize >= int(rcvBufSize) {
   433  		ep.rcvMu.Unlock()
   434  		ep.stack.Stats().DroppedPackets.Increment()
   435  		ep.stats.ReceiveErrors.ReceiveBufferOverflow.Increment()
   436  		return
   437  	}
   438  
   439  	wasEmpty := ep.rcvBufSize == 0
   440  
   441  	rcvdPkt := packet{
   442  		packetInfo: tcpip.LinkPacketInfo{
   443  			Protocol: netProto,
   444  			PktType:  pkt.PktType,
   445  		},
   446  		senderAddr: tcpip.FullAddress{
   447  			NIC: nicID,
   448  		},
   449  		receivedAt: ep.stack.Clock().Now(),
   450  	}
   451  
   452  	if len(pkt.LinkHeader().Slice()) != 0 {
   453  		hdr := header.Ethernet(pkt.LinkHeader().Slice())
   454  		rcvdPkt.senderAddr.LinkAddr = hdr.SourceAddress()
   455  	}
   456  
   457  	// Raw packet endpoints include link-headers in received packets.
   458  	pktBuf := pkt.ToBuffer()
   459  	if ep.cooked {
   460  		// Cooked packet endpoints don't include the link-headers in received
   461  		// packets.
   462  		pktBuf.TrimFront(int64(len(pkt.LinkHeader().Slice()) + len(pkt.VirtioNetHeader().Slice())))
   463  	}
   464  	rcvdPkt.data = stack.NewPacketBuffer(stack.PacketBufferOptions{Payload: pktBuf})
   465  
   466  	ep.rcvList.PushBack(&rcvdPkt)
   467  	ep.rcvBufSize += rcvdPkt.data.Size()
   468  
   469  	ep.rcvMu.Unlock()
   470  	ep.stats.PacketsReceived.Increment()
   471  	// Notify waiters that there's data to be read.
   472  	if wasEmpty {
   473  		ep.waiterQueue.Notify(waiter.ReadableEvents)
   474  	}
   475  }
   476  
// State implements socket.Socket.State. It always returns 0 for packet
// endpoints.
func (*endpoint) State() uint32 {
	return 0
}
   481  
   482  // Info returns a copy of the endpoint info.
   483  func (ep *endpoint) Info() tcpip.EndpointInfo {
   484  	ep.mu.RLock()
   485  	defer ep.mu.RUnlock()
   486  	return &stack.TransportEndpointInfo{NetProto: ep.boundNetProto}
   487  }
   488  
// Stats implements tcpip.Endpoint.Stats. It returns a pointer to the
// endpoint's own stats, not a copy.
func (ep *endpoint) Stats() tcpip.EndpointStats {
	return &ep.stats
}
   493  
// SetOwner implements tcpip.Endpoint.SetOwner. It is a no-op for packet
// endpoints; the owner argument is ignored.
func (*endpoint) SetOwner(tcpip.PacketOwner) {}
   496  
// SocketOptions implements tcpip.Endpoint.SocketOptions. It returns a pointer
// to the endpoint's own socket options, not a copy.
func (ep *endpoint) SocketOptions() *tcpip.SocketOptions {
	return &ep.ops
}