github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/link/fdbased/packet_dispatchers.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build linux
    16  
    17  package fdbased
    18  
    19  import (
    20  	"golang.org/x/sys/unix"
    21  	"github.com/SagerNet/gvisor/pkg/tcpip"
    22  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    23  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    24  	"github.com/SagerNet/gvisor/pkg/tcpip/link/rawfile"
    25  	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
    26  )
    27  
    28  // BufConfig defines the shape of the vectorised view used to read packets from the NIC.
    29  var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
    30  
    31  type iovecBuffer struct {
    32  	// views are the actual buffers that hold the packet contents.
    33  	views []buffer.View
    34  
    35  	// iovecs are initialized with base pointers/len of the corresponding
    36  	// entries in the views defined above, except when GSO is enabled
    37  	// (skipsVnetHdr) then the first iovec points to a buffer for the vnet header
    38  	// which is stripped before the views are passed up the stack for further
    39  	// processing.
    40  	iovecs []unix.Iovec
    41  
    42  	// sizes is an array of buffer sizes for the underlying views. sizes is
    43  	// immutable.
    44  	sizes []int
    45  
    46  	// skipsVnetHdr is true if virtioNetHdr is to skipped.
    47  	skipsVnetHdr bool
    48  }
    49  
    50  func newIovecBuffer(sizes []int, skipsVnetHdr bool) *iovecBuffer {
    51  	b := &iovecBuffer{
    52  		views:        make([]buffer.View, len(sizes)),
    53  		sizes:        sizes,
    54  		skipsVnetHdr: skipsVnetHdr,
    55  	}
    56  	niov := len(b.views)
    57  	if b.skipsVnetHdr {
    58  		niov++
    59  	}
    60  	b.iovecs = make([]unix.Iovec, niov)
    61  	return b
    62  }
    63  
    64  func (b *iovecBuffer) nextIovecs() []unix.Iovec {
    65  	vnetHdrOff := 0
    66  	if b.skipsVnetHdr {
    67  		var vnetHdr [virtioNetHdrSize]byte
    68  		// The kernel adds virtioNetHdr before each packet, but
    69  		// we don't use it, so so we allocate a buffer for it,
    70  		// add it in iovecs but don't add it in a view.
    71  		b.iovecs[0] = unix.Iovec{Base: &vnetHdr[0]}
    72  		b.iovecs[0].SetLen(virtioNetHdrSize)
    73  		vnetHdrOff++
    74  	}
    75  	for i := range b.views {
    76  		if b.views[i] != nil {
    77  			break
    78  		}
    79  		v := buffer.NewView(b.sizes[i])
    80  		b.views[i] = v
    81  		b.iovecs[i+vnetHdrOff] = unix.Iovec{Base: &v[0]}
    82  		b.iovecs[i+vnetHdrOff].SetLen(len(v))
    83  	}
    84  	return b.iovecs
    85  }
    86  
    87  func (b *iovecBuffer) pullViews(n int) buffer.VectorisedView {
    88  	var views []buffer.View
    89  	c := 0
    90  	if b.skipsVnetHdr {
    91  		c += virtioNetHdrSize
    92  		if c >= n {
    93  			// Nothing in the packet.
    94  			return buffer.NewVectorisedView(0, nil)
    95  		}
    96  	}
    97  	for i, v := range b.views {
    98  		c += len(v)
    99  		if c >= n {
   100  			b.views[i].CapLength(len(v) - (c - n))
   101  			views = append([]buffer.View(nil), b.views[:i+1]...)
   102  			break
   103  		}
   104  	}
   105  	// Remove the first len(views) used views from the state.
   106  	for i := range views {
   107  		b.views[i] = nil
   108  	}
   109  	if b.skipsVnetHdr {
   110  		// Exclude the size of the vnet header.
   111  		n -= virtioNetHdrSize
   112  	}
   113  	return buffer.NewVectorisedView(n, views)
   114  }
   115  
   116  // readVDispatcher uses readv() system call to read inbound packets and
   117  // dispatches them.
   118  type readVDispatcher struct {
   119  	// fd is the file descriptor used to send and receive packets.
   120  	fd int
   121  
   122  	// e is the endpoint this dispatcher is attached to.
   123  	e *endpoint
   124  
   125  	// buf is the iovec buffer that contains the packet contents.
   126  	buf *iovecBuffer
   127  }
   128  
   129  func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
   130  	d := &readVDispatcher{fd: fd, e: e}
   131  	skipsVnetHdr := d.e.gsoKind == stack.HWGSOSupported
   132  	d.buf = newIovecBuffer(BufConfig, skipsVnetHdr)
   133  	return d, nil
   134  }
   135  
   136  // dispatch reads one packet from the file descriptor and dispatches it.
   137  func (d *readVDispatcher) dispatch() (bool, tcpip.Error) {
   138  	n, err := rawfile.BlockingReadv(d.fd, d.buf.nextIovecs())
   139  	if n == 0 || err != nil {
   140  		return false, err
   141  	}
   142  
   143  	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
   144  		Data: d.buf.pullViews(n),
   145  	})
   146  
   147  	var (
   148  		p             tcpip.NetworkProtocolNumber
   149  		remote, local tcpip.LinkAddress
   150  	)
   151  	if d.e.hdrSize > 0 {
   152  		hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize)
   153  		if !ok {
   154  			return false, nil
   155  		}
   156  		eth := header.Ethernet(hdr)
   157  		p = eth.Type()
   158  		remote = eth.SourceAddress()
   159  		local = eth.DestinationAddress()
   160  	} else {
   161  		// We don't get any indication of what the packet is, so try to guess
   162  		// if it's an IPv4 or IPv6 packet.
   163  		// IP version information is at the first octet, so pulling up 1 byte.
   164  		h, ok := pkt.Data().PullUp(1)
   165  		if !ok {
   166  			return true, nil
   167  		}
   168  		switch header.IPVersion(h) {
   169  		case header.IPv4Version:
   170  			p = header.IPv4ProtocolNumber
   171  		case header.IPv6Version:
   172  			p = header.IPv6ProtocolNumber
   173  		default:
   174  			return true, nil
   175  		}
   176  	}
   177  
   178  	d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
   179  
   180  	return true, nil
   181  }
   182  
   183  // recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and
   184  // dispatches them.
   185  type recvMMsgDispatcher struct {
   186  	// fd is the file descriptor used to send and receive packets.
   187  	fd int
   188  
   189  	// e is the endpoint this dispatcher is attached to.
   190  	e *endpoint
   191  
   192  	// bufs is an array of iovec buffers that contain packet contents.
   193  	bufs []*iovecBuffer
   194  
   195  	// msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to
   196  	// reference an array of iovecs in the iovecs field defined above.  This
   197  	// array is passed as the parameter to recvmmsg call to retrieve
   198  	// potentially more than 1 packet per unix.
   199  	msgHdrs []rawfile.MMsgHdr
   200  }
   201  
   202  const (
   203  	// MaxMsgsPerRecv is the maximum number of packets we want to retrieve
   204  	// in a single RecvMMsg call.
   205  	MaxMsgsPerRecv = 8
   206  )
   207  
   208  func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
   209  	d := &recvMMsgDispatcher{
   210  		fd:      fd,
   211  		e:       e,
   212  		bufs:    make([]*iovecBuffer, MaxMsgsPerRecv),
   213  		msgHdrs: make([]rawfile.MMsgHdr, MaxMsgsPerRecv),
   214  	}
   215  	skipsVnetHdr := d.e.gsoKind == stack.HWGSOSupported
   216  	for i := range d.bufs {
   217  		d.bufs[i] = newIovecBuffer(BufConfig, skipsVnetHdr)
   218  	}
   219  	return d, nil
   220  }
   221  
   222  // recvMMsgDispatch reads more than one packet at a time from the file
   223  // descriptor and dispatches it.
   224  func (d *recvMMsgDispatcher) dispatch() (bool, tcpip.Error) {
   225  	// Fill message headers.
   226  	for k := range d.msgHdrs {
   227  		if d.msgHdrs[k].Msg.Iovlen > 0 {
   228  			break
   229  		}
   230  		iovecs := d.bufs[k].nextIovecs()
   231  		iovLen := len(iovecs)
   232  		d.msgHdrs[k].Len = 0
   233  		d.msgHdrs[k].Msg.Iov = &iovecs[0]
   234  		d.msgHdrs[k].Msg.SetIovlen(iovLen)
   235  	}
   236  
   237  	nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs)
   238  	if err != nil {
   239  		return false, err
   240  	}
   241  	// Process each of received packets.
   242  	for k := 0; k < nMsgs; k++ {
   243  		n := int(d.msgHdrs[k].Len)
   244  
   245  		pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
   246  			Data: d.bufs[k].pullViews(n),
   247  		})
   248  
   249  		// Mark that this iovec has been processed.
   250  		d.msgHdrs[k].Msg.Iovlen = 0
   251  
   252  		var (
   253  			p             tcpip.NetworkProtocolNumber
   254  			remote, local tcpip.LinkAddress
   255  		)
   256  		if d.e.hdrSize > 0 {
   257  			hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize)
   258  			if !ok {
   259  				return false, nil
   260  			}
   261  			eth := header.Ethernet(hdr)
   262  			p = eth.Type()
   263  			remote = eth.SourceAddress()
   264  			local = eth.DestinationAddress()
   265  		} else {
   266  			// We don't get any indication of what the packet is, so try to guess
   267  			// if it's an IPv4 or IPv6 packet.
   268  			// IP version information is at the first octet, so pulling up 1 byte.
   269  			h, ok := pkt.Data().PullUp(1)
   270  			if !ok {
   271  				// Skip this packet.
   272  				continue
   273  			}
   274  			switch header.IPVersion(h) {
   275  			case header.IPv4Version:
   276  				p = header.IPv4ProtocolNumber
   277  			case header.IPv6Version:
   278  				p = header.IPv6ProtocolNumber
   279  			default:
   280  				// Skip this packet.
   281  				continue
   282  			}
   283  		}
   284  
   285  		d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
   286  	}
   287  
   288  	return true, nil
   289  }