github.com/google/netstack@v0.0.0-20191123085552-55fcc16cd0eb/tcpip/link/fdbased/packet_dispatchers.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build linux
    16  
    17  package fdbased
    18  
    19  import (
    20  	"syscall"
    21  
    22  	"github.com/google/netstack/tcpip"
    23  	"github.com/google/netstack/tcpip/buffer"
    24  	"github.com/google/netstack/tcpip/header"
    25  	"github.com/google/netstack/tcpip/link/rawfile"
    26  	"github.com/google/netstack/tcpip/stack"
    27  )
    28  
    29  // BufConfig defines the shape of the vectorised view used to read packets from the NIC.
    30  var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
    31  
// readVDispatcher uses the readv() system call to read inbound packets, one
// packet per call, and dispatches them to the endpoint's network dispatcher.
type readVDispatcher struct {
	// fd is the file descriptor used to send and receive packets.
	fd int

	// e is the endpoint this dispatcher is attached to.
	e *endpoint

	// views are the actual buffers that hold the packet contents. A nil
	// entry marks a slot whose buffer was handed up the stack and must be
	// re-allocated before the next read.
	views []buffer.View

	// iovecs are initialized with the base pointer and length of the
	// corresponding entries in views above. When GSO is enabled there is
	// one extra leading iovec that points to a buffer for the vnet header,
	// which is stripped before the views are passed up the stack for
	// further processing.
	iovecs []syscall.Iovec
}
    51  
    52  func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
    53  	d := &readVDispatcher{fd: fd, e: e}
    54  	d.views = make([]buffer.View, len(BufConfig))
    55  	iovLen := len(BufConfig)
    56  	if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
    57  		iovLen++
    58  	}
    59  	d.iovecs = make([]syscall.Iovec, iovLen)
    60  	return d, nil
    61  }
    62  
    63  func (d *readVDispatcher) allocateViews(bufConfig []int) {
    64  	var vnetHdr [virtioNetHdrSize]byte
    65  	vnetHdrOff := 0
    66  	if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
    67  		// The kernel adds virtioNetHdr before each packet, but
    68  		// we don't use it, so so we allocate a buffer for it,
    69  		// add it in iovecs but don't add it in a view.
    70  		d.iovecs[0] = syscall.Iovec{
    71  			Base: &vnetHdr[0],
    72  			Len:  uint64(virtioNetHdrSize),
    73  		}
    74  		vnetHdrOff++
    75  	}
    76  	for i := 0; i < len(bufConfig); i++ {
    77  		if d.views[i] != nil {
    78  			break
    79  		}
    80  		b := buffer.NewView(bufConfig[i])
    81  		d.views[i] = b
    82  		d.iovecs[i+vnetHdrOff] = syscall.Iovec{
    83  			Base: &b[0],
    84  			Len:  uint64(len(b)),
    85  		}
    86  	}
    87  }
    88  
    89  func (d *readVDispatcher) capViews(n int, buffers []int) int {
    90  	c := 0
    91  	for i, s := range buffers {
    92  		c += s
    93  		if c >= n {
    94  			d.views[i].CapLength(s - (c - n))
    95  			return i + 1
    96  		}
    97  	}
    98  	return len(buffers)
    99  }
   100  
   101  // dispatch reads one packet from the file descriptor and dispatches it.
   102  func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
   103  	d.allocateViews(BufConfig)
   104  
   105  	n, err := rawfile.BlockingReadv(d.fd, d.iovecs)
   106  	if err != nil {
   107  		return false, err
   108  	}
   109  	if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
   110  		// Skip virtioNetHdr which is added before each packet, it
   111  		// isn't used and it isn't in a view.
   112  		n -= virtioNetHdrSize
   113  	}
   114  	if n <= d.e.hdrSize {
   115  		return false, nil
   116  	}
   117  
   118  	var (
   119  		p             tcpip.NetworkProtocolNumber
   120  		remote, local tcpip.LinkAddress
   121  		eth           header.Ethernet
   122  	)
   123  	if d.e.hdrSize > 0 {
   124  		eth = header.Ethernet(d.views[0][:header.EthernetMinimumSize])
   125  		p = eth.Type()
   126  		remote = eth.SourceAddress()
   127  		local = eth.DestinationAddress()
   128  	} else {
   129  		// We don't get any indication of what the packet is, so try to guess
   130  		// if it's an IPv4 or IPv6 packet.
   131  		switch header.IPVersion(d.views[0]) {
   132  		case header.IPv4Version:
   133  			p = header.IPv4ProtocolNumber
   134  		case header.IPv6Version:
   135  			p = header.IPv6ProtocolNumber
   136  		default:
   137  			return true, nil
   138  		}
   139  	}
   140  
   141  	used := d.capViews(n, BufConfig)
   142  	pkt := tcpip.PacketBuffer{
   143  		Data:       buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
   144  		LinkHeader: buffer.View(eth),
   145  	}
   146  	pkt.Data.TrimFront(d.e.hdrSize)
   147  
   148  	d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt)
   149  
   150  	// Prepare e.views for another packet: release used views.
   151  	for i := 0; i < used; i++ {
   152  		d.views[i] = nil
   153  	}
   154  
   155  	return true, nil
   156  }
   157  
// recvMMsgDispatcher uses the recvmmsg system call to read inbound packets,
// potentially several per call, and dispatches them.
type recvMMsgDispatcher struct {
	// fd is the file descriptor used to send and receive packets.
	fd int

	// e is the endpoint this dispatcher is attached to.
	e *endpoint

	// views is an array of arrays of buffers that contain packet contents;
	// the outer index selects the message, the inner index the buffer.
	views [][]buffer.View

	// iovecs is an array of arrays of iovec records where each iovec base
	// pointer and length are initialized to the corresponding view above,
	// except when GSO is enabled, in which case the first iovec in each
	// array of iovecs points to a buffer for the vnet header, which is
	// stripped before the views are passed up the stack for further
	// processing.
	iovecs [][]syscall.Iovec

	// msgHdrs is an array of MMsgHdr objects where each MMsgHdr is used to
	// reference an array of iovecs in the iovecs field defined above. This
	// array is passed as the parameter to the recvmmsg call to retrieve
	// potentially more than 1 packet per syscall.
	msgHdrs []rawfile.MMsgHdr
}
   183  
   184  const (
   185  	// MaxMsgsPerRecv is the maximum number of packets we want to retrieve
   186  	// in a single RecvMMsg call.
   187  	MaxMsgsPerRecv = 8
   188  )
   189  
   190  func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
   191  	d := &recvMMsgDispatcher{
   192  		fd: fd,
   193  		e:  e,
   194  	}
   195  	d.views = make([][]buffer.View, MaxMsgsPerRecv)
   196  	for i := range d.views {
   197  		d.views[i] = make([]buffer.View, len(BufConfig))
   198  	}
   199  	d.iovecs = make([][]syscall.Iovec, MaxMsgsPerRecv)
   200  	iovLen := len(BufConfig)
   201  	if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
   202  		// virtioNetHdr is prepended before each packet.
   203  		iovLen++
   204  	}
   205  	for i := range d.iovecs {
   206  		d.iovecs[i] = make([]syscall.Iovec, iovLen)
   207  	}
   208  	d.msgHdrs = make([]rawfile.MMsgHdr, MaxMsgsPerRecv)
   209  	for i := range d.msgHdrs {
   210  		d.msgHdrs[i].Msg.Iov = &d.iovecs[i][0]
   211  		d.msgHdrs[i].Msg.Iovlen = uint64(iovLen)
   212  	}
   213  	return d, nil
   214  }
   215  
   216  func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int {
   217  	c := 0
   218  	for i, s := range buffers {
   219  		c += s
   220  		if c >= n {
   221  			d.views[k][i].CapLength(s - (c - n))
   222  			return i + 1
   223  		}
   224  	}
   225  	return len(buffers)
   226  }
   227  
   228  func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) {
   229  	for k := 0; k < len(d.views); k++ {
   230  		var vnetHdr [virtioNetHdrSize]byte
   231  		vnetHdrOff := 0
   232  		if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
   233  			// The kernel adds virtioNetHdr before each packet, but
   234  			// we don't use it, so so we allocate a buffer for it,
   235  			// add it in iovecs but don't add it in a view.
   236  			d.iovecs[k][0] = syscall.Iovec{
   237  				Base: &vnetHdr[0],
   238  				Len:  uint64(virtioNetHdrSize),
   239  			}
   240  			vnetHdrOff++
   241  		}
   242  		for i := 0; i < len(bufConfig); i++ {
   243  			if d.views[k][i] != nil {
   244  				break
   245  			}
   246  			b := buffer.NewView(bufConfig[i])
   247  			d.views[k][i] = b
   248  			d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
   249  				Base: &b[0],
   250  				Len:  uint64(len(b)),
   251  			}
   252  		}
   253  	}
   254  }
   255  
   256  // recvMMsgDispatch reads more than one packet at a time from the file
   257  // descriptor and dispatches it.
   258  func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
   259  	d.allocateViews(BufConfig)
   260  
   261  	nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs)
   262  	if err != nil {
   263  		return false, err
   264  	}
   265  	// Process each of received packets.
   266  	for k := 0; k < nMsgs; k++ {
   267  		n := int(d.msgHdrs[k].Len)
   268  		if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
   269  			n -= virtioNetHdrSize
   270  		}
   271  		if n <= d.e.hdrSize {
   272  			return false, nil
   273  		}
   274  
   275  		var (
   276  			p             tcpip.NetworkProtocolNumber
   277  			remote, local tcpip.LinkAddress
   278  			eth           header.Ethernet
   279  		)
   280  		if d.e.hdrSize > 0 {
   281  			eth = header.Ethernet(d.views[k][0])
   282  			p = eth.Type()
   283  			remote = eth.SourceAddress()
   284  			local = eth.DestinationAddress()
   285  		} else {
   286  			// We don't get any indication of what the packet is, so try to guess
   287  			// if it's an IPv4 or IPv6 packet.
   288  			switch header.IPVersion(d.views[k][0]) {
   289  			case header.IPv4Version:
   290  				p = header.IPv4ProtocolNumber
   291  			case header.IPv6Version:
   292  				p = header.IPv6ProtocolNumber
   293  			default:
   294  				return true, nil
   295  			}
   296  		}
   297  
   298  		used := d.capViews(k, int(n), BufConfig)
   299  		pkt := tcpip.PacketBuffer{
   300  			Data:       buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
   301  			LinkHeader: buffer.View(eth),
   302  		}
   303  		pkt.Data.TrimFront(d.e.hdrSize)
   304  		d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt)
   305  
   306  		// Prepare e.views for another packet: release used views.
   307  		for i := 0; i < used; i++ {
   308  			d.views[k][i] = nil
   309  		}
   310  	}
   311  
   312  	for k := 0; k < nMsgs; k++ {
   313  		d.msgHdrs[k].Len = 0
   314  	}
   315  
   316  	return true, nil
   317  }