github.com/FlowerWrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/link/fdbased/packet_dispatchers.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// +build linux darwin
    16  
    17  package fdbased
    18  
    19  import (
    20  	"syscall"
    21  
    22  	"github.com/FlowerWrong/netstack/tcpip"
    23  	"github.com/FlowerWrong/netstack/tcpip/buffer"
    24  	"github.com/FlowerWrong/netstack/tcpip/header"
    25  	"github.com/FlowerWrong/netstack/tcpip/link/rawfile"
    26  	"github.com/FlowerWrong/netstack/tcpip/stack"
    27  )
    28  
    29  // BufConfig defines the shape of the vectorised view used to read packets from the NIC.
    30  var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
    31  
    32  // readVDispatcher uses readv() system call to read inbound packets and
    33  // dispatches them.
    34  type readVDispatcher struct {
    35  	// fd is the file descriptor used to send and receive packets.
    36  	fd int
    37  
    38  	// e is the endpoint this dispatcher is attached to.
    39  	e *endpoint
    40  
    41  	// views are the actual buffers that hold the packet contents.
    42  	views []buffer.View
    43  
    44  	// iovecs are initialized with base pointers/len of the corresponding
    45  	// entries in the views defined above, except when GSO is enabled then
    46  	// the first iovec points to a buffer for the vnet header which is
    47  	// stripped before the views are passed up the stack for further
    48  	// processing.
    49  	iovecs []syscall.Iovec
    50  }
    51  
    52  func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
    53  	d := &readVDispatcher{fd: fd, e: e}
    54  	d.views = make([]buffer.View, len(BufConfig))
    55  	iovLen := len(BufConfig)
    56  	if d.e.Capabilities()&stack.CapabilityGSO != 0 {
    57  		iovLen++
    58  	}
    59  	d.iovecs = make([]syscall.Iovec, iovLen)
    60  	return d, nil
    61  }
    62  
    63  func (d *readVDispatcher) allocateViews(bufConfig []int) {
    64  	var vnetHdr [virtioNetHdrSize]byte
    65  	vnetHdrOff := 0
    66  	if d.e.Capabilities()&stack.CapabilityGSO != 0 {
    67  		// The kernel adds virtioNetHdr before each packet, but
    68  		// we don't use it, so so we allocate a buffer for it,
    69  		// add it in iovecs but don't add it in a view.
    70  		d.iovecs[0] = syscall.Iovec{
    71  			Base: &vnetHdr[0],
    72  			Len:  uint64(virtioNetHdrSize),
    73  		}
    74  		vnetHdrOff++
    75  	}
    76  	for i := 0; i < len(bufConfig); i++ {
    77  		if d.views[i] != nil {
    78  			break
    79  		}
    80  		b := buffer.NewView(bufConfig[i])
    81  		d.views[i] = b
    82  		d.iovecs[i+vnetHdrOff] = syscall.Iovec{
    83  			Base: &b[0],
    84  			Len:  uint64(len(b)),
    85  		}
    86  	}
    87  }
    88  
    89  func (d *readVDispatcher) capViews(n int, buffers []int) int {
    90  	c := 0
    91  	for i, s := range buffers {
    92  		c += s
    93  		if c >= n {
    94  			d.views[i].CapLength(s - (c - n))
    95  			return i + 1
    96  		}
    97  	}
    98  	return len(buffers)
    99  }
   100  
   101  // dispatch reads one packet from the file descriptor and dispatches it.
   102  func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
   103  	d.allocateViews(BufConfig)
   104  
   105  	n, err := rawfile.BlockingReadv(d.fd, d.iovecs)
   106  	if err != nil {
   107  		return false, err
   108  	}
   109  	if d.e.Capabilities()&stack.CapabilityGSO != 0 {
   110  		// Skip virtioNetHdr which is added before each packet, it
   111  		// isn't used and it isn't in a view.
   112  		n -= virtioNetHdrSize
   113  	}
   114  	if n <= d.e.hdrSize {
   115  		return false, nil
   116  	}
   117  
   118  	var (
   119  		p             tcpip.NetworkProtocolNumber
   120  		remote, local tcpip.LinkAddress
   121  	)
   122  	if d.e.hdrSize > 0 {
   123  		eth := header.Ethernet(d.views[0])
   124  		p = eth.Type()
   125  		remote = eth.SourceAddress()
   126  		local = eth.DestinationAddress()
   127  	} else {
   128  		// We don't get any indication of what the packet is, so try to guess
   129  		// if it's an IPv4 or IPv6 packet.
   130  		switch header.IPVersion(d.views[0]) {
   131  		case header.IPv4Version:
   132  			p = header.IPv4ProtocolNumber
   133  		case header.IPv6Version:
   134  			p = header.IPv6ProtocolNumber
   135  		default:
   136  			return true, nil
   137  		}
   138  	}
   139  
   140  	used := d.capViews(n, BufConfig)
   141  	vv := buffer.NewVectorisedView(n, d.views[:used])
   142  	vv.TrimFront(d.e.hdrSize)
   143  
   144  	d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
   145  
   146  	// Prepare e.views for another packet: release used views.
   147  	for i := 0; i < used; i++ {
   148  		d.views[i] = nil
   149  	}
   150  
   151  	return true, nil
   152  }
   153  
   154  // recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and
   155  // dispatches them.
   156  type recvMMsgDispatcher struct {
   157  	// fd is the file descriptor used to send and receive packets.
   158  	fd int
   159  
   160  	// e is the endpoint this dispatcher is attached to.
   161  	e *endpoint
   162  
   163  	// views is an array of array of buffers that contain packet contents.
   164  	views [][]buffer.View
   165  
   166  	// iovecs is an array of array of iovec records where each iovec base
   167  	// pointer and length are initialzed to the corresponding view above,
   168  	// except when GSO is neabled then the first iovec in each array of
   169  	// iovecs points to a buffer for the vnet header which is stripped
   170  	// before the views are passed up the stack for further processing.
   171  	iovecs [][]syscall.Iovec
   172  
   173  	// msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to
   174  	// reference an array of iovecs in the iovecs field defined above.  This
   175  	// array is passed as the parameter to recvmmsg call to retrieve
   176  	// potentially more than 1 packet per syscall.
   177  	msgHdrs []rawfile.MMsgHdr
   178  }
   179  
   180  const (
   181  	// MaxMsgsPerRecv is the maximum number of packets we want to retrieve
   182  	// in a single RecvMMsg call.
   183  	MaxMsgsPerRecv = 8
   184  )
   185  
   186  func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int {
   187  	c := 0
   188  	for i, s := range buffers {
   189  		c += s
   190  		if c >= n {
   191  			d.views[k][i].CapLength(s - (c - n))
   192  			return i + 1
   193  		}
   194  	}
   195  	return len(buffers)
   196  }
   197  
   198  func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) {
   199  	for k := 0; k < len(d.views); k++ {
   200  		var vnetHdr [virtioNetHdrSize]byte
   201  		vnetHdrOff := 0
   202  		if d.e.Capabilities()&stack.CapabilityGSO != 0 {
   203  			// The kernel adds virtioNetHdr before each packet, but
   204  			// we don't use it, so so we allocate a buffer for it,
   205  			// add it in iovecs but don't add it in a view.
   206  			d.iovecs[k][0] = syscall.Iovec{
   207  				Base: &vnetHdr[0],
   208  				Len:  uint64(virtioNetHdrSize),
   209  			}
   210  			vnetHdrOff++
   211  		}
   212  		for i := 0; i < len(bufConfig); i++ {
   213  			if d.views[k][i] != nil {
   214  				break
   215  			}
   216  			b := buffer.NewView(bufConfig[i])
   217  			d.views[k][i] = b
   218  			d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
   219  				Base: &b[0],
   220  				Len:  uint64(len(b)),
   221  			}
   222  		}
   223  	}
   224  }
   225  
   226  // recvMMsgDispatch reads more than one packet at a time from the file
   227  // descriptor and dispatches it.
   228  func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
   229  	d.allocateViews(BufConfig)
   230  
   231  	nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs)
   232  	if err != nil {
   233  		return false, err
   234  	}
   235  	// Process each of received packets.
   236  	for k := 0; k < nMsgs; k++ {
   237  		n := int(d.msgHdrs[k].Len)
   238  		if d.e.Capabilities()&stack.CapabilityGSO != 0 {
   239  			n -= virtioNetHdrSize
   240  		}
   241  		if n <= d.e.hdrSize {
   242  			return false, nil
   243  		}
   244  
   245  		var (
   246  			p             tcpip.NetworkProtocolNumber
   247  			remote, local tcpip.LinkAddress
   248  		)
   249  		if d.e.hdrSize > 0 {
   250  			eth := header.Ethernet(d.views[k][0])
   251  			p = eth.Type()
   252  			remote = eth.SourceAddress()
   253  			local = eth.DestinationAddress()
   254  		} else {
   255  			// We don't get any indication of what the packet is, so try to guess
   256  			// if it's an IPv4 or IPv6 packet.
   257  			switch header.IPVersion(d.views[k][0]) {
   258  			case header.IPv4Version:
   259  				p = header.IPv4ProtocolNumber
   260  			case header.IPv6Version:
   261  				p = header.IPv6ProtocolNumber
   262  			default:
   263  				return true, nil
   264  			}
   265  		}
   266  
   267  		used := d.capViews(k, int(n), BufConfig)
   268  		vv := buffer.NewVectorisedView(int(n), d.views[k][:used])
   269  		vv.TrimFront(d.e.hdrSize)
   270  		d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
   271  
   272  		// Prepare e.views for another packet: release used views.
   273  		for i := 0; i < used; i++ {
   274  			d.views[k][i] = nil
   275  		}
   276  	}
   277  
   278  	for k := 0; k < nMsgs; k++ {
   279  		d.msgHdrs[k].Len = 0
   280  	}
   281  
   282  	return true, nil
   283  }