github.com/flowerwrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/link/fdbased/mmap.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build linux,amd64 linux,arm64 darwin,amd64
    16  
    17  package fdbased
    18  
    19  import (
    20  	"encoding/binary"
    21  	"syscall"
    22  
    23  	"github.com/FlowerWrong/netstack/tcpip"
    24  	"github.com/FlowerWrong/netstack/tcpip/buffer"
    25  	"github.com/FlowerWrong/netstack/tcpip/header"
    26  	"github.com/FlowerWrong/netstack/tcpip/link/rawfile"
    27  	"golang.org/x/sys/unix"
    28  )
    29  
const (
	// tPacketAlignment is the byte boundary that tPacketAlign rounds up to.
	tPacketAlignment = uintptr(16)
	// tpStatusKernel is the status value written to a frame to hand it back
	// to the kernel once user space is done with it.
	tpStatusKernel   = 0
	// tpStatusUser is the status bit readMMappedPacket waits for before
	// reading a frame.
	tpStatusUser     = 1
	// tpStatusCopy is the status bit that readMMappedPacket treats as marking
	// a truncated frame.
	tpStatusCopy     = 2
	// tpStatusLosing is not referenced in this file; presumably it mirrors
	// TP_STATUS_LOSING from <linux/if_packet.h> -- TODO confirm.
	tpStatusLosing   = 4
)
    37  
// We overallocate the frame size to accommodate space for the
// TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding.
//
// Memory allocated for the ring buffer: tpBlockSize * tpBlockNR =
// (65536 + 128) * 32 * 1 = 2,101,248 bytes, i.e. slightly more than 2 MiB.
//
// NOTE:
//   Frames need to be aligned at 16 byte boundaries.
//   BlockSize needs to be page aligned.
//
//   For details see PACKET_MMAP setting constraints in
//   https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
const (
	// tpFrameSize is the size in bytes of a single frame: 64 KiB plus 128
	// bytes of headroom for the headers and padding described above.
	tpFrameSize = 65536 + 128
	// tpBlockSize is the size in bytes of a single block; each block holds 32
	// frames.
	tpBlockSize = tpFrameSize * 32
	// tpBlockNR is the number of blocks in the ring.
	tpBlockNR   = 1
	// tpFrameNR is the total number of frames in the ring (32 with the values
	// above). readMMappedPacket wraps its frame index modulo this value.
	tpFrameNR   = (tpBlockSize * tpBlockNR) / tpFrameSize
)
    55  
    56  // tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct
    57  // translation of the TPACKET_ALIGN macro in <linux/if_packet.h>.
    58  func tPacketAlign(v uintptr) uintptr {
    59  	return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1))
    60  }
    61  
// tPacketReq is the tpacket_req structure as described in
// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
//
// The field comments below paraphrase that document's description of the
// corresponding tp_* members.
type tPacketReq struct {
	// tpBlockSize corresponds to tp_block_size: the minimal size of a
	// contiguous block.
	tpBlockSize uint32
	// tpBlockNR corresponds to tp_block_nr: the number of blocks.
	tpBlockNR   uint32
	// tpFrameSize corresponds to tp_frame_size: the size of a frame.
	tpFrameSize uint32
	// tpFrameNR corresponds to tp_frame_nr: the total number of frames.
	tpFrameNR   uint32
}
    70  
    71  // tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h>
    72  type tPacketHdr []byte
    73  
    74  const (
    75  	tpStatusOffset  = 0
    76  	tpLenOffset     = 8
    77  	tpSnapLenOffset = 12
    78  	tpMacOffset     = 16
    79  	tpNetOffset     = 18
    80  	tpSecOffset     = 20
    81  	tpUSecOffset    = 24
    82  )
    83  
    84  func (t tPacketHdr) tpLen() uint32 {
    85  	return binary.LittleEndian.Uint32(t[tpLenOffset:])
    86  }
    87  
    88  func (t tPacketHdr) tpSnapLen() uint32 {
    89  	return binary.LittleEndian.Uint32(t[tpSnapLenOffset:])
    90  }
    91  
    92  func (t tPacketHdr) tpMac() uint16 {
    93  	return binary.LittleEndian.Uint16(t[tpMacOffset:])
    94  }
    95  
    96  func (t tPacketHdr) tpNet() uint16 {
    97  	return binary.LittleEndian.Uint16(t[tpNetOffset:])
    98  }
    99  
   100  func (t tPacketHdr) tpSec() uint32 {
   101  	return binary.LittleEndian.Uint32(t[tpSecOffset:])
   102  }
   103  
   104  func (t tPacketHdr) tpUSec() uint32 {
   105  	return binary.LittleEndian.Uint32(t[tpUSecOffset:])
   106  }
   107  
   108  func (t tPacketHdr) Payload() []byte {
   109  	return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()]
   110  }
   111  
// packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets.
// See: mmap_amd64_unsafe.go for implementation details.
type packetMMapDispatcher struct {
	// fd is the file descriptor used to send and receive packets. It is the
	// descriptor polled while waiting for the next frame to become readable.
	fd int

	// e is the endpoint this dispatcher is attached to. Its hdrSize and
	// dispatcher fields are used when delivering packets.
	e *endpoint

	// ringBuffer is only used when PacketMMap dispatcher is used and points
	// to the start of the mmapped PACKET_RX_RING buffer.
	ringBuffer []byte

	// ringOffset is the index of the frame in the ring buffer where the next
	// inbound packet will be placed by the kernel. It is a frame index, not a
	// byte offset: it is multiplied by tpFrameSize to locate the frame and
	// wraps modulo tpFrameNR.
	ringOffset int
}
   129  
   130  func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, *tcpip.Error) {
   131  	hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:])
   132  	for hdr.tpStatus()&tpStatusUser == 0 {
   133  		event := rawfile.PollEvent{
   134  			FD:     int32(d.fd),
   135  			Events: unix.POLLIN | unix.POLLERR,
   136  		}
   137  		if _, errno := rawfile.BlockingPoll(&event, 1, nil); errno != 0 {
   138  			if errno == syscall.EINTR {
   139  				continue
   140  			}
   141  			return nil, rawfile.TranslateErrno(errno)
   142  		}
   143  		if hdr.tpStatus()&tpStatusCopy != 0 {
   144  			// This frame is truncated so skip it after flipping the
   145  			// buffer to the kernel.
   146  			hdr.setTPStatus(tpStatusKernel)
   147  			d.ringOffset = (d.ringOffset + 1) % tpFrameNR
   148  			hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:])
   149  			continue
   150  		}
   151  	}
   152  
   153  	// Copy out the packet from the mmapped frame to a locally owned buffer.
   154  	pkt := make([]byte, hdr.tpSnapLen())
   155  	copy(pkt, hdr.Payload())
   156  	// Release packet to kernel.
   157  	hdr.setTPStatus(tpStatusKernel)
   158  	d.ringOffset = (d.ringOffset + 1) % tpFrameNR
   159  	return pkt, nil
   160  }
   161  
   162  // dispatch reads packets from an mmaped ring buffer and dispatches them to the
   163  // network stack.
   164  func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
   165  	pkt, err := d.readMMappedPacket()
   166  	if err != nil {
   167  		return false, err
   168  	}
   169  	var (
   170  		p             tcpip.NetworkProtocolNumber
   171  		remote, local tcpip.LinkAddress
   172  	)
   173  	if d.e.hdrSize > 0 {
   174  		eth := header.Ethernet(pkt)
   175  		p = eth.Type()
   176  		remote = eth.SourceAddress()
   177  		local = eth.DestinationAddress()
   178  	} else {
   179  		// We don't get any indication of what the packet is, so try to guess
   180  		// if it's an IPv4 or IPv6 packet.
   181  		switch header.IPVersion(pkt) {
   182  		case header.IPv4Version:
   183  			p = header.IPv4ProtocolNumber
   184  		case header.IPv6Version:
   185  			p = header.IPv6ProtocolNumber
   186  		default:
   187  			return true, nil
   188  		}
   189  	}
   190  
   191  	pkt = pkt[d.e.hdrSize:]
   192  	d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}))
   193  	return true, nil
   194  }