github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/link/fdbased/mmap.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build linux,amd64 linux,arm64
    16  
    17  package fdbased
    18  
    19  import (
    20  	"encoding/binary"
    21  	"fmt"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/SagerNet/gvisor/pkg/tcpip"
    25  	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
    26  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    27  	"github.com/SagerNet/gvisor/pkg/tcpip/link/rawfile"
    28  	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
    29  )
    30  
    31  const (
    32  	tPacketAlignment = uintptr(16)
    33  	tpStatusKernel   = 0
    34  	tpStatusUser     = 1
    35  	tpStatusCopy     = 2
    36  	tpStatusLosing   = 4
    37  )
    38  
    39  // We overallocate the frame size to accommodate space for the
    40  // TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding.
    41  //
    42  // Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB
    43  //
    44  // NOTE:
    45  //   Frames need to be aligned at 16 byte boundaries.
    46  //   BlockSize needs to be page aligned.
    47  //
    48  //   For details see PACKET_MMAP setting constraints in
    49  //   https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
    50  const (
    51  	tpFrameSize = 65536 + 128
    52  	tpBlockSize = tpFrameSize * 32
    53  	tpBlockNR   = 1
    54  	tpFrameNR   = (tpBlockSize * tpBlockNR) / tpFrameSize
    55  )
    56  
    57  // tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct
    58  // translation of the TPACKET_ALIGN macro in <linux/if_packet.h>.
    59  func tPacketAlign(v uintptr) uintptr {
    60  	return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1))
    61  }
    62  
// tPacketReq is the tpacket_req structure as described in
// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
//
// The fields below follow the order of tp_block_size, tp_block_nr,
// tp_frame_size and tp_frame_nr in that document.
//
// NOTE(review): presumably this struct is handed to the kernel when the ring
// is configured elsewhere in the package, in which case the field order and
// the uint32 widths must not be changed - confirm against the caller.
type tPacketReq struct {
	// tpBlockSize corresponds to tp_block_size (size of one block).
	tpBlockSize uint32
	// tpBlockNR corresponds to tp_block_nr (number of blocks).
	tpBlockNR   uint32
	// tpFrameSize corresponds to tp_frame_size (size of one frame).
	tpFrameSize uint32
	// tpFrameNR corresponds to tp_frame_nr (total number of frames).
	tpFrameNR   uint32
}
    71  
    72  // tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h>
    73  type tPacketHdr []byte
    74  
    75  const (
    76  	tpStatusOffset  = 0
    77  	tpLenOffset     = 8
    78  	tpSnapLenOffset = 12
    79  	tpMacOffset     = 16
    80  	tpNetOffset     = 18
    81  	tpSecOffset     = 20
    82  	tpUSecOffset    = 24
    83  )
    84  
    85  func (t tPacketHdr) tpLen() uint32 {
    86  	return binary.LittleEndian.Uint32(t[tpLenOffset:])
    87  }
    88  
    89  func (t tPacketHdr) tpSnapLen() uint32 {
    90  	return binary.LittleEndian.Uint32(t[tpSnapLenOffset:])
    91  }
    92  
    93  func (t tPacketHdr) tpMac() uint16 {
    94  	return binary.LittleEndian.Uint16(t[tpMacOffset:])
    95  }
    96  
    97  func (t tPacketHdr) tpNet() uint16 {
    98  	return binary.LittleEndian.Uint16(t[tpNetOffset:])
    99  }
   100  
   101  func (t tPacketHdr) tpSec() uint32 {
   102  	return binary.LittleEndian.Uint32(t[tpSecOffset:])
   103  }
   104  
   105  func (t tPacketHdr) tpUSec() uint32 {
   106  	return binary.LittleEndian.Uint32(t[tpUSecOffset:])
   107  }
   108  
   109  func (t tPacketHdr) Payload() []byte {
   110  	return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()]
   111  }
   112  
// packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets.
// See: mmap_amd64_unsafe.go for implementation details.
//
// NOTE(review): this file is built for both amd64 and arm64, so the file name
// above may be stale - confirm.
type packetMMapDispatcher struct {
	// fd is the file descriptor used to send and receive packets. It is
	// polled for POLLIN|POLLERR while waiting for the kernel to fill the
	// next frame.
	fd int

	// e is the endpoint this dispatcher is attached to. Its hdrSize selects
	// between Ethernet and raw IP parsing, and its dispatcher receives the
	// decoded packets.
	e *endpoint

	// ringBuffer is only used when PacketMMap dispatcher is used and points
	// to the start of the mmapped PACKET_RX_RING buffer.
	ringBuffer []byte

	// ringOffset is the index of the frame, not a byte offset, in the ring
	// buffer where the next inbound packet will be placed by the kernel. It
	// is multiplied by tpFrameSize to locate the frame and wraps around at
	// tpFrameNR.
	ringOffset int
}
   130  
   131  func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, tcpip.Error) {
   132  	hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:])
   133  	for hdr.tpStatus()&tpStatusUser == 0 {
   134  		event := rawfile.PollEvent{
   135  			FD:     int32(d.fd),
   136  			Events: unix.POLLIN | unix.POLLERR,
   137  		}
   138  		if _, errno := rawfile.BlockingPoll(&event, 1, nil); errno != 0 {
   139  			if errno == unix.EINTR {
   140  				continue
   141  			}
   142  			return nil, rawfile.TranslateErrno(errno)
   143  		}
   144  		if hdr.tpStatus()&tpStatusCopy != 0 {
   145  			// This frame is truncated so skip it after flipping the
   146  			// buffer to the kernel.
   147  			hdr.setTPStatus(tpStatusKernel)
   148  			d.ringOffset = (d.ringOffset + 1) % tpFrameNR
   149  			hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:])
   150  			continue
   151  		}
   152  	}
   153  
   154  	// Copy out the packet from the mmapped frame to a locally owned buffer.
   155  	pkt := make([]byte, hdr.tpSnapLen())
   156  	copy(pkt, hdr.Payload())
   157  	// Release packet to kernel.
   158  	hdr.setTPStatus(tpStatusKernel)
   159  	d.ringOffset = (d.ringOffset + 1) % tpFrameNR
   160  	return pkt, nil
   161  }
   162  
   163  // dispatch reads packets from an mmaped ring buffer and dispatches them to the
   164  // network stack.
   165  func (d *packetMMapDispatcher) dispatch() (bool, tcpip.Error) {
   166  	pkt, err := d.readMMappedPacket()
   167  	if err != nil {
   168  		return false, err
   169  	}
   170  	var (
   171  		p             tcpip.NetworkProtocolNumber
   172  		remote, local tcpip.LinkAddress
   173  	)
   174  	if d.e.hdrSize > 0 {
   175  		eth := header.Ethernet(pkt)
   176  		p = eth.Type()
   177  		remote = eth.SourceAddress()
   178  		local = eth.DestinationAddress()
   179  	} else {
   180  		// We don't get any indication of what the packet is, so try to guess
   181  		// if it's an IPv4 or IPv6 packet.
   182  		switch header.IPVersion(pkt) {
   183  		case header.IPv4Version:
   184  			p = header.IPv4ProtocolNumber
   185  		case header.IPv6Version:
   186  			p = header.IPv6ProtocolNumber
   187  		default:
   188  			return true, nil
   189  		}
   190  	}
   191  
   192  	pbuf := stack.NewPacketBuffer(stack.PacketBufferOptions{
   193  		Data: buffer.View(pkt).ToVectorisedView(),
   194  	})
   195  	if d.e.hdrSize > 0 {
   196  		if _, ok := pbuf.LinkHeader().Consume(d.e.hdrSize); !ok {
   197  			panic(fmt.Sprintf("LinkHeader().Consume(%d) must succeed", d.e.hdrSize))
   198  		}
   199  	}
   200  	d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pbuf)
   201  	return true, nil
   202  }