github.com/noisysockets/netstack@v0.6.0/pkg/tcpip/link/fdbased/mmap.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build (linux && amd64) || (linux && arm64) 16 // +build linux,amd64 linux,arm64 17 18 package fdbased 19 20 import ( 21 "encoding/binary" 22 "fmt" 23 24 "golang.org/x/sys/unix" 25 "github.com/noisysockets/netstack/pkg/buffer" 26 "github.com/noisysockets/netstack/pkg/tcpip" 27 "github.com/noisysockets/netstack/pkg/tcpip/header" 28 "github.com/noisysockets/netstack/pkg/tcpip/link/rawfile" 29 "github.com/noisysockets/netstack/pkg/tcpip/link/stopfd" 30 "github.com/noisysockets/netstack/pkg/tcpip/stack" 31 ) 32 33 const ( 34 tPacketAlignment = uintptr(16) 35 tpStatusKernel = 0 36 tpStatusUser = 1 37 tpStatusCopy = 2 38 tpStatusLosing = 4 39 ) 40 41 // We overallocate the frame size to accommodate space for the 42 // TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding. 43 // 44 // Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB 45 // 46 // NOTE: 47 // 48 // Frames need to be aligned at 16 byte boundaries. 49 // BlockSize needs to be page aligned. 50 // 51 // For details see PACKET_MMAP setting constraints in 52 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 53 const ( 54 tpFrameSize = 65536 + 128 55 tpBlockSize = tpFrameSize * 32 56 tpBlockNR = 1 57 tpFrameNR = (tpBlockSize * tpBlockNR) / tpFrameSize 58 ) 59 60 // tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct 61 // translation of the TPACKET_ALIGN macro in <linux/if_packet.h>. 62 func tPacketAlign(v uintptr) uintptr { 63 return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1)) 64 } 65 66 // tPacketReq is the tpacket_req structure as described in 67 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 68 type tPacketReq struct { 69 tpBlockSize uint32 70 tpBlockNR uint32 71 tpFrameSize uint32 72 tpFrameNR uint32 73 } 74 75 // tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h> 76 type tPacketHdr []byte 77 78 const ( 79 tpStatusOffset = 0 80 tpLenOffset = 8 81 tpSnapLenOffset = 12 82 tpMacOffset = 16 83 tpNetOffset = 18 84 tpSecOffset = 20 85 tpUSecOffset = 24 86 ) 87 88 func (t tPacketHdr) tpLen() uint32 { 89 return binary.LittleEndian.Uint32(t[tpLenOffset:]) 90 } 91 92 func (t tPacketHdr) tpSnapLen() uint32 { 93 return binary.LittleEndian.Uint32(t[tpSnapLenOffset:]) 94 } 95 96 func (t tPacketHdr) tpMac() uint16 { 97 return binary.LittleEndian.Uint16(t[tpMacOffset:]) 98 } 99 100 func (t tPacketHdr) tpNet() uint16 { 101 return binary.LittleEndian.Uint16(t[tpNetOffset:]) 102 } 103 104 func (t tPacketHdr) tpSec() uint32 { 105 return binary.LittleEndian.Uint32(t[tpSecOffset:]) 106 } 107 108 func (t tPacketHdr) tpUSec() uint32 { 109 return binary.LittleEndian.Uint32(t[tpUSecOffset:]) 110 } 111 112 func (t tPacketHdr) Payload() []byte { 113 return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()] 114 } 115 116 // packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets. 117 // See: mmap_amd64_unsafe.go for implementation details. 118 type packetMMapDispatcher struct { 119 stopfd.StopFD 120 // fd is the file descriptor used to send and receive packets. 121 fd int 122 123 // e is the endpoint this dispatcher is attached to. 124 e *endpoint 125 126 // ringBuffer is only used when PacketMMap dispatcher is used and points 127 // to the start of the mmapped PACKET_RX_RING buffer. 128 ringBuffer []byte 129 130 // ringOffset is the current offset into the ring buffer where the next 131 // inbound packet will be placed by the kernel. 132 ringOffset int 133 } 134 135 func (*packetMMapDispatcher) release() {} 136 137 func (d *packetMMapDispatcher) readMMappedPacket() (*buffer.View, bool, tcpip.Error) { 138 hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) 139 for hdr.tpStatus()&tpStatusUser == 0 { 140 stopped, errno := rawfile.BlockingPollUntilStopped(d.EFD, d.fd, unix.POLLIN|unix.POLLERR) 141 if errno != 0 { 142 if errno == unix.EINTR { 143 continue 144 } 145 return nil, stopped, rawfile.TranslateErrno(errno) 146 } 147 if stopped { 148 return nil, true, nil 149 } 150 if hdr.tpStatus()&tpStatusCopy != 0 { 151 // This frame is truncated so skip it after flipping the 152 // buffer to the kernel. 153 hdr.setTPStatus(tpStatusKernel) 154 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 155 hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:]) 156 continue 157 } 158 } 159 160 // Copy out the packet from the mmapped frame to a locally owned buffer. 161 pkt := buffer.NewView(int(hdr.tpSnapLen())) 162 pkt.Write(hdr.Payload()) 163 // Release packet to kernel. 164 hdr.setTPStatus(tpStatusKernel) 165 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 166 return pkt, false, nil 167 } 168 169 // dispatch reads packets from an mmaped ring buffer and dispatches them to the 170 // network stack. 171 func (d *packetMMapDispatcher) dispatch() (bool, tcpip.Error) { 172 pkt, stopped, err := d.readMMappedPacket() 173 if err != nil || stopped { 174 return false, err 175 } 176 var p tcpip.NetworkProtocolNumber 177 if d.e.hdrSize > 0 { 178 p = header.Ethernet(pkt.AsSlice()).Type() 179 } else { 180 // We don't get any indication of what the packet is, so try to guess 181 // if it's an IPv4 or IPv6 packet. 182 switch header.IPVersion(pkt.AsSlice()) { 183 case header.IPv4Version: 184 p = header.IPv4ProtocolNumber 185 case header.IPv6Version: 186 p = header.IPv6ProtocolNumber 187 default: 188 return true, nil 189 } 190 } 191 192 pbuf := stack.NewPacketBuffer(stack.PacketBufferOptions{ 193 Payload: buffer.MakeWithView(pkt), 194 }) 195 defer pbuf.DecRef() 196 if d.e.hdrSize > 0 { 197 if _, ok := pbuf.LinkHeader().Consume(d.e.hdrSize); !ok { 198 panic(fmt.Sprintf("LinkHeader().Consume(%d) must succeed", d.e.hdrSize)) 199 } 200 } 201 d.e.mu.RLock() 202 dsp := d.e.dispatcher 203 d.e.mu.RUnlock() 204 dsp.DeliverNetworkPacket(p, pbuf) 205 return true, nil 206 }