github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/link/fdbased/mmap.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux,amd64 linux,arm64 16 17 package fdbased 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 23 "golang.org/x/sys/unix" 24 "github.com/SagerNet/gvisor/pkg/tcpip" 25 "github.com/SagerNet/gvisor/pkg/tcpip/buffer" 26 "github.com/SagerNet/gvisor/pkg/tcpip/header" 27 "github.com/SagerNet/gvisor/pkg/tcpip/link/rawfile" 28 "github.com/SagerNet/gvisor/pkg/tcpip/stack" 29 ) 30 31 const ( 32 tPacketAlignment = uintptr(16) 33 tpStatusKernel = 0 34 tpStatusUser = 1 35 tpStatusCopy = 2 36 tpStatusLosing = 4 37 ) 38 39 // We overallocate the frame size to accommodate space for the 40 // TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding. 41 // 42 // Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB 43 // 44 // NOTE: 45 // Frames need to be aligned at 16 byte boundaries. 46 // BlockSize needs to be page aligned. 47 // 48 // For details see PACKET_MMAP setting constraints in 49 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 50 const ( 51 tpFrameSize = 65536 + 128 52 tpBlockSize = tpFrameSize * 32 53 tpBlockNR = 1 54 tpFrameNR = (tpBlockSize * tpBlockNR) / tpFrameSize 55 ) 56 57 // tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct 58 // translation of the TPACKET_ALIGN macro in <linux/if_packet.h>. 59 func tPacketAlign(v uintptr) uintptr { 60 return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1)) 61 } 62 63 // tPacketReq is the tpacket_req structure as described in 64 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 65 type tPacketReq struct { 66 tpBlockSize uint32 67 tpBlockNR uint32 68 tpFrameSize uint32 69 tpFrameNR uint32 70 } 71 72 // tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h> 73 type tPacketHdr []byte 74 75 const ( 76 tpStatusOffset = 0 77 tpLenOffset = 8 78 tpSnapLenOffset = 12 79 tpMacOffset = 16 80 tpNetOffset = 18 81 tpSecOffset = 20 82 tpUSecOffset = 24 83 ) 84 85 func (t tPacketHdr) tpLen() uint32 { 86 return binary.LittleEndian.Uint32(t[tpLenOffset:]) 87 } 88 89 func (t tPacketHdr) tpSnapLen() uint32 { 90 return binary.LittleEndian.Uint32(t[tpSnapLenOffset:]) 91 } 92 93 func (t tPacketHdr) tpMac() uint16 { 94 return binary.LittleEndian.Uint16(t[tpMacOffset:]) 95 } 96 97 func (t tPacketHdr) tpNet() uint16 { 98 return binary.LittleEndian.Uint16(t[tpNetOffset:]) 99 } 100 101 func (t tPacketHdr) tpSec() uint32 { 102 return binary.LittleEndian.Uint32(t[tpSecOffset:]) 103 } 104 105 func (t tPacketHdr) tpUSec() uint32 { 106 return binary.LittleEndian.Uint32(t[tpUSecOffset:]) 107 } 108 109 func (t tPacketHdr) Payload() []byte { 110 return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()] 111 } 112 113 // packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets. 114 // See: mmap_amd64_unsafe.go for implementation details. 115 type packetMMapDispatcher struct { 116 // fd is the file descriptor used to send and receive packets. 117 fd int 118 119 // e is the endpoint this dispatcher is attached to. 120 e *endpoint 121 122 // ringBuffer is only used when PacketMMap dispatcher is used and points 123 // to the start of the mmapped PACKET_RX_RING buffer. 124 ringBuffer []byte 125 126 // ringOffset is the current offset into the ring buffer where the next 127 // inbound packet will be placed by the kernel. 128 ringOffset int 129 } 130 131 func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, tcpip.Error) { 132 hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) 133 for hdr.tpStatus()&tpStatusUser == 0 { 134 event := rawfile.PollEvent{ 135 FD: int32(d.fd), 136 Events: unix.POLLIN | unix.POLLERR, 137 } 138 if _, errno := rawfile.BlockingPoll(&event, 1, nil); errno != 0 { 139 if errno == unix.EINTR { 140 continue 141 } 142 return nil, rawfile.TranslateErrno(errno) 143 } 144 if hdr.tpStatus()&tpStatusCopy != 0 { 145 // This frame is truncated so skip it after flipping the 146 // buffer to the kernel. 147 hdr.setTPStatus(tpStatusKernel) 148 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 149 hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:]) 150 continue 151 } 152 } 153 154 // Copy out the packet from the mmapped frame to a locally owned buffer. 155 pkt := make([]byte, hdr.tpSnapLen()) 156 copy(pkt, hdr.Payload()) 157 // Release packet to kernel. 158 hdr.setTPStatus(tpStatusKernel) 159 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 160 return pkt, nil 161 } 162 163 // dispatch reads packets from an mmaped ring buffer and dispatches them to the 164 // network stack. 165 func (d *packetMMapDispatcher) dispatch() (bool, tcpip.Error) { 166 pkt, err := d.readMMappedPacket() 167 if err != nil { 168 return false, err 169 } 170 var ( 171 p tcpip.NetworkProtocolNumber 172 remote, local tcpip.LinkAddress 173 ) 174 if d.e.hdrSize > 0 { 175 eth := header.Ethernet(pkt) 176 p = eth.Type() 177 remote = eth.SourceAddress() 178 local = eth.DestinationAddress() 179 } else { 180 // We don't get any indication of what the packet is, so try to guess 181 // if it's an IPv4 or IPv6 packet. 182 switch header.IPVersion(pkt) { 183 case header.IPv4Version: 184 p = header.IPv4ProtocolNumber 185 case header.IPv6Version: 186 p = header.IPv6ProtocolNumber 187 default: 188 return true, nil 189 } 190 } 191 192 pbuf := stack.NewPacketBuffer(stack.PacketBufferOptions{ 193 Data: buffer.View(pkt).ToVectorisedView(), 194 }) 195 if d.e.hdrSize > 0 { 196 if _, ok := pbuf.LinkHeader().Consume(d.e.hdrSize); !ok { 197 panic(fmt.Sprintf("LinkHeader().Consume(%d) must succeed", d.e.hdrSize)) 198 } 199 } 200 d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pbuf) 201 return true, nil 202 }