github.com/FlowerWrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/link/fdbased/mmap.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux,amd64 linux,arm64 darwin,amd64 16 17 package fdbased 18 19 import ( 20 "encoding/binary" 21 "syscall" 22 23 "github.com/FlowerWrong/netstack/tcpip" 24 "github.com/FlowerWrong/netstack/tcpip/buffer" 25 "github.com/FlowerWrong/netstack/tcpip/header" 26 "github.com/FlowerWrong/netstack/tcpip/link/rawfile" 27 "golang.org/x/sys/unix" 28 ) 29 30 const ( 31 tPacketAlignment = uintptr(16) 32 tpStatusKernel = 0 33 tpStatusUser = 1 34 tpStatusCopy = 2 35 tpStatusLosing = 4 36 ) 37 38 // We overallocate the frame size to accommodate space for the 39 // TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding. 40 // 41 // Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB 42 // 43 // NOTE: 44 // Frames need to be aligned at 16 byte boundaries. 45 // BlockSize needs to be page aligned. 46 // 47 // For details see PACKET_MMAP setting constraints in 48 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 49 const ( 50 tpFrameSize = 65536 + 128 51 tpBlockSize = tpFrameSize * 32 52 tpBlockNR = 1 53 tpFrameNR = (tpBlockSize * tpBlockNR) / tpFrameSize 54 ) 55 56 // tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct 57 // translation of the TPACKET_ALIGN macro in <linux/if_packet.h>. 58 func tPacketAlign(v uintptr) uintptr { 59 return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1)) 60 } 61 62 // tPacketReq is the tpacket_req structure as described in 63 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 64 type tPacketReq struct { 65 tpBlockSize uint32 66 tpBlockNR uint32 67 tpFrameSize uint32 68 tpFrameNR uint32 69 } 70 71 // tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h> 72 type tPacketHdr []byte 73 74 const ( 75 tpStatusOffset = 0 76 tpLenOffset = 8 77 tpSnapLenOffset = 12 78 tpMacOffset = 16 79 tpNetOffset = 18 80 tpSecOffset = 20 81 tpUSecOffset = 24 82 ) 83 84 func (t tPacketHdr) tpLen() uint32 { 85 return binary.LittleEndian.Uint32(t[tpLenOffset:]) 86 } 87 88 func (t tPacketHdr) tpSnapLen() uint32 { 89 return binary.LittleEndian.Uint32(t[tpSnapLenOffset:]) 90 } 91 92 func (t tPacketHdr) tpMac() uint16 { 93 return binary.LittleEndian.Uint16(t[tpMacOffset:]) 94 } 95 96 func (t tPacketHdr) tpNet() uint16 { 97 return binary.LittleEndian.Uint16(t[tpNetOffset:]) 98 } 99 100 func (t tPacketHdr) tpSec() uint32 { 101 return binary.LittleEndian.Uint32(t[tpSecOffset:]) 102 } 103 104 func (t tPacketHdr) tpUSec() uint32 { 105 return binary.LittleEndian.Uint32(t[tpUSecOffset:]) 106 } 107 108 func (t tPacketHdr) Payload() []byte { 109 return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()] 110 } 111 112 // packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets. 113 // See: mmap_amd64_unsafe.go for implementation details. 114 type packetMMapDispatcher struct { 115 // fd is the file descriptor used to send and receive packets. 116 fd int 117 118 // e is the endpoint this dispatcher is attached to. 119 e *endpoint 120 121 // ringBuffer is only used when PacketMMap dispatcher is used and points 122 // to the start of the mmapped PACKET_RX_RING buffer. 123 ringBuffer []byte 124 125 // ringOffset is the current offset into the ring buffer where the next 126 // inbound packet will be placed by the kernel. 127 ringOffset int 128 } 129 130 func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, *tcpip.Error) { 131 hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) 132 for hdr.tpStatus()&tpStatusUser == 0 { 133 event := rawfile.PollEvent{ 134 FD: int32(d.fd), 135 Events: unix.POLLIN | unix.POLLERR, 136 } 137 if _, errno := rawfile.BlockingPoll(&event, 1, nil); errno != 0 { 138 if errno == syscall.EINTR { 139 continue 140 } 141 return nil, rawfile.TranslateErrno(errno) 142 } 143 if hdr.tpStatus()&tpStatusCopy != 0 { 144 // This frame is truncated so skip it after flipping the 145 // buffer to the kernel. 146 hdr.setTPStatus(tpStatusKernel) 147 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 148 hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:]) 149 continue 150 } 151 } 152 153 // Copy out the packet from the mmapped frame to a locally owned buffer. 154 pkt := make([]byte, hdr.tpSnapLen()) 155 copy(pkt, hdr.Payload()) 156 // Release packet to kernel. 157 hdr.setTPStatus(tpStatusKernel) 158 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 159 return pkt, nil 160 } 161 162 // dispatch reads packets from an mmaped ring buffer and dispatches them to the 163 // network stack. 164 func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) { 165 pkt, err := d.readMMappedPacket() 166 if err != nil { 167 return false, err 168 } 169 var ( 170 p tcpip.NetworkProtocolNumber 171 remote, local tcpip.LinkAddress 172 ) 173 if d.e.hdrSize > 0 { 174 eth := header.Ethernet(pkt) 175 p = eth.Type() 176 remote = eth.SourceAddress() 177 local = eth.DestinationAddress() 178 } else { 179 // We don't get any indication of what the packet is, so try to guess 180 // if it's an IPv4 or IPv6 packet. 181 switch header.IPVersion(pkt) { 182 case header.IPv4Version: 183 p = header.IPv4ProtocolNumber 184 case header.IPv6Version: 185 p = header.IPv6ProtocolNumber 186 default: 187 return true, nil 188 } 189 } 190 191 pkt = pkt[d.e.hdrSize:] 192 d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)})) 193 return true, nil 194 }