gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/tcpip/link/fdbased/mmap.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build (linux && amd64) || (linux && arm64) 16 // +build linux,amd64 linux,arm64 17 18 package fdbased 19 20 import ( 21 "encoding/binary" 22 "fmt" 23 24 "golang.org/x/sys/unix" 25 "gvisor.dev/gvisor/pkg/buffer" 26 "gvisor.dev/gvisor/pkg/tcpip" 27 "gvisor.dev/gvisor/pkg/tcpip/header" 28 "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" 29 "gvisor.dev/gvisor/pkg/tcpip/link/stopfd" 30 "gvisor.dev/gvisor/pkg/tcpip/stack" 31 ) 32 33 const ( 34 tPacketAlignment = uintptr(16) 35 tpStatusKernel = 0 36 tpStatusUser = 1 37 tpStatusCopy = 2 38 tpStatusLosing = 4 39 ) 40 41 // We overallocate the frame size to accommodate space for the 42 // TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding. 43 // 44 // Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB 45 // 46 // NOTE: 47 // 48 // Frames need to be aligned at 16 byte boundaries. 49 // BlockSize needs to be page aligned. 50 // 51 // For details see PACKET_MMAP setting constraints in 52 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 53 const ( 54 tpFrameSize = 65536 + 128 55 tpBlockSize = tpFrameSize * 32 56 tpBlockNR = 1 57 tpFrameNR = (tpBlockSize * tpBlockNR) / tpFrameSize 58 ) 59 60 // tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct 61 // translation of the TPACKET_ALIGN macro in <linux/if_packet.h>. 62 func tPacketAlign(v uintptr) uintptr { 63 return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1)) 64 } 65 66 // tPacketReq is the tpacket_req structure as described in 67 // https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 68 type tPacketReq struct { 69 tpBlockSize uint32 70 tpBlockNR uint32 71 tpFrameSize uint32 72 tpFrameNR uint32 73 } 74 75 // tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h> 76 type tPacketHdr []byte 77 78 const ( 79 tpStatusOffset = 0 80 tpLenOffset = 8 81 tpSnapLenOffset = 12 82 tpMacOffset = 16 83 tpNetOffset = 18 84 tpSecOffset = 20 85 tpUSecOffset = 24 86 ) 87 88 func (t tPacketHdr) tpLen() uint32 { 89 return binary.LittleEndian.Uint32(t[tpLenOffset:]) 90 } 91 92 func (t tPacketHdr) tpSnapLen() uint32 { 93 return binary.LittleEndian.Uint32(t[tpSnapLenOffset:]) 94 } 95 96 func (t tPacketHdr) tpMac() uint16 { 97 return binary.LittleEndian.Uint16(t[tpMacOffset:]) 98 } 99 100 func (t tPacketHdr) tpNet() uint16 { 101 return binary.LittleEndian.Uint16(t[tpNetOffset:]) 102 } 103 104 func (t tPacketHdr) tpSec() uint32 { 105 return binary.LittleEndian.Uint32(t[tpSecOffset:]) 106 } 107 108 func (t tPacketHdr) tpUSec() uint32 { 109 return binary.LittleEndian.Uint32(t[tpUSecOffset:]) 110 } 111 112 func (t tPacketHdr) Payload() []byte { 113 return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()] 114 } 115 116 // packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets. 117 // See: mmap_amd64_unsafe.go for implementation details. 118 type packetMMapDispatcher struct { 119 stopfd.StopFD 120 // fd is the file descriptor used to send and receive packets. 121 fd int 122 123 // e is the endpoint this dispatcher is attached to. 124 e *endpoint 125 126 // ringBuffer is only used when PacketMMap dispatcher is used and points 127 // to the start of the mmapped PACKET_RX_RING buffer. 128 ringBuffer []byte 129 130 // ringOffset is the current offset into the ring buffer where the next 131 // inbound packet will be placed by the kernel. 132 ringOffset int 133 134 // mgr is the processor goroutine manager. 135 mgr *processorManager 136 } 137 138 func (d *packetMMapDispatcher) release() { 139 d.mgr.close() 140 } 141 142 func (d *packetMMapDispatcher) readMMappedPackets() (stack.PacketBufferList, bool, tcpip.Error) { 143 var pkts stack.PacketBufferList 144 hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) 145 for hdr.tpStatus()&tpStatusUser == 0 { 146 stopped, errno := rawfile.BlockingPollUntilStopped(d.EFD, d.fd, unix.POLLIN|unix.POLLERR) 147 if errno != 0 { 148 if errno == unix.EINTR { 149 continue 150 } 151 return pkts, stopped, rawfile.TranslateErrno(errno) 152 } 153 if stopped { 154 return pkts, true, nil 155 } 156 if hdr.tpStatus()&tpStatusCopy != 0 { 157 // This frame is truncated so skip it after flipping the 158 // buffer to the kernel. 159 hdr.setTPStatus(tpStatusKernel) 160 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 161 hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:]) 162 continue 163 } 164 } 165 166 for hdr.tpStatus()&tpStatusUser == 1 { 167 // Copy out the packet from the mmapped frame to a locally owned buffer. 168 pkts.PushBack(stack.NewPacketBuffer(stack.PacketBufferOptions{ 169 Payload: buffer.MakeWithView(buffer.NewViewWithData(hdr.Payload())), 170 })) 171 // Release packet to kernel. 172 hdr.setTPStatus(tpStatusKernel) 173 d.ringOffset = (d.ringOffset + 1) % tpFrameNR 174 hdr = tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) 175 } 176 return pkts, false, nil 177 } 178 179 // dispatch reads packets from an mmaped ring buffer and dispatches them to the 180 // network stack. 181 func (d *packetMMapDispatcher) dispatch() (bool, tcpip.Error) { 182 pkts, stopped, err := d.readMMappedPackets() 183 defer pkts.Reset() 184 if err != nil || stopped { 185 return false, err 186 } 187 for _, pkt := range pkts.AsSlice() { 188 if d.e.hdrSize > 0 { 189 hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize) 190 if !ok { 191 panic(fmt.Sprintf("LinkHeader().Consume(%d) must succeed", d.e.hdrSize)) 192 } 193 pkt.NetworkProtocolNumber = header.Ethernet(hdr).Type() 194 } 195 d.mgr.queuePacket(pkt, d.e.hdrSize > 0) 196 } 197 if pkts.Len() > 0 { 198 d.mgr.wakeReady() 199 } 200 return true, nil 201 }