github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/link/fdbased/packet_dispatchers.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux 16 17 package fdbased 18 19 import ( 20 "golang.org/x/sys/unix" 21 "github.com/SagerNet/gvisor/pkg/tcpip" 22 "github.com/SagerNet/gvisor/pkg/tcpip/buffer" 23 "github.com/SagerNet/gvisor/pkg/tcpip/header" 24 "github.com/SagerNet/gvisor/pkg/tcpip/link/rawfile" 25 "github.com/SagerNet/gvisor/pkg/tcpip/stack" 26 ) 27 28 // BufConfig defines the shape of the vectorised view used to read packets from the NIC. 29 var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768} 30 31 type iovecBuffer struct { 32 // views are the actual buffers that hold the packet contents. 33 views []buffer.View 34 35 // iovecs are initialized with base pointers/len of the corresponding 36 // entries in the views defined above, except when GSO is enabled 37 // (skipsVnetHdr) then the first iovec points to a buffer for the vnet header 38 // which is stripped before the views are passed up the stack for further 39 // processing. 40 iovecs []unix.Iovec 41 42 // sizes is an array of buffer sizes for the underlying views. sizes is 43 // immutable. 44 sizes []int 45 46 // skipsVnetHdr is true if virtioNetHdr is to skipped. 47 skipsVnetHdr bool 48 } 49 50 func newIovecBuffer(sizes []int, skipsVnetHdr bool) *iovecBuffer { 51 b := &iovecBuffer{ 52 views: make([]buffer.View, len(sizes)), 53 sizes: sizes, 54 skipsVnetHdr: skipsVnetHdr, 55 } 56 niov := len(b.views) 57 if b.skipsVnetHdr { 58 niov++ 59 } 60 b.iovecs = make([]unix.Iovec, niov) 61 return b 62 } 63 64 func (b *iovecBuffer) nextIovecs() []unix.Iovec { 65 vnetHdrOff := 0 66 if b.skipsVnetHdr { 67 var vnetHdr [virtioNetHdrSize]byte 68 // The kernel adds virtioNetHdr before each packet, but 69 // we don't use it, so so we allocate a buffer for it, 70 // add it in iovecs but don't add it in a view. 71 b.iovecs[0] = unix.Iovec{Base: &vnetHdr[0]} 72 b.iovecs[0].SetLen(virtioNetHdrSize) 73 vnetHdrOff++ 74 } 75 for i := range b.views { 76 if b.views[i] != nil { 77 break 78 } 79 v := buffer.NewView(b.sizes[i]) 80 b.views[i] = v 81 b.iovecs[i+vnetHdrOff] = unix.Iovec{Base: &v[0]} 82 b.iovecs[i+vnetHdrOff].SetLen(len(v)) 83 } 84 return b.iovecs 85 } 86 87 func (b *iovecBuffer) pullViews(n int) buffer.VectorisedView { 88 var views []buffer.View 89 c := 0 90 if b.skipsVnetHdr { 91 c += virtioNetHdrSize 92 if c >= n { 93 // Nothing in the packet. 94 return buffer.NewVectorisedView(0, nil) 95 } 96 } 97 for i, v := range b.views { 98 c += len(v) 99 if c >= n { 100 b.views[i].CapLength(len(v) - (c - n)) 101 views = append([]buffer.View(nil), b.views[:i+1]...) 102 break 103 } 104 } 105 // Remove the first len(views) used views from the state. 106 for i := range views { 107 b.views[i] = nil 108 } 109 if b.skipsVnetHdr { 110 // Exclude the size of the vnet header. 111 n -= virtioNetHdrSize 112 } 113 return buffer.NewVectorisedView(n, views) 114 } 115 116 // readVDispatcher uses readv() system call to read inbound packets and 117 // dispatches them. 118 type readVDispatcher struct { 119 // fd is the file descriptor used to send and receive packets. 120 fd int 121 122 // e is the endpoint this dispatcher is attached to. 123 e *endpoint 124 125 // buf is the iovec buffer that contains the packet contents. 126 buf *iovecBuffer 127 } 128 129 func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { 130 d := &readVDispatcher{fd: fd, e: e} 131 skipsVnetHdr := d.e.gsoKind == stack.HWGSOSupported 132 d.buf = newIovecBuffer(BufConfig, skipsVnetHdr) 133 return d, nil 134 } 135 136 // dispatch reads one packet from the file descriptor and dispatches it. 137 func (d *readVDispatcher) dispatch() (bool, tcpip.Error) { 138 n, err := rawfile.BlockingReadv(d.fd, d.buf.nextIovecs()) 139 if n == 0 || err != nil { 140 return false, err 141 } 142 143 pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ 144 Data: d.buf.pullViews(n), 145 }) 146 147 var ( 148 p tcpip.NetworkProtocolNumber 149 remote, local tcpip.LinkAddress 150 ) 151 if d.e.hdrSize > 0 { 152 hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize) 153 if !ok { 154 return false, nil 155 } 156 eth := header.Ethernet(hdr) 157 p = eth.Type() 158 remote = eth.SourceAddress() 159 local = eth.DestinationAddress() 160 } else { 161 // We don't get any indication of what the packet is, so try to guess 162 // if it's an IPv4 or IPv6 packet. 163 // IP version information is at the first octet, so pulling up 1 byte. 164 h, ok := pkt.Data().PullUp(1) 165 if !ok { 166 return true, nil 167 } 168 switch header.IPVersion(h) { 169 case header.IPv4Version: 170 p = header.IPv4ProtocolNumber 171 case header.IPv6Version: 172 p = header.IPv6ProtocolNumber 173 default: 174 return true, nil 175 } 176 } 177 178 d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt) 179 180 return true, nil 181 } 182 183 // recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and 184 // dispatches them. 185 type recvMMsgDispatcher struct { 186 // fd is the file descriptor used to send and receive packets. 187 fd int 188 189 // e is the endpoint this dispatcher is attached to. 190 e *endpoint 191 192 // bufs is an array of iovec buffers that contain packet contents. 193 bufs []*iovecBuffer 194 195 // msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to 196 // reference an array of iovecs in the iovecs field defined above. This 197 // array is passed as the parameter to recvmmsg call to retrieve 198 // potentially more than 1 packet per unix. 199 msgHdrs []rawfile.MMsgHdr 200 } 201 202 const ( 203 // MaxMsgsPerRecv is the maximum number of packets we want to retrieve 204 // in a single RecvMMsg call. 205 MaxMsgsPerRecv = 8 206 ) 207 208 func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) { 209 d := &recvMMsgDispatcher{ 210 fd: fd, 211 e: e, 212 bufs: make([]*iovecBuffer, MaxMsgsPerRecv), 213 msgHdrs: make([]rawfile.MMsgHdr, MaxMsgsPerRecv), 214 } 215 skipsVnetHdr := d.e.gsoKind == stack.HWGSOSupported 216 for i := range d.bufs { 217 d.bufs[i] = newIovecBuffer(BufConfig, skipsVnetHdr) 218 } 219 return d, nil 220 } 221 222 // recvMMsgDispatch reads more than one packet at a time from the file 223 // descriptor and dispatches it. 224 func (d *recvMMsgDispatcher) dispatch() (bool, tcpip.Error) { 225 // Fill message headers. 226 for k := range d.msgHdrs { 227 if d.msgHdrs[k].Msg.Iovlen > 0 { 228 break 229 } 230 iovecs := d.bufs[k].nextIovecs() 231 iovLen := len(iovecs) 232 d.msgHdrs[k].Len = 0 233 d.msgHdrs[k].Msg.Iov = &iovecs[0] 234 d.msgHdrs[k].Msg.SetIovlen(iovLen) 235 } 236 237 nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs) 238 if err != nil { 239 return false, err 240 } 241 // Process each of received packets. 242 for k := 0; k < nMsgs; k++ { 243 n := int(d.msgHdrs[k].Len) 244 245 pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ 246 Data: d.bufs[k].pullViews(n), 247 }) 248 249 // Mark that this iovec has been processed. 250 d.msgHdrs[k].Msg.Iovlen = 0 251 252 var ( 253 p tcpip.NetworkProtocolNumber 254 remote, local tcpip.LinkAddress 255 ) 256 if d.e.hdrSize > 0 { 257 hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize) 258 if !ok { 259 return false, nil 260 } 261 eth := header.Ethernet(hdr) 262 p = eth.Type() 263 remote = eth.SourceAddress() 264 local = eth.DestinationAddress() 265 } else { 266 // We don't get any indication of what the packet is, so try to guess 267 // if it's an IPv4 or IPv6 packet. 268 // IP version information is at the first octet, so pulling up 1 byte. 269 h, ok := pkt.Data().PullUp(1) 270 if !ok { 271 // Skip this packet. 272 continue 273 } 274 switch header.IPVersion(h) { 275 case header.IPv4Version: 276 p = header.IPv4ProtocolNumber 277 case header.IPv6Version: 278 p = header.IPv6ProtocolNumber 279 default: 280 // Skip this packet. 281 continue 282 } 283 } 284 285 d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt) 286 } 287 288 return true, nil 289 }