github.com/google/netstack@v0.0.0-20191123085552-55fcc16cd0eb/tcpip/link/fdbased/packet_dispatchers.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build linux 16 17 package fdbased 18 19 import ( 20 "syscall" 21 22 "github.com/google/netstack/tcpip" 23 "github.com/google/netstack/tcpip/buffer" 24 "github.com/google/netstack/tcpip/header" 25 "github.com/google/netstack/tcpip/link/rawfile" 26 "github.com/google/netstack/tcpip/stack" 27 ) 28 29 // BufConfig defines the shape of the vectorised view used to read packets from the NIC. 30 var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768} 31 32 // readVDispatcher uses readv() system call to read inbound packets and 33 // dispatches them. 34 type readVDispatcher struct { 35 // fd is the file descriptor used to send and receive packets. 36 fd int 37 38 // e is the endpoint this dispatcher is attached to. 39 e *endpoint 40 41 // views are the actual buffers that hold the packet contents. 42 views []buffer.View 43 44 // iovecs are initialized with base pointers/len of the corresponding 45 // entries in the views defined above, except when GSO is enabled then 46 // the first iovec points to a buffer for the vnet header which is 47 // stripped before the views are passed up the stack for further 48 // processing. 49 iovecs []syscall.Iovec 50 } 51 52 func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { 53 d := &readVDispatcher{fd: fd, e: e} 54 d.views = make([]buffer.View, len(BufConfig)) 55 iovLen := len(BufConfig) 56 if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 { 57 iovLen++ 58 } 59 d.iovecs = make([]syscall.Iovec, iovLen) 60 return d, nil 61 } 62 63 func (d *readVDispatcher) allocateViews(bufConfig []int) { 64 var vnetHdr [virtioNetHdrSize]byte 65 vnetHdrOff := 0 66 if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 { 67 // The kernel adds virtioNetHdr before each packet, but 68 // we don't use it, so so we allocate a buffer for it, 69 // add it in iovecs but don't add it in a view. 70 d.iovecs[0] = syscall.Iovec{ 71 Base: &vnetHdr[0], 72 Len: uint64(virtioNetHdrSize), 73 } 74 vnetHdrOff++ 75 } 76 for i := 0; i < len(bufConfig); i++ { 77 if d.views[i] != nil { 78 break 79 } 80 b := buffer.NewView(bufConfig[i]) 81 d.views[i] = b 82 d.iovecs[i+vnetHdrOff] = syscall.Iovec{ 83 Base: &b[0], 84 Len: uint64(len(b)), 85 } 86 } 87 } 88 89 func (d *readVDispatcher) capViews(n int, buffers []int) int { 90 c := 0 91 for i, s := range buffers { 92 c += s 93 if c >= n { 94 d.views[i].CapLength(s - (c - n)) 95 return i + 1 96 } 97 } 98 return len(buffers) 99 } 100 101 // dispatch reads one packet from the file descriptor and dispatches it. 102 func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) { 103 d.allocateViews(BufConfig) 104 105 n, err := rawfile.BlockingReadv(d.fd, d.iovecs) 106 if err != nil { 107 return false, err 108 } 109 if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 { 110 // Skip virtioNetHdr which is added before each packet, it 111 // isn't used and it isn't in a view. 112 n -= virtioNetHdrSize 113 } 114 if n <= d.e.hdrSize { 115 return false, nil 116 } 117 118 var ( 119 p tcpip.NetworkProtocolNumber 120 remote, local tcpip.LinkAddress 121 eth header.Ethernet 122 ) 123 if d.e.hdrSize > 0 { 124 eth = header.Ethernet(d.views[0][:header.EthernetMinimumSize]) 125 p = eth.Type() 126 remote = eth.SourceAddress() 127 local = eth.DestinationAddress() 128 } else { 129 // We don't get any indication of what the packet is, so try to guess 130 // if it's an IPv4 or IPv6 packet. 131 switch header.IPVersion(d.views[0]) { 132 case header.IPv4Version: 133 p = header.IPv4ProtocolNumber 134 case header.IPv6Version: 135 p = header.IPv6ProtocolNumber 136 default: 137 return true, nil 138 } 139 } 140 141 used := d.capViews(n, BufConfig) 142 pkt := tcpip.PacketBuffer{ 143 Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)), 144 LinkHeader: buffer.View(eth), 145 } 146 pkt.Data.TrimFront(d.e.hdrSize) 147 148 d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt) 149 150 // Prepare e.views for another packet: release used views. 151 for i := 0; i < used; i++ { 152 d.views[i] = nil 153 } 154 155 return true, nil 156 } 157 158 // recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and 159 // dispatches them. 160 type recvMMsgDispatcher struct { 161 // fd is the file descriptor used to send and receive packets. 162 fd int 163 164 // e is the endpoint this dispatcher is attached to. 165 e *endpoint 166 167 // views is an array of array of buffers that contain packet contents. 168 views [][]buffer.View 169 170 // iovecs is an array of array of iovec records where each iovec base 171 // pointer and length are initialzed to the corresponding view above, 172 // except when GSO is neabled then the first iovec in each array of 173 // iovecs points to a buffer for the vnet header which is stripped 174 // before the views are passed up the stack for further processing. 175 iovecs [][]syscall.Iovec 176 177 // msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to 178 // reference an array of iovecs in the iovecs field defined above. This 179 // array is passed as the parameter to recvmmsg call to retrieve 180 // potentially more than 1 packet per syscall. 181 msgHdrs []rawfile.MMsgHdr 182 } 183 184 const ( 185 // MaxMsgsPerRecv is the maximum number of packets we want to retrieve 186 // in a single RecvMMsg call. 187 MaxMsgsPerRecv = 8 188 ) 189 190 func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) { 191 d := &recvMMsgDispatcher{ 192 fd: fd, 193 e: e, 194 } 195 d.views = make([][]buffer.View, MaxMsgsPerRecv) 196 for i := range d.views { 197 d.views[i] = make([]buffer.View, len(BufConfig)) 198 } 199 d.iovecs = make([][]syscall.Iovec, MaxMsgsPerRecv) 200 iovLen := len(BufConfig) 201 if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 { 202 // virtioNetHdr is prepended before each packet. 203 iovLen++ 204 } 205 for i := range d.iovecs { 206 d.iovecs[i] = make([]syscall.Iovec, iovLen) 207 } 208 d.msgHdrs = make([]rawfile.MMsgHdr, MaxMsgsPerRecv) 209 for i := range d.msgHdrs { 210 d.msgHdrs[i].Msg.Iov = &d.iovecs[i][0] 211 d.msgHdrs[i].Msg.Iovlen = uint64(iovLen) 212 } 213 return d, nil 214 } 215 216 func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int { 217 c := 0 218 for i, s := range buffers { 219 c += s 220 if c >= n { 221 d.views[k][i].CapLength(s - (c - n)) 222 return i + 1 223 } 224 } 225 return len(buffers) 226 } 227 228 func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) { 229 for k := 0; k < len(d.views); k++ { 230 var vnetHdr [virtioNetHdrSize]byte 231 vnetHdrOff := 0 232 if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 { 233 // The kernel adds virtioNetHdr before each packet, but 234 // we don't use it, so so we allocate a buffer for it, 235 // add it in iovecs but don't add it in a view. 236 d.iovecs[k][0] = syscall.Iovec{ 237 Base: &vnetHdr[0], 238 Len: uint64(virtioNetHdrSize), 239 } 240 vnetHdrOff++ 241 } 242 for i := 0; i < len(bufConfig); i++ { 243 if d.views[k][i] != nil { 244 break 245 } 246 b := buffer.NewView(bufConfig[i]) 247 d.views[k][i] = b 248 d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{ 249 Base: &b[0], 250 Len: uint64(len(b)), 251 } 252 } 253 } 254 } 255 256 // recvMMsgDispatch reads more than one packet at a time from the file 257 // descriptor and dispatches it. 258 func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) { 259 d.allocateViews(BufConfig) 260 261 nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs) 262 if err != nil { 263 return false, err 264 } 265 // Process each of received packets. 266 for k := 0; k < nMsgs; k++ { 267 n := int(d.msgHdrs[k].Len) 268 if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 { 269 n -= virtioNetHdrSize 270 } 271 if n <= d.e.hdrSize { 272 return false, nil 273 } 274 275 var ( 276 p tcpip.NetworkProtocolNumber 277 remote, local tcpip.LinkAddress 278 eth header.Ethernet 279 ) 280 if d.e.hdrSize > 0 { 281 eth = header.Ethernet(d.views[k][0]) 282 p = eth.Type() 283 remote = eth.SourceAddress() 284 local = eth.DestinationAddress() 285 } else { 286 // We don't get any indication of what the packet is, so try to guess 287 // if it's an IPv4 or IPv6 packet. 288 switch header.IPVersion(d.views[k][0]) { 289 case header.IPv4Version: 290 p = header.IPv4ProtocolNumber 291 case header.IPv6Version: 292 p = header.IPv6ProtocolNumber 293 default: 294 return true, nil 295 } 296 } 297 298 used := d.capViews(k, int(n), BufConfig) 299 pkt := tcpip.PacketBuffer{ 300 Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)), 301 LinkHeader: buffer.View(eth), 302 } 303 pkt.Data.TrimFront(d.e.hdrSize) 304 d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt) 305 306 // Prepare e.views for another packet: release used views. 307 for i := 0; i < used; i++ { 308 d.views[k][i] = nil 309 } 310 } 311 312 for k := 0; k < nMsgs; k++ { 313 d.msgHdrs[k].Len = 0 314 } 315 316 return true, nil 317 }