github.com/FlowerWrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/transport/raw/endpoint.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package raw provides the implementation of raw sockets (see raw(7)). Raw 16 // sockets allow applications to: 17 // 18 // * manually write and inspect transport layer headers and payloads 19 // * receive all traffic of a given transport protocol (e.g. ICMP or UDP) 20 // * optionally write and inspect network layer and link layer headers for 21 // packets 22 // 23 // Raw sockets don't have any notion of ports, and incoming packets are 24 // demultiplexed solely by protocol number. Thus, a raw UDP endpoint will 25 // receive every UDP packet received by netstack. bind(2) and connect(2) can be 26 // used to filter incoming packets by source and destination. 27 package raw 28 29 import ( 30 "sync" 31 32 "github.com/FlowerWrong/netstack/tcpip" 33 "github.com/FlowerWrong/netstack/tcpip/buffer" 34 "github.com/FlowerWrong/netstack/tcpip/header" 35 "github.com/FlowerWrong/netstack/tcpip/iptables" 36 "github.com/FlowerWrong/netstack/tcpip/stack" 37 "github.com/FlowerWrong/netstack/waiter" 38 ) 39 40 // +stateify savable 41 type packet struct { 42 packetEntry 43 // data holds the actual packet data, including any headers and 44 // payload. 45 data buffer.VectorisedView 46 // views is pre-allocated space to back data. As long as the packet is 47 // made up of fewer than 8 buffer.Views, no extra allocation is 48 // necessary to store packet data. 49 views [8]buffer.View 50 // timestampNS is the unix time at which the packet was received. 51 timestampNS int64 52 // senderAddr is the network address of the sender. 53 senderAddr tcpip.FullAddress 54 } 55 56 // endpoint is the raw socket implementation of tcpip.Endpoint. It is legal to 57 // have goroutines make concurrent calls into the endpoint. 58 // 59 // Lock order: 60 // endpoint.mu 61 // endpoint.rcvMu 62 // 63 // +stateify savable 64 type endpoint struct { 65 // The following fields are initialized at creation time and are 66 // immutable. 67 stack *stack.Stack 68 netProto tcpip.NetworkProtocolNumber 69 transProto tcpip.TransportProtocolNumber 70 waiterQueue *waiter.Queue 71 associated bool 72 73 // The following fields are used to manage the receive queue and are 74 // protected by rcvMu. 75 rcvMu sync.Mutex 76 rcvList packetList 77 rcvBufSizeMax int 78 rcvBufSize int 79 rcvClosed bool 80 81 // The following fields are protected by mu. 82 mu sync.RWMutex 83 sndBufSize int 84 closed bool 85 connected bool 86 bound bool 87 // registeredNIC is the NIC to which th endpoint is explicitly 88 // registered. Is set when Connect or Bind are used to specify a NIC. 89 registeredNIC tcpip.NICID 90 // boundNIC and boundAddr are set on calls to Bind(). When callers 91 // attempt actions that would invalidate the binding data (e.g. sending 92 // data via a NIC other than boundNIC), the endpoint will return an 93 // error. 94 boundNIC tcpip.NICID 95 boundAddr tcpip.Address 96 // route is the route to a remote network endpoint. It is set via 97 // Connect(), and is valid only when conneted is true. 98 route stack.Route 99 } 100 101 // NewEndpoint returns a raw endpoint for the given protocols. 102 // TODO(b/129292371): IP_HDRINCL and AF_PACKET. 103 func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) { 104 return newEndpoint(stack, netProto, transProto, waiterQueue, true /* associated */) 105 } 106 107 func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) { 108 if netProto != header.IPv4ProtocolNumber { 109 return nil, tcpip.ErrUnknownProtocol 110 } 111 112 ep := &endpoint{ 113 stack: stack, 114 netProto: netProto, 115 transProto: transProto, 116 waiterQueue: waiterQueue, 117 rcvBufSizeMax: 32 * 1024, 118 sndBufSize: 32 * 1024, 119 associated: associated, 120 } 121 122 // Unassociated endpoints are write-only and users call Write() with IP 123 // headers included. Because they're write-only, We don't need to 124 // register with the stack. 125 if !associated { 126 ep.rcvBufSizeMax = 0 127 ep.waiterQueue = nil 128 return ep, nil 129 } 130 131 if err := ep.stack.RegisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep); err != nil { 132 return nil, err 133 } 134 135 return ep, nil 136 } 137 138 // Close implements tcpip.Endpoint.Close. 139 func (ep *endpoint) Close() { 140 ep.mu.Lock() 141 defer ep.mu.Unlock() 142 143 if ep.closed || !ep.associated { 144 return 145 } 146 147 ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) 148 149 ep.rcvMu.Lock() 150 defer ep.rcvMu.Unlock() 151 152 // Clear the receive list. 153 ep.rcvClosed = true 154 ep.rcvBufSize = 0 155 for !ep.rcvList.Empty() { 156 ep.rcvList.Remove(ep.rcvList.Front()) 157 } 158 159 if ep.connected { 160 ep.route.Release() 161 ep.connected = false 162 } 163 164 ep.closed = true 165 166 ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) 167 } 168 169 // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf. 170 func (ep *endpoint) ModerateRecvBuf(copied int) {} 171 172 // IPTables implements tcpip.Endpoint.IPTables. 173 func (ep *endpoint) IPTables() (iptables.IPTables, error) { 174 return ep.stack.IPTables(), nil 175 } 176 177 // Read implements tcpip.Endpoint.Read. 178 func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { 179 if !ep.associated { 180 return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue 181 } 182 183 ep.rcvMu.Lock() 184 185 // If there's no data to read, return that read would block or that the 186 // endpoint is closed. 187 if ep.rcvList.Empty() { 188 err := tcpip.ErrWouldBlock 189 if ep.rcvClosed { 190 err = tcpip.ErrClosedForReceive 191 } 192 ep.rcvMu.Unlock() 193 return buffer.View{}, tcpip.ControlMessages{}, err 194 } 195 196 packet := ep.rcvList.Front() 197 ep.rcvList.Remove(packet) 198 ep.rcvBufSize -= packet.data.Size() 199 200 ep.rcvMu.Unlock() 201 202 if addr != nil { 203 *addr = packet.senderAddr 204 } 205 206 return packet.data.ToView(), tcpip.ControlMessages{HasTimestamp: true, Timestamp: packet.timestampNS}, nil 207 } 208 209 // Write implements tcpip.Endpoint.Write. 210 func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) { 211 // MSG_MORE is unimplemented. This also means that MSG_EOR is a no-op. 212 if opts.More { 213 return 0, nil, tcpip.ErrInvalidOptionValue 214 } 215 216 ep.mu.RLock() 217 218 if ep.closed { 219 ep.mu.RUnlock() 220 return 0, nil, tcpip.ErrInvalidEndpointState 221 } 222 223 payloadBytes, err := p.FullPayload() 224 if err != nil { 225 return 0, nil, err 226 } 227 228 // If this is an unassociated socket and callee provided a nonzero 229 // destination address, route using that address. 230 if !ep.associated { 231 ip := header.IPv4(payloadBytes) 232 if !ip.IsValid(len(payloadBytes)) { 233 ep.mu.RUnlock() 234 return 0, nil, tcpip.ErrInvalidOptionValue 235 } 236 dstAddr := ip.DestinationAddress() 237 // Update dstAddr with the address in the IP header, unless 238 // opts.To is set (e.g. if sendto specifies a specific 239 // address). 240 if dstAddr != tcpip.Address([]byte{0, 0, 0, 0}) && opts.To == nil { 241 opts.To = &tcpip.FullAddress{ 242 NIC: 0, // NIC is unset. 243 Addr: dstAddr, // The address from the payload. 244 Port: 0, // There are no ports here. 245 } 246 } 247 } 248 249 // Did the user caller provide a destination? If not, use the connected 250 // destination. 251 if opts.To == nil { 252 // If the user doesn't specify a destination, they should have 253 // connected to another address. 254 if !ep.connected { 255 ep.mu.RUnlock() 256 return 0, nil, tcpip.ErrDestinationRequired 257 } 258 259 if ep.route.IsResolutionRequired() { 260 savedRoute := &ep.route 261 // Promote lock to exclusive if using a shared route, 262 // given that it may need to change in finishWrite. 263 ep.mu.RUnlock() 264 ep.mu.Lock() 265 266 // Make sure that the route didn't change during the 267 // time we didn't hold the lock. 268 if !ep.connected || savedRoute != &ep.route { 269 ep.mu.Unlock() 270 return 0, nil, tcpip.ErrInvalidEndpointState 271 } 272 273 n, ch, err := ep.finishWrite(payloadBytes, savedRoute) 274 ep.mu.Unlock() 275 return n, ch, err 276 } 277 278 n, ch, err := ep.finishWrite(payloadBytes, &ep.route) 279 ep.mu.RUnlock() 280 return n, ch, err 281 } 282 283 // The caller provided a destination. Reject destination address if it 284 // goes through a different NIC than the endpoint was bound to. 285 nic := opts.To.NIC 286 if ep.bound && nic != 0 && nic != ep.boundNIC { 287 ep.mu.RUnlock() 288 return 0, nil, tcpip.ErrNoRoute 289 } 290 291 // We don't support IPv6 yet, so this has to be an IPv4 address. 292 if len(opts.To.Addr) != header.IPv4AddressSize { 293 ep.mu.RUnlock() 294 return 0, nil, tcpip.ErrInvalidEndpointState 295 } 296 297 // Find the route to the destination. If boundAddress is 0, 298 // FindRoute will choose an appropriate source address. 299 route, err := ep.stack.FindRoute(nic, ep.boundAddr, opts.To.Addr, ep.netProto, false) 300 if err != nil { 301 ep.mu.RUnlock() 302 return 0, nil, err 303 } 304 305 n, ch, err := ep.finishWrite(payloadBytes, &route) 306 route.Release() 307 ep.mu.RUnlock() 308 return n, ch, err 309 } 310 311 // finishWrite writes the payload to a route. It resolves the route if 312 // necessary. It's really just a helper to make defer unnecessary in Write. 313 func (ep *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, <-chan struct{}, *tcpip.Error) { 314 // We may need to resolve the route (match a link layer address to the 315 // network address). If that requires blocking (e.g. to use ARP), 316 // return a channel on which the caller can wait. 317 if route.IsResolutionRequired() { 318 if ch, err := route.Resolve(nil); err != nil { 319 if err == tcpip.ErrWouldBlock { 320 return 0, ch, tcpip.ErrNoLinkAddress 321 } 322 return 0, nil, err 323 } 324 } 325 326 switch ep.netProto { 327 case header.IPv4ProtocolNumber: 328 if !ep.associated { 329 if err := route.WriteHeaderIncludedPacket(buffer.View(payloadBytes).ToVectorisedView()); err != nil { 330 return 0, nil, err 331 } 332 break 333 } 334 hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength())) 335 if err := route.WritePacket(nil /* gso */, hdr, buffer.View(payloadBytes).ToVectorisedView(), ep.transProto, 0, true /* useDefaultTTL */); err != nil { 336 return 0, nil, err 337 } 338 339 default: 340 return 0, nil, tcpip.ErrUnknownProtocol 341 } 342 343 return int64(len(payloadBytes)), nil, nil 344 } 345 346 // Peek implements tcpip.Endpoint.Peek. 347 func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) { 348 return 0, tcpip.ControlMessages{}, nil 349 } 350 351 // Disconnect implements tcpip.Endpoint.Disconnect. 352 func (*endpoint) Disconnect() *tcpip.Error { 353 return tcpip.ErrNotSupported 354 } 355 356 // Connect implements tcpip.Endpoint.Connect. 357 func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error { 358 ep.mu.Lock() 359 defer ep.mu.Unlock() 360 361 if ep.closed { 362 return tcpip.ErrInvalidEndpointState 363 } 364 365 // We don't support IPv6 yet. 366 if len(addr.Addr) != header.IPv4AddressSize { 367 return tcpip.ErrInvalidEndpointState 368 } 369 370 nic := addr.NIC 371 if ep.bound { 372 if ep.boundNIC == 0 { 373 // If we're bound, but not to a specific NIC, the NIC 374 // in addr will be used. Nothing to do here. 375 } else if addr.NIC == 0 { 376 // If we're bound to a specific NIC, but addr doesn't 377 // specify a NIC, use the bound NIC. 378 nic = ep.boundNIC 379 } else if addr.NIC != ep.boundNIC { 380 // We're bound and addr specifies a NIC. They must be 381 // the same. 382 return tcpip.ErrInvalidEndpointState 383 } 384 } 385 386 // Find a route to the destination. 387 route, err := ep.stack.FindRoute(nic, tcpip.Address(""), addr.Addr, ep.netProto, false) 388 if err != nil { 389 return err 390 } 391 defer route.Release() 392 393 if ep.associated { 394 // Re-register the endpoint with the appropriate NIC. 395 if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil { 396 return err 397 } 398 ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) 399 ep.registeredNIC = nic 400 } 401 402 // Save the route we've connected via. 403 ep.route = route.Clone() 404 ep.connected = true 405 406 return nil 407 } 408 409 // Shutdown implements tcpip.Endpoint.Shutdown. It's a noop for raw sockets. 410 func (ep *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error { 411 ep.mu.Lock() 412 defer ep.mu.Unlock() 413 414 if !ep.connected { 415 return tcpip.ErrNotConnected 416 } 417 return nil 418 } 419 420 // Listen implements tcpip.Endpoint.Listen. 421 func (ep *endpoint) Listen(backlog int) *tcpip.Error { 422 return tcpip.ErrNotSupported 423 } 424 425 // Accept implements tcpip.Endpoint.Accept. 426 func (ep *endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { 427 return nil, nil, tcpip.ErrNotSupported 428 } 429 430 // Bind implements tcpip.Endpoint.Bind. 431 func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error { 432 ep.mu.Lock() 433 defer ep.mu.Unlock() 434 435 // Callers must provide an IPv4 address or no network address (for 436 // binding to a NIC, but not an address). 437 if len(addr.Addr) != 0 && len(addr.Addr) != 4 { 438 return tcpip.ErrInvalidEndpointState 439 } 440 441 // If a local address was specified, verify that it's valid. 442 if len(addr.Addr) == header.IPv4AddressSize && ep.stack.CheckLocalAddress(addr.NIC, ep.netProto, addr.Addr) == 0 { 443 return tcpip.ErrBadLocalAddress 444 } 445 446 if ep.associated { 447 // Re-register the endpoint with the appropriate NIC. 448 if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil { 449 return err 450 } 451 ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep) 452 ep.registeredNIC = addr.NIC 453 ep.boundNIC = addr.NIC 454 } 455 456 ep.boundAddr = addr.Addr 457 ep.bound = true 458 459 return nil 460 } 461 462 // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress. 463 func (ep *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) { 464 return tcpip.FullAddress{}, tcpip.ErrNotSupported 465 } 466 467 // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress. 468 func (ep *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) { 469 // Even a connected socket doesn't return a remote address. 470 return tcpip.FullAddress{}, tcpip.ErrNotConnected 471 } 472 473 // Readiness implements tcpip.Endpoint.Readiness. 474 func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask { 475 // The endpoint is always writable. 476 result := waiter.EventOut & mask 477 478 // Determine whether the endpoint is readable. 479 if (mask & waiter.EventIn) != 0 { 480 ep.rcvMu.Lock() 481 if !ep.rcvList.Empty() || ep.rcvClosed { 482 result |= waiter.EventIn 483 } 484 ep.rcvMu.Unlock() 485 } 486 487 return result 488 } 489 490 // SetSockOpt implements tcpip.Endpoint.SetSockOpt. 491 func (ep *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { 492 return tcpip.ErrUnknownProtocolOption 493 } 494 495 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt. 496 func (ep *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { 497 return tcpip.ErrUnknownProtocolOption 498 } 499 500 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt. 501 func (ep *endpoint) GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error) { 502 switch opt { 503 case tcpip.ReceiveQueueSizeOption: 504 v := 0 505 ep.rcvMu.Lock() 506 if !ep.rcvList.Empty() { 507 p := ep.rcvList.Front() 508 v = p.data.Size() 509 } 510 ep.rcvMu.Unlock() 511 return v, nil 512 513 case tcpip.SendBufferSizeOption: 514 ep.mu.Lock() 515 v := ep.sndBufSize 516 ep.mu.Unlock() 517 return v, nil 518 519 case tcpip.ReceiveBufferSizeOption: 520 ep.rcvMu.Lock() 521 v := ep.rcvBufSizeMax 522 ep.rcvMu.Unlock() 523 return v, nil 524 525 } 526 527 return -1, tcpip.ErrUnknownProtocolOption 528 } 529 530 // GetSockOpt implements tcpip.Endpoint.GetSockOpt. 531 func (ep *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { 532 switch o := opt.(type) { 533 case tcpip.ErrorOption: 534 return nil 535 536 case *tcpip.KeepaliveEnabledOption: 537 *o = 0 538 return nil 539 540 default: 541 return tcpip.ErrUnknownProtocolOption 542 } 543 } 544 545 // HandlePacket implements stack.RawTransportEndpoint.HandlePacket. 546 func (ep *endpoint) HandlePacket(route *stack.Route, netHeader buffer.View, vv buffer.VectorisedView) { 547 ep.rcvMu.Lock() 548 549 // Drop the packet if our buffer is currently full. 550 if ep.rcvClosed || ep.rcvBufSize >= ep.rcvBufSizeMax { 551 ep.stack.Stats().DroppedPackets.Increment() 552 ep.rcvMu.Unlock() 553 return 554 } 555 556 if ep.bound { 557 // If bound to a NIC, only accept data for that NIC. 558 if ep.boundNIC != 0 && ep.boundNIC != route.NICID() { 559 ep.rcvMu.Unlock() 560 return 561 } 562 // If bound to an address, only accept data for that address. 563 if ep.boundAddr != "" && ep.boundAddr != route.RemoteAddress { 564 ep.rcvMu.Unlock() 565 return 566 } 567 } 568 569 // If connected, only accept packets from the remote address we 570 // connected to. 571 if ep.connected && ep.route.RemoteAddress != route.RemoteAddress { 572 ep.rcvMu.Unlock() 573 return 574 } 575 576 wasEmpty := ep.rcvBufSize == 0 577 578 // Push new packet into receive list and increment the buffer size. 579 packet := &packet{ 580 senderAddr: tcpip.FullAddress{ 581 NIC: route.NICID(), 582 Addr: route.RemoteAddress, 583 }, 584 } 585 586 combinedVV := netHeader.ToVectorisedView() 587 combinedVV.Append(vv) 588 packet.data = combinedVV.Clone(packet.views[:]) 589 packet.timestampNS = ep.stack.NowNanoseconds() 590 591 ep.rcvList.PushBack(packet) 592 ep.rcvBufSize += packet.data.Size() 593 594 ep.rcvMu.Unlock() 595 596 // Notify waiters that there's data to be read. 597 if wasEmpty { 598 ep.waiterQueue.Notify(waiter.EventIn) 599 } 600 } 601 602 // State implements socket.Socket.State. 603 func (ep *endpoint) State() uint32 { 604 return 0 605 }