// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package ipv4 contains the implementation of the ipv4 network protocol.
package ipv4

import (
	"fmt"
	"math"
	"reflect"
	"time"

	"gvisor.dev/gvisor/pkg/atomicbitops"
	"gvisor.dev/gvisor/pkg/buffer"
	"gvisor.dev/gvisor/pkg/sync"
	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/header"
	"gvisor.dev/gvisor/pkg/tcpip/header/parse"
	"gvisor.dev/gvisor/pkg/tcpip/network/hash"
	"gvisor.dev/gvisor/pkg/tcpip/network/internal/fragmentation"
	"gvisor.dev/gvisor/pkg/tcpip/network/internal/ip"
	"gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast"
	"gvisor.dev/gvisor/pkg/tcpip/stack"
)

const (
	// ReassembleTimeout is the time a packet stays in the reassembly
	// system before being evicted.
	// As per RFC 791 section 3.2:
	//   The current recommendation for the initial timer setting is 15 seconds.
	//   This may be changed as experience with this protocol accumulates.
	//
	// Considering that it is an old recommendation, we use the same reassembly
	// timeout that linux defines, which is 30 seconds:
	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138
	ReassembleTimeout = 30 * time.Second

	// ProtocolNumber is the ipv4 protocol number.
	ProtocolNumber = header.IPv4ProtocolNumber

	// MaxTotalSize is maximum size that can be encoded in the 16-bit
	// TotalLength field of the ipv4 header.
	MaxTotalSize = 0xffff

	// DefaultTTL is the default time-to-live value for this endpoint.
	DefaultTTL = 64

	// buckets is the number of identifier buckets.
	buckets = 2048

	// The size of a fragment block, in bytes, as per RFC 791 section 3.1,
	// page 14.
	fragmentblockSize = 8
)

// Values stored in endpoint.forwarding and endpoint.multicastForwarding.
const (
	forwardingDisabled = 0
	forwardingEnabled  = 1
)

// ipv4BroadcastAddr is header.IPv4Broadcast together with its prefix, as
// returned by WithPrefix.
var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix()

// Compile-time checks that *endpoint implements the listed interfaces.
var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil)
var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil)
var _ stack.MulticastForwardingNetworkEndpoint = (*endpoint)(nil)
var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
var _ stack.AddressableEndpoint = (*endpoint)(nil)
var _ stack.NetworkEndpoint = (*endpoint)(nil)
var _ IGMPEndpoint = (*endpoint)(nil)

// endpoint is the IPv4 network endpoint created by protocol.NewEndpoint for a
// single network interface.
type endpoint struct {
	nic        stack.NetworkInterface
	dispatcher stack.TransportDispatcher
	protocol   *protocol
	stats      sharedStats

	// enabled is set to 1 when the endpoint is enabled and 0 when it is
	// disabled.
	enabled atomicbitops.Uint32

	// forwarding is set to forwardingEnabled when the endpoint has forwarding
	// enabled and forwardingDisabled when it is disabled.
	forwarding atomicbitops.Uint32

	// multicastForwarding is set to forwardingEnabled when the endpoint has
	// multicast forwarding enabled and forwardingDisabled when it is disabled.
	//
	// TODO(https://gvisor.dev/issue/7338): Implement support for multicast
	// forwarding. Currently, setting this value to true is a no-op.
	multicastForwarding atomicbitops.Uint32

	// mu protects below.
	mu sync.RWMutex

	// +checklocks:mu
	addressableEndpointState stack.AddressableEndpointState

	// +checklocks:mu
	igmp igmpState
}

// SetIGMPVersion implements IGMPEndpoint.
//
// It takes e.mu for writing and returns the result of setIGMPVersionLocked.
func (e *endpoint) SetIGMPVersion(v IGMPVersion) IGMPVersion {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.setIGMPVersionLocked(v)
}

// GetIGMPVersion implements IGMPEndpoint.
//
// It takes e.mu for reading and returns the result of getIGMPVersionLocked.
func (e *endpoint) GetIGMPVersion() IGMPVersion {
	e.mu.RLock()
	defer e.mu.RUnlock()
	return e.getIGMPVersionLocked()
}

// setIGMPVersionLocked forwards to e.igmp.setVersion and returns its result.
//
// +checklocks:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) setIGMPVersionLocked(v IGMPVersion) IGMPVersion {
	return e.igmp.setVersion(v)
}

// getIGMPVersionLocked forwards to e.igmp.getVersion and returns its result.
//
// +checklocksread:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) getIGMPVersionLocked() IGMPVersion {
	return e.igmp.getVersion()
}

// HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint.
//
// For a forwarded packet an ICMP host-unreachable error is returned to the
// original sender and the forwarding error counters are incremented. For any
// other packet the failure is reported through handleControl.
func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) {
	// If we are operating as a router, return an ICMP error to the original
	// packet's sender.
	if pkt.NetworkPacketInfo.IsForwardedPacket {
		// TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP
		// errors to local endpoints.
		e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt, false /* deliveredLocally */)
		e.stats.ip.Forwarding.Errors.Increment()
		e.stats.ip.Forwarding.HostUnreachable.Increment()
		return
	}
	// handleControl expects the entire offending packet to be in the packet
	// buffer's data field.
	pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{
		Payload: pkt.ToBuffer(),
	})
	defer pkt.DecRef()
	pkt.NICID = e.nic.ID()
	pkt.NetworkProtocolNumber = ProtocolNumber
	// Use the same control type as an ICMPv4 destination host unreachable error
	// since the host is considered unreachable if we cannot resolve the link
	// address to the next hop.
	e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt)
}

// NewEndpoint creates a new ipv4 endpoint.
//
// The endpoint's addressable state, IGMP state and stat counters are
// initialized, and the endpoint is registered in p.eps under the NIC's ID
// before it is returned.
func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
	e := &endpoint{
		nic:        nic,
		dispatcher: dispatcher,
		protocol:   p,
	}
	e.mu.Lock()
	e.addressableEndpointState.Init(e, stack.AddressableEndpointStateOptions{HiddenWhileDisabled: false})
	e.igmp.init(e)
	e.mu.Unlock()

	tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem())

	stackStats := p.stack.Stats()
	e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP)
	e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V4)
	e.stats.igmp.init(&e.stats.localStats.IGMP, &stackStats.IGMP)

	p.mu.Lock()
	p.eps[nic.ID()] = e
	p.mu.Unlock()

	return e
}

// findEndpointWithAddress returns an endpoint registered with p for which
// AcquireAssignedAddress yields a non-nil address endpoint for addr, or nil if
// no registered endpoint does. p.mu is held for reading during the search.
func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint {
	p.mu.RLock()
	defer p.mu.RUnlock()

	for _, e := range p.eps {
		if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint, true /* readOnly */); addressEndpoint != nil {
			return e
		}
	}

	return nil
}

// getEndpointForNIC returns the endpoint registered for the NIC with the given
// ID and whether such an endpoint exists.
func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) {
	p.mu.RLock()
	defer p.mu.RUnlock()
	ep, ok := p.eps[id]
	return ep, ok
}

// forgetEndpoint removes the endpoint registered for nicID from p.eps.
func (p *protocol) forgetEndpoint(nicID tcpip.NICID) {
	p.mu.Lock()
	defer p.mu.Unlock()
	delete(p.eps, nicID)
}

// Forwarding implements stack.ForwardingNetworkEndpoint.
func (e *endpoint) Forwarding() bool {
	return e.forwarding.Load() == forwardingEnabled
}

// setForwarding sets the forwarding status for the endpoint.
//
// Returns the previous forwarding status.
func (e *endpoint) setForwarding(v bool) bool {
	forwarding := uint32(forwardingDisabled)
	if v {
		forwarding = forwardingEnabled
	}

	return e.forwarding.Swap(forwarding) != forwardingDisabled
}

// SetForwarding implements stack.ForwardingNetworkEndpoint.
//
// It returns the previous forwarding status. When the status changes, the
// endpoint joins (forwarding enabled) or leaves (forwarding disabled) the
// all-routers multicast group.
func (e *endpoint) SetForwarding(forwarding bool) bool {
	e.mu.Lock()
	defer e.mu.Unlock()

	prevForwarding := e.setForwarding(forwarding)
	if prevForwarding == forwarding {
		return prevForwarding
	}

	if forwarding {
		// There does not seem to be an RFC requirement for a node to join the all
		// routers multicast address but
		// https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml
		// specifies the address as a group for all routers on a subnet so we join
		// the group here.
		if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil {
			// joinGroupLocked only returns an error if the group address is not a
			// valid IPv4 multicast address.
			panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err))
		}

		return prevForwarding
	}

	switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) {
	case nil:
	case *tcpip.ErrBadLocalAddress:
		// The endpoint may have already left the multicast group.
	default:
		panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err))
	}

	return prevForwarding
}

// MulticastForwarding implements stack.MulticastForwardingNetworkEndpoint.
271 func (e *endpoint) MulticastForwarding() bool { 272 return e.multicastForwarding.Load() == forwardingEnabled 273 } 274 275 // SetMulticastForwarding implements stack.MulticastForwardingNetworkEndpoint. 276 func (e *endpoint) SetMulticastForwarding(forwarding bool) bool { 277 updatedForwarding := uint32(forwardingDisabled) 278 if forwarding { 279 updatedForwarding = forwardingEnabled 280 } 281 282 return e.multicastForwarding.Swap(updatedForwarding) != forwardingDisabled 283 } 284 285 // Enable implements stack.NetworkEndpoint. 286 func (e *endpoint) Enable() tcpip.Error { 287 e.mu.Lock() 288 defer e.mu.Unlock() 289 return e.enableLocked() 290 } 291 292 // +checklocks:e.mu 293 // +checklocksalias:e.igmp.ep.mu=e.mu 294 func (e *endpoint) enableLocked() tcpip.Error { 295 // If the NIC is not enabled, the endpoint can't do anything meaningful so 296 // don't enable the endpoint. 297 if !e.nic.Enabled() { 298 return &tcpip.ErrNotPermitted{} 299 } 300 301 // If the endpoint is already enabled, there is nothing for it to do. 302 if !e.setEnabled(true) { 303 return nil 304 } 305 306 // Must be called after Enabled has already been set. 307 e.addressableEndpointState.OnNetworkEndpointEnabledChanged() 308 309 // Create an endpoint to receive broadcast packets on this interface. 310 ep, err := e.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.AddressProperties{PEB: stack.NeverPrimaryEndpoint}) 311 if err != nil { 312 return err 313 } 314 // We have no need for the address endpoint. 315 ep.DecRef() 316 317 // Groups may have been joined while the endpoint was disabled, or the 318 // endpoint may have left groups from the perspective of IGMP when the 319 // endpoint was disabled. Either way, we need to let routers know to 320 // send us multicast traffic. 321 e.igmp.initializeAll() 322 323 // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts 324 // multicast group. 
Note, the IANA calls the all-hosts multicast group the 325 // all-systems multicast group. 326 if err := e.joinGroupLocked(header.IPv4AllSystems); err != nil { 327 // joinGroupLocked only returns an error if the group address is not a valid 328 // IPv4 multicast address. 329 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllSystems, err)) 330 } 331 332 return nil 333 } 334 335 // Enabled implements stack.NetworkEndpoint. 336 func (e *endpoint) Enabled() bool { 337 return e.nic.Enabled() && e.isEnabled() 338 } 339 340 // isEnabled returns true if the endpoint is enabled, regardless of the 341 // enabled status of the NIC. 342 func (e *endpoint) isEnabled() bool { 343 return e.enabled.Load() == 1 344 } 345 346 // setEnabled sets the enabled status for the endpoint. 347 // 348 // Returns true if the enabled status was updated. 349 func (e *endpoint) setEnabled(v bool) bool { 350 if v { 351 return e.enabled.Swap(1) == 0 352 } 353 return e.enabled.Swap(0) == 1 354 } 355 356 // Disable implements stack.NetworkEndpoint. 357 func (e *endpoint) Disable() { 358 e.mu.Lock() 359 defer e.mu.Unlock() 360 e.disableLocked() 361 } 362 363 // +checklocks:e.mu 364 // +checklocksalias:e.igmp.ep.mu=e.mu 365 func (e *endpoint) disableLocked() { 366 if !e.isEnabled() { 367 return 368 } 369 370 // The endpoint may have already left the multicast group. 371 switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) { 372 case nil, *tcpip.ErrBadLocalAddress: 373 default: 374 panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err)) 375 } 376 377 // Leave groups from the perspective of IGMP so that routers know that 378 // we are no longer interested in the group. 379 e.igmp.softLeaveAll() 380 381 // The address may have already been removed. 
382 switch err := e.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err.(type) { 383 case nil, *tcpip.ErrBadLocalAddress: 384 default: 385 panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err)) 386 } 387 388 // Reset the IGMP V1 present flag. 389 // 390 // If the node comes back up on the same network, it will re-learn that it 391 // needs to perform IGMPv1. 392 e.igmp.resetV1Present() 393 394 if !e.setEnabled(false) { 395 panic("should have only done work to disable the endpoint if it was enabled") 396 } 397 398 // Must be called after Enabled has been set. 399 e.addressableEndpointState.OnNetworkEndpointEnabledChanged() 400 } 401 402 // emitMulticastEvent emits a multicast forwarding event using the provided 403 // generator if a valid event dispatcher exists. 404 func (e *endpoint) emitMulticastEvent(eventGenerator func(stack.MulticastForwardingEventDispatcher)) { 405 e.protocol.mu.RLock() 406 defer e.protocol.mu.RUnlock() 407 408 if mcastDisp := e.protocol.multicastForwardingDisp; mcastDisp != nil { 409 eventGenerator(mcastDisp) 410 } 411 } 412 413 // DefaultTTL is the default time-to-live value for this endpoint. 414 func (e *endpoint) DefaultTTL() uint8 { 415 return e.protocol.DefaultTTL() 416 } 417 418 // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the 419 // network layer max header length. 420 func (e *endpoint) MTU() uint32 { 421 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize) 422 if err != nil { 423 return 0 424 } 425 return networkMTU 426 } 427 428 // MaxHeaderLength returns the maximum length needed by ipv4 headers (and 429 // underlying protocols). 430 func (e *endpoint) MaxHeaderLength() uint16 { 431 return e.nic.MaxHeaderLength() + header.IPv4MaximumHeaderSize 432 } 433 434 // NetworkProtocolNumber implements stack.NetworkEndpoint. 
435 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { 436 return e.protocol.Number() 437 } 438 439 // getID returns a random uint16 number (other than zero) to be used as ID in 440 // the IPv4 header. 441 func (e *endpoint) getID() uint16 { 442 rng := e.protocol.stack.SecureRNG() 443 id := rng.Uint16() 444 for id == 0 { 445 id = rng.Uint16() 446 } 447 return id 448 } 449 450 func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { 451 hdrLen := header.IPv4MinimumSize 452 var optLen int 453 if options != nil { 454 optLen = int(options.Length()) 455 } 456 hdrLen += optLen 457 if hdrLen > header.IPv4MaximumHeaderSize { 458 return &tcpip.ErrMessageTooLong{} 459 } 460 ipH := header.IPv4(pkt.NetworkHeader().Push(hdrLen)) 461 length := pkt.Size() 462 if length > math.MaxUint16 { 463 return &tcpip.ErrMessageTooLong{} 464 } 465 466 fields := header.IPv4Fields{ 467 TotalLength: uint16(length), 468 TTL: params.TTL, 469 TOS: params.TOS, 470 Protocol: uint8(params.Protocol), 471 SrcAddr: srcAddr, 472 DstAddr: dstAddr, 473 Options: options, 474 } 475 if params.DF { 476 // Treat want and do the same. 477 fields.Flags = header.IPv4FlagDontFragment 478 } else { 479 // RFC 6864 section 4.3 mandates uniqueness of ID values for 480 // non-atomic datagrams. 481 fields.ID = e.getID() 482 } 483 ipH.Encode(&fields) 484 485 ipH.SetChecksum(^ipH.CalculateChecksum()) 486 pkt.NetworkProtocolNumber = ProtocolNumber 487 return nil 488 } 489 490 // handleFragments fragments pkt and calls the handler function on each 491 // fragment. It returns the number of fragments handled and the number of 492 // fragments left to be processed. The IP header must already be present in the 493 // original packet. 
494 func (e *endpoint) handleFragments(_ *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { 495 // Round the MTU down to align to 8 bytes. 496 fragmentPayloadSize := networkMTU &^ 7 497 networkHeader := header.IPv4(pkt.NetworkHeader().Slice()) 498 pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader)) 499 defer pf.Release() 500 501 var n int 502 for { 503 fragPkt, more := buildNextFragment(&pf, networkHeader) 504 err := handler(fragPkt) 505 fragPkt.DecRef() 506 if err != nil { 507 return n, pf.RemainingFragmentCount() + 1, err 508 } 509 n++ 510 if !more { 511 return n, pf.RemainingFragmentCount(), nil 512 } 513 } 514 } 515 516 // WritePacket writes a packet to the given destination address and protocol. 517 func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { 518 if err := e.addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* options */); err != nil { 519 return err 520 } 521 522 return e.writePacket(r, pkt) 523 } 524 525 func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 526 netHeader := header.IPv4(pkt.NetworkHeader().Slice()) 527 dstAddr := netHeader.DestinationAddress() 528 529 // iptables filtering. All packets that reach here are locally 530 // generated. 531 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 532 if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { 533 // iptables is telling us to drop the packet. 534 e.stats.ip.IPTablesOutputDropped.Increment() 535 return nil 536 } 537 538 // If the packet is manipulated as per DNAT Output rules, handle packet 539 // based on destination address and do not send the packet to link 540 // layer. 
541 // 542 // We should do this for every packet, rather than only DNATted packets, but 543 // removing this check short circuits broadcasts before they are sent out to 544 // other hosts. 545 if newDstAddr := netHeader.DestinationAddress(); dstAddr != newDstAddr { 546 if ep := e.protocol.findEndpointWithAddress(newDstAddr); ep != nil { 547 // Since we rewrote the packet but it is being routed back to us, we 548 // can safely assume the checksum is valid. 549 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 550 return nil 551 } 552 } 553 554 return e.writePacketPostRouting(r, pkt, false /* headerIncluded */) 555 } 556 557 func (e *endpoint) writePacketPostRouting(r *stack.Route, pkt *stack.PacketBuffer, headerIncluded bool) tcpip.Error { 558 if r.Loop()&stack.PacketLoop != 0 { 559 // If the packet was generated by the stack (not a raw/packet endpoint 560 // where a packet may be written with the header included), then we can 561 // safely assume the checksum is valid. 562 e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) 563 } 564 if r.Loop()&stack.PacketOut == 0 { 565 return nil 566 } 567 568 // Postrouting NAT can only change the source address, and does not alter the 569 // route or outgoing interface of the packet. 570 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 571 if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { 572 // iptables is telling us to drop the packet. 
573 e.stats.ip.IPTablesPostroutingDropped.Increment() 574 return nil 575 } 576 577 stats := e.stats.ip 578 579 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(len(pkt.NetworkHeader().Slice()))) 580 if err != nil { 581 stats.OutgoingPacketErrors.Increment() 582 return err 583 } 584 585 if packetMustBeFragmented(pkt, networkMTU) { 586 h := header.IPv4(pkt.NetworkHeader().Slice()) 587 if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { 588 // TODO(gvisor.dev/issue/5919): Handle error condition in which DontFragment 589 // is set but the packet must be fragmented for the non-forwarding case. 590 return &tcpip.ErrMessageTooLong{} 591 } 592 sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { 593 // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each 594 // fragment one by one using WritePacket() (current strategy) or if we 595 // want to create a PacketBufferList from the fragments and feed it to 596 // WritePackets(). It'll be faster but cost more memory. 597 return e.nic.WritePacket(r, fragPkt) 598 }) 599 stats.PacketsSent.IncrementBy(uint64(sent)) 600 stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) 601 return err 602 } 603 604 if err := e.nic.WritePacket(r, pkt); err != nil { 605 stats.OutgoingPacketErrors.Increment() 606 return err 607 } 608 stats.PacketsSent.Increment() 609 return nil 610 } 611 612 // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. 613 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 614 // The packet already has an IP header, but there are a few required 615 // checks. 
616 h, ok := pkt.Data().PullUp(header.IPv4MinimumSize) 617 if !ok { 618 return &tcpip.ErrMalformedHeader{} 619 } 620 621 hdrLen := header.IPv4(h).HeaderLength() 622 if hdrLen < header.IPv4MinimumSize { 623 return &tcpip.ErrMalformedHeader{} 624 } 625 626 h, ok = pkt.Data().PullUp(int(hdrLen)) 627 if !ok { 628 return &tcpip.ErrMalformedHeader{} 629 } 630 ipH := header.IPv4(h) 631 632 // Always set the total length. 633 pktSize := pkt.Data().Size() 634 ipH.SetTotalLength(uint16(pktSize)) 635 636 // Set the source address when zero. 637 if ipH.SourceAddress() == header.IPv4Any { 638 ipH.SetSourceAddress(r.LocalAddress()) 639 } 640 641 // Set the packet ID when zero. 642 if ipH.ID() == 0 { 643 // RFC 6864 section 4.3 mandates uniqueness of ID values for 644 // non-atomic datagrams, so assign an ID to all such datagrams 645 // according to the definition given in RFC 6864 section 4. 646 if ipH.Flags()&header.IPv4FlagDontFragment == 0 || ipH.Flags()&header.IPv4FlagMoreFragments != 0 || ipH.FragmentOffset() > 0 { 647 ipH.SetID(e.getID()) 648 } 649 } 650 651 // Always set the checksum. 652 ipH.SetChecksum(0) 653 ipH.SetChecksum(^ipH.CalculateChecksum()) 654 655 // Populate the packet buffer's network header and don't allow an invalid 656 // packet to be sent. 657 // 658 // Note that parsing only makes sure that the packet is well formed as per the 659 // wire format. We also want to check if the header's fields are valid before 660 // sending the packet. 661 if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().Slice()).IsValid(pktSize) { 662 return &tcpip.ErrMalformedHeader{} 663 } 664 665 return e.writePacketPostRouting(r, pkt, true /* headerIncluded */) 666 } 667 668 // forwardPacketWithRoute emits the pkt using the provided route. 669 // 670 // If updateOptions is true, then the IP options will be updated in the copied 671 // pkt using the outgoing endpoint. Otherwise, the caller is responsible for 672 // updating the options. 
673 // 674 // This method should be invoked by the endpoint that received the pkt. 675 func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt *stack.PacketBuffer, updateOptions bool) ip.ForwardingError { 676 h := header.IPv4(pkt.NetworkHeader().Slice()) 677 stk := e.protocol.stack 678 679 inNicName := stk.FindNICNameFromID(e.nic.ID()) 680 outNicName := stk.FindNICNameFromID(route.NICID()) 681 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 682 // iptables is telling us to drop the packet. 683 e.stats.ip.IPTablesForwardDropped.Increment() 684 return nil 685 } 686 687 // We need to do a deep copy of the IP packet because 688 // WriteHeaderIncludedPacket may modify the packet buffer, but we do 689 // not own it. 690 // 691 // TODO(https://gvisor.dev/issue/7473): For multicast, only create one deep 692 // copy and then clone. 693 newPkt := pkt.DeepCopyForForwarding(int(route.MaxHeaderLength())) 694 newHdr := header.IPv4(newPkt.NetworkHeader().Slice()) 695 defer newPkt.DecRef() 696 697 forwardToEp, ok := e.protocol.getEndpointForNIC(route.NICID()) 698 if !ok { 699 return &ip.ErrUnknownOutputEndpoint{} 700 } 701 702 if updateOptions { 703 if err := forwardToEp.updateOptionsForForwarding(newPkt); err != nil { 704 return err 705 } 706 } 707 708 ttl := h.TTL() 709 // As per RFC 791 page 30, Time to Live, 710 // 711 // This field must be decreased at each point that the internet header 712 // is processed to reflect the time spent processing the datagram. 713 // Even if no local information is available on the time actually 714 // spent, the field must be decremented by 1. 715 newHdr.SetTTL(ttl - 1) 716 // We perform a full checksum as we may have updated options above. The IP 717 // header is relatively small so this is not expected to be an expensive 718 // operation. 
719 newHdr.SetChecksum(0) 720 newHdr.SetChecksum(^newHdr.CalculateChecksum()) 721 722 switch err := forwardToEp.writePacketPostRouting(route, newPkt, true /* headerIncluded */); err.(type) { 723 case nil: 724 return nil 725 case *tcpip.ErrMessageTooLong: 726 // As per RFC 792, page 4, Destination Unreachable: 727 // 728 // Another case is when a datagram must be fragmented to be forwarded by a 729 // gateway yet the Don't Fragment flag is on. In this case the gateway must 730 // discard the datagram and may return a destination unreachable message. 731 // 732 // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit 733 // while sending the packet and returns this error iff fragmentation is 734 // necessary and the bit is also set. 735 _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt, false /* deliveredLocally */) 736 return &ip.ErrMessageTooLong{} 737 case *tcpip.ErrNoBufferSpace: 738 return &ip.ErrOutgoingDeviceNoBufferSpace{} 739 default: 740 return &ip.ErrOther{Err: err} 741 } 742 } 743 744 // forwardUnicastPacket attempts to forward a packet to its final destination. 745 func (e *endpoint) forwardUnicastPacket(pkt *stack.PacketBuffer) ip.ForwardingError { 746 hView := pkt.NetworkHeader().View() 747 defer hView.Release() 748 h := header.IPv4(hView.AsSlice()) 749 750 dstAddr := h.DestinationAddress() 751 752 if err := validateAddressesForForwarding(h); err != nil { 753 return err 754 } 755 756 ttl := h.TTL() 757 if ttl == 0 { 758 // As per RFC 792 page 6, Time Exceeded Message, 759 // 760 // If the gateway processing a datagram finds the time to live field 761 // is zero it must discard the datagram. The gateway may also notify 762 // the source host via the time exceeded message. 763 // 764 // We return the original error rather than the result of returning 765 // the ICMP packet because the original error is more relevant to 766 // the caller. 
767 _ = e.protocol.returnError(&icmpReasonTTLExceeded{}, pkt, false /* deliveredLocally */) 768 return &ip.ErrTTLExceeded{} 769 } 770 771 if err := e.updateOptionsForForwarding(pkt); err != nil { 772 return err 773 } 774 775 stk := e.protocol.stack 776 777 // Check if the destination is owned by the stack. 778 if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { 779 inNicName := stk.FindNICNameFromID(e.nic.ID()) 780 outNicName := stk.FindNICNameFromID(ep.nic.ID()) 781 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 782 // iptables is telling us to drop the packet. 783 e.stats.ip.IPTablesForwardDropped.Increment() 784 return nil 785 } 786 787 // The packet originally arrived on e so provide its NIC as the input NIC. 788 ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 789 return nil 790 } 791 792 r, err := stk.FindRoute(0, tcpip.Address{}, dstAddr, ProtocolNumber, false /* multicastLoop */) 793 switch err.(type) { 794 case nil: 795 // TODO(https://gvisor.dev/issues/8105): We should not observe ErrHostUnreachable from route 796 // lookups. 797 case *tcpip.ErrHostUnreachable, *tcpip.ErrNetworkUnreachable: 798 // We return the original error rather than the result of returning 799 // the ICMP packet because the original error is more relevant to 800 // the caller. 801 _ = e.protocol.returnError(&icmpReasonNetworkUnreachable{}, pkt, false /* deliveredLocally */) 802 return &ip.ErrHostUnreachable{} 803 default: 804 return &ip.ErrOther{Err: err} 805 } 806 defer r.Release() 807 808 // TODO(https://gvisor.dev/issue/7472): Unicast IP options should be updated 809 // using the output endpoint (instead of the input endpoint). In particular, 810 // RFC 1812 section 5.2.1 states the following: 811 // 812 // Processing of certain IP options requires that the router insert its IP 813 // address into the option. 
As noted in Section [5.2.4], the address 814 // inserted MUST be the address of the logical interface on which the 815 // packet is sent or the router's router-id if the packet is sent over an 816 // unnumbered interface. Thus, processing of these options cannot be 817 // completed until after the output interface is chosen. 818 return e.forwardPacketWithRoute(r, pkt, false /* updateOptions */) 819 } 820 821 // HandlePacket is called by the link layer when new ipv4 packets arrive for 822 // this endpoint. 823 func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { 824 stats := e.stats.ip 825 826 stats.PacketsReceived.Increment() 827 828 if !e.isEnabled() { 829 stats.DisabledPacketsReceived.Increment() 830 return 831 } 832 833 hView, ok := e.protocol.parseAndValidate(pkt) 834 if !ok { 835 stats.MalformedPacketsReceived.Increment() 836 return 837 } 838 h := header.IPv4(hView.AsSlice()) 839 defer hView.Release() 840 841 if !e.nic.IsLoopback() { 842 if !e.protocol.options.AllowExternalLoopbackTraffic { 843 if header.IsV4LoopbackAddress(h.SourceAddress()) { 844 stats.InvalidSourceAddressesReceived.Increment() 845 return 846 } 847 848 if header.IsV4LoopbackAddress(h.DestinationAddress()) { 849 stats.InvalidDestinationAddressesReceived.Increment() 850 return 851 } 852 } 853 854 if e.protocol.stack.HandleLocal() { 855 addressEndpoint := e.AcquireAssignedAddress(header.IPv4(pkt.NetworkHeader().Slice()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint, true /* readOnly */) 856 if addressEndpoint != nil { 857 // The source address is one of our own, so we never should have gotten 858 // a packet like this unless HandleLocal is false or our NIC is the 859 // loopback interface. 860 stats.InvalidSourceAddressesReceived.Increment() 861 return 862 } 863 } 864 865 // Loopback traffic skips the prerouting chain. 
866 inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 867 if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { 868 // iptables is telling us to drop the packet. 869 stats.IPTablesPreroutingDropped.Increment() 870 return 871 } 872 } 873 874 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 875 } 876 877 // handleLocalPacket is like HandlePacket except it does not perform the 878 // prerouting iptables hook or check for loopback traffic that originated from 879 // outside of the netstack (i.e. martian loopback packets). 880 func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) { 881 stats := e.stats.ip 882 stats.PacketsReceived.Increment() 883 884 pkt = pkt.CloneToInbound() 885 defer pkt.DecRef() 886 pkt.RXChecksumValidated = canSkipRXChecksum 887 888 hView, ok := e.protocol.parseAndValidate(pkt) 889 if !ok { 890 stats.MalformedPacketsReceived.Increment() 891 return 892 } 893 h := header.IPv4(hView.AsSlice()) 894 defer hView.Release() 895 896 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 897 } 898 899 func validateAddressesForForwarding(h header.IPv4) ip.ForwardingError { 900 srcAddr := h.SourceAddress() 901 902 // As per RFC 5735 section 3, 903 // 904 // 0.0.0.0/8 - Addresses in this block refer to source hosts on "this" 905 // network. Address 0.0.0.0/32 may be used as a source address for this 906 // host on this network; other addresses within 0.0.0.0/8 may be used to 907 // refer to specified hosts on this network ([RFC1122], Section 3.2.1.3). 
908 // 909 // And RFC 6890 section 2.2.2, 910 // 911 // +----------------------+----------------------------+ 912 // | Attribute | Value | 913 // +----------------------+----------------------------+ 914 // | Address Block | 0.0.0.0/8 | 915 // | Name | "This host on this network"| 916 // | RFC | [RFC1122], Section 3.2.1.3 | 917 // | Allocation Date | September 1981 | 918 // | Termination Date | N/A | 919 // | Source | True | 920 // | Destination | False | 921 // | Forwardable | False | 922 // | Global | False | 923 // | Reserved-by-Protocol | True | 924 // +----------------------+----------------------------+ 925 if header.IPv4CurrentNetworkSubnet.Contains(srcAddr) { 926 return &ip.ErrInitializingSourceAddress{} 927 } 928 929 // As per RFC 3927 section 7, 930 // 931 // A router MUST NOT forward a packet with an IPv4 Link-Local source or 932 // destination address, irrespective of the router's default route 933 // configuration or routes obtained from dynamic routing protocols. 934 // 935 // A router which receives a packet with an IPv4 Link-Local source or 936 // destination address MUST NOT forward the packet. This prevents 937 // forwarding of packets back onto the network segment from which they 938 // originated, or to any other segment. 939 if header.IsV4LinkLocalUnicastAddress(srcAddr) { 940 return &ip.ErrLinkLocalSourceAddress{} 941 } 942 if dstAddr := h.DestinationAddress(); header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) { 943 return &ip.ErrLinkLocalDestinationAddress{} 944 } 945 return nil 946 } 947 948 // forwardMulticastPacket validates a multicast pkt and attempts to forward it. 949 // 950 // This method should be invoked for incoming multicast packets using the 951 // endpoint that received the packet. 
952 func (e *endpoint) forwardMulticastPacket(h header.IPv4, pkt *stack.PacketBuffer) ip.ForwardingError { 953 if err := validateAddressesForForwarding(h); err != nil { 954 return err 955 } 956 957 if opts := h.Options(); len(opts) != 0 { 958 // Check if the options are valid, but don't mutate them. This corresponds 959 // to step 3 of RFC 1812 section 5.2.1.1. 960 if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil { 961 // Per RFC 1812 section 4.3.2.7, an ICMP error message should not be 962 // sent for: 963 // 964 // A packet destined to an IP broadcast or IP multicast address. 965 // 966 // Note that protocol.returnError also enforces this requirement. 967 // However, we intentionally omit it here since this path is multicast 968 // only. 969 return &ip.ErrParameterProblem{} 970 } 971 } 972 973 routeKey := stack.UnicastSourceAndMulticastDestination{ 974 Source: h.SourceAddress(), 975 Destination: h.DestinationAddress(), 976 } 977 978 // The pkt has been validated. Consequently, if a route is not found, then 979 // the pkt can safely be queued. 980 result, hasBufferSpace := e.protocol.multicastRouteTable.GetRouteOrInsertPending(routeKey, pkt) 981 982 if !hasBufferSpace { 983 // Unable to queue the pkt. Silently drop it. 984 return &ip.ErrNoMulticastPendingQueueBufferSpace{} 985 } 986 987 switch result.GetRouteResultState { 988 case multicast.InstalledRouteFound: 989 // Attempt to forward the pkt using an existing route. 
990 return e.forwardValidatedMulticastPacket(pkt, result.InstalledRoute) 991 case multicast.NoRouteFoundAndPendingInserted: 992 e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) { 993 disp.OnMissingRoute(stack.MulticastPacketContext{ 994 stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()}, 995 e.nic.ID(), 996 }) 997 }) 998 case multicast.PacketQueuedInPendingRoute: 999 default: 1000 panic(fmt.Sprintf("unexpected GetRouteResultState: %s", result.GetRouteResultState)) 1001 } 1002 return &ip.ErrHostUnreachable{} 1003 } 1004 1005 func (e *endpoint) updateOptionsForForwarding(pkt *stack.PacketBuffer) ip.ForwardingError { 1006 h := header.IPv4(pkt.NetworkHeader().Slice()) 1007 if opts := h.Options(); len(opts) != 0 { 1008 newOpts, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageForward{}) 1009 if optProblem != nil { 1010 if optProblem.NeedICMP { 1011 // Note that this will not emit an ICMP error if the destination is 1012 // multicast. 1013 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1014 pointer: optProblem.Pointer, 1015 }, pkt, false /* deliveredLocally */) 1016 } 1017 return &ip.ErrParameterProblem{} 1018 } 1019 copied := copy(opts, newOpts) 1020 if copied != len(newOpts) { 1021 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 1022 } 1023 // Since in forwarding we handle all options, including copying those we 1024 // do not recognise, the options region should remain the same size which 1025 // simplifies processing. As we MAY receive a packet with a lot of padded 1026 // bytes after the "end of options list" byte, make sure we copy 1027 // them as the legal padding value (0). 1028 for i := copied; i < len(opts); i++ { 1029 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 
1030 opts[i] = byte(header.IPv4OptionListEndType) 1031 } 1032 } 1033 return nil 1034 } 1035 1036 // forwardValidatedMulticastPacket attempts to forward the pkt using the 1037 // provided installedRoute. 1038 // 1039 // This method should be invoked by the endpoint that received the pkt. 1040 func (e *endpoint) forwardValidatedMulticastPacket(pkt *stack.PacketBuffer, installedRoute *multicast.InstalledRoute) ip.ForwardingError { 1041 // Per RFC 1812 section 5.2.1.3, 1042 // 1043 // Based on the IP source and destination addresses found in the datagram 1044 // header, the router determines whether the datagram has been received 1045 // on the proper interface for forwarding. If not, the datagram is 1046 // dropped silently. 1047 if e.nic.ID() != installedRoute.ExpectedInputInterface { 1048 h := header.IPv4(pkt.NetworkHeader().Slice()) 1049 e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) { 1050 disp.OnUnexpectedInputInterface(stack.MulticastPacketContext{ 1051 stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()}, 1052 e.nic.ID(), 1053 }, installedRoute.ExpectedInputInterface) 1054 }) 1055 return &ip.ErrUnexpectedMulticastInputInterface{} 1056 } 1057 1058 for _, outgoingInterface := range installedRoute.OutgoingInterfaces { 1059 if err := e.forwardMulticastPacketForOutgoingInterface(pkt, outgoingInterface); err != nil { 1060 e.handleForwardingError(err) 1061 continue 1062 } 1063 // The pkt was successfully forwarded. Mark the route as used. 1064 installedRoute.SetLastUsedTimestamp(e.protocol.stack.Clock().NowMonotonic()) 1065 } 1066 return nil 1067 } 1068 1069 // forwardMulticastPacketForOutgoingInterface attempts to forward the pkt out 1070 // of the provided outgoingInterface. 1071 // 1072 // This method should be invoked by the endpoint that received the pkt. 
1073 func (e *endpoint) forwardMulticastPacketForOutgoingInterface(pkt *stack.PacketBuffer, outgoingInterface stack.MulticastRouteOutgoingInterface) ip.ForwardingError { 1074 h := header.IPv4(pkt.NetworkHeader().Slice()) 1075 1076 // Per RFC 1812 section 5.2.1.3, 1077 // 1078 // A copy of the multicast datagram is forwarded out each outgoing 1079 // interface whose minimum TTL value is less than or equal to the TTL 1080 // value in the datagram header. 1081 // 1082 // Copying of the packet is deferred to forwardPacketWithRoute since unicast 1083 // and multicast both require a copy. 1084 if outgoingInterface.MinTTL > h.TTL() { 1085 return &ip.ErrTTLExceeded{} 1086 } 1087 1088 route := e.protocol.stack.NewRouteForMulticast(outgoingInterface.ID, h.DestinationAddress(), e.NetworkProtocolNumber()) 1089 1090 if route == nil { 1091 // Failed to convert to a stack.Route. This likely means that the outgoing 1092 // endpoint no longer exists. 1093 return &ip.ErrHostUnreachable{} 1094 } 1095 defer route.Release() 1096 1097 return e.forwardPacketWithRoute(route, pkt, true /* updateOptions */) 1098 } 1099 1100 func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) { 1101 pkt.NICID = e.nic.ID() 1102 1103 // Raw socket packets are delivered based solely on the transport protocol 1104 // number. We only require that the packet be valid IPv4, and that they not 1105 // be fragmented. 1106 if !h.More() && h.FragmentOffset() == 0 { 1107 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1108 } 1109 1110 stats := e.stats 1111 stats.ip.ValidPacketsReceived.Increment() 1112 1113 srcAddr := h.SourceAddress() 1114 dstAddr := h.DestinationAddress() 1115 1116 // As per RFC 1122 section 3.2.1.3: 1117 // When a host sends any datagram, the IP source address MUST 1118 // be one of its own IP addresses (but not a broadcast or 1119 // multicast address). 
1120 if srcAddr == header.IPv4Broadcast || header.IsV4MulticastAddress(srcAddr) { 1121 stats.ip.InvalidSourceAddressesReceived.Increment() 1122 return 1123 } 1124 // Make sure the source address is not a subnet-local broadcast address. 1125 if addressEndpoint := e.AcquireAssignedAddress(srcAddr, false /* createTemp */, stack.NeverPrimaryEndpoint, true /* readOnly */); addressEndpoint != nil { 1126 subnet := addressEndpoint.Subnet() 1127 if subnet.IsBroadcast(srcAddr) { 1128 stats.ip.InvalidSourceAddressesReceived.Increment() 1129 return 1130 } 1131 } 1132 1133 if header.IsV4MulticastAddress(dstAddr) { 1134 // Handle all packets destined to a multicast address separately. Unlike 1135 // unicast, these packets can be both delivered locally and forwarded. See 1136 // RFC 1812 section 5.2.3 for details regarding the forwarding/local 1137 // delivery decision. 1138 1139 multicastForwarding := e.MulticastForwarding() && e.protocol.multicastForwarding() 1140 1141 if multicastForwarding { 1142 e.handleForwardingError(e.forwardMulticastPacket(h, pkt)) 1143 } 1144 1145 if e.IsInGroup(dstAddr) { 1146 e.deliverPacketLocally(h, pkt, inNICName) 1147 return 1148 } 1149 1150 if !multicastForwarding { 1151 // Only consider the destination address invalid if we didn't attempt to 1152 // forward the pkt and it was not delivered locally. 1153 stats.ip.InvalidDestinationAddressesReceived.Increment() 1154 } 1155 return 1156 } 1157 1158 // Before we do any processing, check if the packet was received as some 1159 // sort of broadcast. 1160 // 1161 // If the packet is destined for this device, then it should be delivered 1162 // locally. Otherwise, if forwarding is enabled, it should be forwarded. 
1163 if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint, true /* readOnly */); addressEndpoint != nil { 1164 subnet := addressEndpoint.AddressWithPrefix().Subnet() 1165 pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast 1166 e.deliverPacketLocally(h, pkt, inNICName) 1167 } else if e.Forwarding() { 1168 e.handleForwardingError(e.forwardUnicastPacket(pkt)) 1169 } else { 1170 stats.ip.InvalidDestinationAddressesReceived.Increment() 1171 } 1172 } 1173 1174 // handleForwardingError processes the provided err and increments any relevant 1175 // counters. 1176 func (e *endpoint) handleForwardingError(err ip.ForwardingError) { 1177 stats := e.stats.ip 1178 switch err := err.(type) { 1179 case nil: 1180 return 1181 case *ip.ErrInitializingSourceAddress: 1182 stats.Forwarding.InitializingSource.Increment() 1183 case *ip.ErrLinkLocalSourceAddress: 1184 stats.Forwarding.LinkLocalSource.Increment() 1185 case *ip.ErrLinkLocalDestinationAddress: 1186 stats.Forwarding.LinkLocalDestination.Increment() 1187 case *ip.ErrTTLExceeded: 1188 stats.Forwarding.ExhaustedTTL.Increment() 1189 case *ip.ErrHostUnreachable: 1190 stats.Forwarding.Unrouteable.Increment() 1191 case *ip.ErrParameterProblem: 1192 stats.MalformedPacketsReceived.Increment() 1193 case *ip.ErrMessageTooLong: 1194 stats.Forwarding.PacketTooBig.Increment() 1195 case *ip.ErrNoMulticastPendingQueueBufferSpace: 1196 stats.Forwarding.NoMulticastPendingQueueBufferSpace.Increment() 1197 case *ip.ErrUnexpectedMulticastInputInterface: 1198 stats.Forwarding.UnexpectedMulticastInputInterface.Increment() 1199 case *ip.ErrUnknownOutputEndpoint: 1200 stats.Forwarding.UnknownOutputEndpoint.Increment() 1201 case *ip.ErrOutgoingDeviceNoBufferSpace: 1202 stats.Forwarding.OutgoingDeviceNoBufferSpace.Increment() 1203 default: 1204 panic(fmt.Sprintf("unrecognized forwarding error: %s", err)) 1205 } 1206 
stats.Forwarding.Errors.Increment() 1207 } 1208 1209 func (e *endpoint) deliverPacketLocally(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) { 1210 stats := e.stats 1211 // iptables filtering. All packets that reach here are intended for 1212 // this machine and will not be forwarded. 1213 if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok { 1214 // iptables is telling us to drop the packet. 1215 stats.ip.IPTablesInputDropped.Increment() 1216 return 1217 } 1218 1219 if h.More() || h.FragmentOffset() != 0 { 1220 if pkt.Data().Size()+len(pkt.TransportHeader().Slice()) == 0 { 1221 // Drop the packet as it's marked as a fragment but has 1222 // no payload. 1223 stats.ip.MalformedPacketsReceived.Increment() 1224 stats.ip.MalformedFragmentsReceived.Increment() 1225 return 1226 } 1227 if opts := h.Options(); len(opts) != 0 { 1228 // If there are options we need to check them before we do assembly 1229 // or we could be assembling errant packets. However we do not change the 1230 // options as that could lead to double processing later. 1231 if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil { 1232 if optProblem.NeedICMP { 1233 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1234 pointer: optProblem.Pointer, 1235 }, pkt, true /* deliveredLocally */) 1236 e.stats.ip.MalformedPacketsReceived.Increment() 1237 } 1238 return 1239 } 1240 } 1241 // The packet is a fragment, let's try to reassemble it. 1242 start := h.FragmentOffset() 1243 // Drop the fragment if the size of the reassembled payload would exceed the 1244 // maximum payload size. 1245 // 1246 // Note that this addition doesn't overflow even on 32bit architecture 1247 // because pkt.Data().Size() should not exceed 65535 (the max IP datagram 1248 // size). Otherwise the packet would've been rejected as invalid before 1249 // reaching here. 
1250 if int(start)+pkt.Data().Size() > header.IPv4MaximumPayloadSize { 1251 stats.ip.MalformedPacketsReceived.Increment() 1252 stats.ip.MalformedFragmentsReceived.Increment() 1253 return 1254 } 1255 1256 proto := h.Protocol() 1257 resPkt, transProtoNum, ready, err := e.protocol.fragmentation.Process( 1258 // As per RFC 791 section 2.3, the identification value is unique 1259 // for a source-destination pair and protocol. 1260 fragmentation.FragmentID{ 1261 Source: h.SourceAddress(), 1262 Destination: h.DestinationAddress(), 1263 ID: uint32(h.ID()), 1264 Protocol: proto, 1265 }, 1266 start, 1267 start+uint16(pkt.Data().Size())-1, 1268 h.More(), 1269 proto, 1270 pkt, 1271 ) 1272 if err != nil { 1273 stats.ip.MalformedPacketsReceived.Increment() 1274 stats.ip.MalformedFragmentsReceived.Increment() 1275 return 1276 } 1277 if !ready { 1278 return 1279 } 1280 defer resPkt.DecRef() 1281 pkt = resPkt 1282 h = header.IPv4(pkt.NetworkHeader().Slice()) 1283 1284 // The reassembler doesn't take care of fixing up the header, so we need 1285 // to do it here. 1286 h.SetTotalLength(uint16(pkt.Data().Size() + len(h))) 1287 h.SetFlagsFragmentOffset(0, 0) 1288 1289 e.protocol.parseTransport(pkt, tcpip.TransportProtocolNumber(transProtoNum)) 1290 1291 // Now that the packet is reassembled, it can be sent to raw sockets. 1292 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1293 } 1294 stats.ip.PacketsDelivered.Increment() 1295 1296 p := h.TransportProtocol() 1297 if p == header.ICMPv4ProtocolNumber { 1298 // TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport 1299 // headers, the setting of the transport number here should be 1300 // unnecessary and removed. 1301 pkt.TransportProtocolNumber = p 1302 e.handleICMP(pkt) 1303 return 1304 } 1305 // ICMP handles options itself but do it here for all remaining destinations. 
1306 var hasRouterAlertOption bool 1307 if opts := h.Options(); len(opts) != 0 { 1308 newOpts, processedOpts, optProblem := e.processIPOptions(pkt, opts, &optionUsageReceive{}) 1309 if optProblem != nil { 1310 if optProblem.NeedICMP { 1311 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1312 pointer: optProblem.Pointer, 1313 }, pkt, true /* deliveredLocally */) 1314 stats.ip.MalformedPacketsReceived.Increment() 1315 } 1316 return 1317 } 1318 hasRouterAlertOption = processedOpts.routerAlert 1319 copied := copy(opts, newOpts) 1320 if copied != len(newOpts) { 1321 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 1322 } 1323 for i := copied; i < len(opts); i++ { 1324 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 1325 opts[i] = byte(header.IPv4OptionListEndType) 1326 } 1327 } 1328 if p == header.IGMPProtocolNumber { 1329 e.mu.Lock() 1330 e.igmp.handleIGMP(pkt, hasRouterAlertOption) // +checklocksforce: e == e.igmp.ep. 1331 e.mu.Unlock() 1332 return 1333 } 1334 1335 switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res { 1336 case stack.TransportPacketHandled: 1337 case stack.TransportPacketDestinationPortUnreachable: 1338 // As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination 1339 // Unreachable messages with code: 1340 // 3 (Port Unreachable), when the designated transport protocol 1341 // (e.g., UDP) is unable to demultiplex the datagram but has no 1342 // protocol mechanism to inform the sender. 
1343 _ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */) 1344 case stack.TransportPacketProtocolUnreachable: 1345 // As per RFC: 1122 Section 3.2.2.1 1346 // A host SHOULD generate Destination Unreachable messages with code: 1347 // 2 (Protocol Unreachable), when the designated transport protocol 1348 // is not supported 1349 _ = e.protocol.returnError(&icmpReasonProtoUnreachable{}, pkt, true /* deliveredLocally */) 1350 default: 1351 panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res)) 1352 } 1353 } 1354 1355 // Close cleans up resources associated with the endpoint. 1356 func (e *endpoint) Close() { 1357 e.mu.Lock() 1358 e.disableLocked() 1359 e.addressableEndpointState.Cleanup() 1360 e.mu.Unlock() 1361 1362 e.protocol.forgetEndpoint(e.nic.ID()) 1363 } 1364 1365 // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. 1366 func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { 1367 e.mu.Lock() 1368 defer e.mu.Unlock() 1369 1370 ep, err := e.addressableEndpointState.AddAndAcquireAddress(addr, properties, stack.Permanent) 1371 if err == nil { 1372 e.sendQueuedReports() 1373 } 1374 return ep, err 1375 } 1376 1377 // sendQueuedReports sends queued igmp reports. 1378 // 1379 // +checklocks:e.mu 1380 // +checklocksalias:e.igmp.ep.mu=e.mu 1381 func (e *endpoint) sendQueuedReports() { 1382 e.igmp.sendQueuedReports() 1383 } 1384 1385 // RemovePermanentAddress implements stack.AddressableEndpoint. 1386 func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { 1387 e.mu.RLock() 1388 defer e.mu.RUnlock() 1389 return e.addressableEndpointState.RemovePermanentAddress(addr) 1390 } 1391 1392 // SetDeprecated implements stack.AddressableEndpoint. 
1393 func (e *endpoint) SetDeprecated(addr tcpip.Address, deprecated bool) tcpip.Error { 1394 e.mu.RLock() 1395 defer e.mu.RUnlock() 1396 return e.addressableEndpointState.SetDeprecated(addr, deprecated) 1397 } 1398 1399 // SetLifetimes implements stack.AddressableEndpoint. 1400 func (e *endpoint) SetLifetimes(addr tcpip.Address, lifetimes stack.AddressLifetimes) tcpip.Error { 1401 e.mu.RLock() 1402 defer e.mu.RUnlock() 1403 return e.addressableEndpointState.SetLifetimes(addr, lifetimes) 1404 } 1405 1406 // MainAddress implements stack.AddressableEndpoint. 1407 func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { 1408 e.mu.RLock() 1409 defer e.mu.RUnlock() 1410 return e.addressableEndpointState.MainAddress() 1411 } 1412 1413 // AcquireAssignedAddress implements stack.AddressableEndpoint. 1414 func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior, readOnly bool) stack.AddressEndpoint { 1415 e.mu.RLock() 1416 defer e.mu.RUnlock() 1417 1418 loopback := e.nic.IsLoopback() 1419 return e.addressableEndpointState.AcquireAssignedAddressOrMatching(localAddr, func(addressEndpoint stack.AddressEndpoint) bool { 1420 subnet := addressEndpoint.Subnet() 1421 // IPv4 has a notion of a subnet broadcast address and considers the 1422 // loopback interface bound to an address's whole subnet (on linux). 1423 return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr)) 1424 }, allowTemp, tempPEB, readOnly) 1425 } 1426 1427 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. 
1428 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr, srcHint tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1429 e.mu.RLock() 1430 defer e.mu.RUnlock() 1431 return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, srcHint, allowExpired) 1432 } 1433 1434 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress 1435 // but with locking requirements 1436 // 1437 // +checklocksread:e.mu 1438 func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr, srcHint tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1439 return e.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, srcHint, allowExpired) 1440 } 1441 1442 // PrimaryAddresses implements stack.AddressableEndpoint. 1443 func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { 1444 e.mu.RLock() 1445 defer e.mu.RUnlock() 1446 return e.addressableEndpointState.PrimaryAddresses() 1447 } 1448 1449 // PermanentAddresses implements stack.AddressableEndpoint. 1450 func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { 1451 e.mu.RLock() 1452 defer e.mu.RUnlock() 1453 return e.addressableEndpointState.PermanentAddresses() 1454 } 1455 1456 // JoinGroup implements stack.GroupAddressableEndpoint. 1457 func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { 1458 e.mu.Lock() 1459 defer e.mu.Unlock() 1460 return e.joinGroupLocked(addr) 1461 } 1462 1463 // joinGroupLocked is like JoinGroup but with locking requirements. 1464 // 1465 // +checklocks:e.mu 1466 // +checklocksalias:e.igmp.ep.mu=e.mu 1467 func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { 1468 if !header.IsV4MulticastAddress(addr) { 1469 return &tcpip.ErrBadAddress{} 1470 } 1471 1472 e.igmp.joinGroup(addr) 1473 return nil 1474 } 1475 1476 // LeaveGroup implements stack.GroupAddressableEndpoint. 
1477 func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { 1478 e.mu.Lock() 1479 defer e.mu.Unlock() 1480 return e.leaveGroupLocked(addr) 1481 } 1482 1483 // leaveGroupLocked is like LeaveGroup but with locking requirements. 1484 // 1485 // +checklocks:e.mu 1486 // +checklocksalias:e.igmp.ep.mu=e.mu 1487 func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { 1488 return e.igmp.leaveGroup(addr) 1489 } 1490 1491 // IsInGroup implements stack.GroupAddressableEndpoint. 1492 func (e *endpoint) IsInGroup(addr tcpip.Address) bool { 1493 e.mu.RLock() 1494 defer e.mu.RUnlock() 1495 return e.igmp.isInGroup(addr) // +checklocksforce: e.mu==e.igmp.ep.mu. 1496 } 1497 1498 // Stats implements stack.NetworkEndpoint. 1499 func (e *endpoint) Stats() stack.NetworkEndpointStats { 1500 return &e.stats.localStats 1501 } 1502 1503 var _ stack.NetworkProtocol = (*protocol)(nil) 1504 var _ stack.MulticastForwardingNetworkProtocol = (*protocol)(nil) 1505 var _ stack.RejectIPv4WithHandler = (*protocol)(nil) 1506 var _ fragmentation.TimeoutHandler = (*protocol)(nil) 1507 1508 type protocol struct { 1509 stack *stack.Stack 1510 1511 // mu protects annotated fields below. 1512 mu sync.RWMutex 1513 1514 // eps is keyed by NICID to allow protocol methods to retrieve an endpoint 1515 // when handling a packet, by looking at which NIC handled the packet. 1516 // +checklocks:mu 1517 eps map[tcpip.NICID]*endpoint 1518 1519 // ICMP types for which the stack's global rate limiting must apply. 1520 // +checklocks:mu 1521 icmpRateLimitedTypes map[header.ICMPv4Type]struct{} 1522 1523 // defaultTTL is the current default TTL for the protocol. Only the 1524 // uint8 portion of it is meaningful. 1525 defaultTTL atomicbitops.Uint32 1526 1527 ids []atomicbitops.Uint32 1528 hashIV uint32 1529 // idTS is the unix timestamp in milliseconds 'ids' was last accessed. 
1530 idTS atomicbitops.Int64 1531 1532 fragmentation *fragmentation.Fragmentation 1533 1534 options Options 1535 1536 multicastRouteTable multicast.RouteTable 1537 // multicastForwardingDisp is the multicast forwarding event dispatcher that 1538 // an integrator can provide to receive multicast forwarding events. Note 1539 // that multicast packets will only be forwarded if this is non-nil. 1540 // +checklocks:mu 1541 multicastForwardingDisp stack.MulticastForwardingEventDispatcher 1542 } 1543 1544 // Number returns the ipv4 protocol number. 1545 func (p *protocol) Number() tcpip.NetworkProtocolNumber { 1546 return ProtocolNumber 1547 } 1548 1549 // MinimumPacketSize returns the minimum valid ipv4 packet size. 1550 func (p *protocol) MinimumPacketSize() int { 1551 return header.IPv4MinimumSize 1552 } 1553 1554 // ParseAddresses implements stack.NetworkProtocol. 1555 func (*protocol) ParseAddresses(v []byte) (src, dst tcpip.Address) { 1556 h := header.IPv4(v) 1557 return h.SourceAddress(), h.DestinationAddress() 1558 } 1559 1560 // SetOption implements stack.NetworkProtocol. 1561 func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { 1562 switch v := option.(type) { 1563 case *tcpip.DefaultTTLOption: 1564 p.SetDefaultTTL(uint8(*v)) 1565 return nil 1566 default: 1567 return &tcpip.ErrUnknownProtocolOption{} 1568 } 1569 } 1570 1571 // Option implements stack.NetworkProtocol. 1572 func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { 1573 switch v := option.(type) { 1574 case *tcpip.DefaultTTLOption: 1575 *v = tcpip.DefaultTTLOption(p.DefaultTTL()) 1576 return nil 1577 default: 1578 return &tcpip.ErrUnknownProtocolOption{} 1579 } 1580 } 1581 1582 // SetDefaultTTL sets the default TTL for endpoints created with this protocol. 1583 func (p *protocol) SetDefaultTTL(ttl uint8) { 1584 p.defaultTTL.Store(uint32(ttl)) 1585 } 1586 1587 // DefaultTTL returns the default TTL for endpoints created with this protocol. 
1588 func (p *protocol) DefaultTTL() uint8 { 1589 return uint8(p.defaultTTL.Load()) 1590 } 1591 1592 // Close implements stack.TransportProtocol. 1593 func (p *protocol) Close() { 1594 p.fragmentation.Release() 1595 p.multicastRouteTable.Close() 1596 } 1597 1598 // Wait implements stack.TransportProtocol. 1599 func (*protocol) Wait() {} 1600 1601 func (p *protocol) validateUnicastSourceAndMulticastDestination(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1602 if !p.isUnicastAddress(addresses.Source) || header.IsV4LinkLocalUnicastAddress(addresses.Source) { 1603 return &tcpip.ErrBadAddress{} 1604 } 1605 1606 if !header.IsV4MulticastAddress(addresses.Destination) || header.IsV4LinkLocalMulticastAddress(addresses.Destination) { 1607 return &tcpip.ErrBadAddress{} 1608 } 1609 1610 return nil 1611 } 1612 1613 func (p *protocol) multicastForwarding() bool { 1614 p.mu.RLock() 1615 defer p.mu.RUnlock() 1616 return p.multicastForwardingDisp != nil 1617 } 1618 1619 func (p *protocol) newInstalledRoute(route stack.MulticastRoute) (*multicast.InstalledRoute, tcpip.Error) { 1620 if len(route.OutgoingInterfaces) == 0 { 1621 return nil, &tcpip.ErrMissingRequiredFields{} 1622 } 1623 1624 if !p.stack.HasNIC(route.ExpectedInputInterface) { 1625 return nil, &tcpip.ErrUnknownNICID{} 1626 } 1627 1628 for _, outgoingInterface := range route.OutgoingInterfaces { 1629 if route.ExpectedInputInterface == outgoingInterface.ID { 1630 return nil, &tcpip.ErrMulticastInputCannotBeOutput{} 1631 } 1632 1633 if !p.stack.HasNIC(outgoingInterface.ID) { 1634 return nil, &tcpip.ErrUnknownNICID{} 1635 } 1636 } 1637 return p.multicastRouteTable.NewInstalledRoute(route), nil 1638 } 1639 1640 // AddMulticastRoute implements stack.MulticastForwardingNetworkProtocol. 
1641 func (p *protocol) AddMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination, route stack.MulticastRoute) tcpip.Error { 1642 if !p.multicastForwarding() { 1643 return &tcpip.ErrNotPermitted{} 1644 } 1645 1646 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1647 return err 1648 } 1649 1650 installedRoute, err := p.newInstalledRoute(route) 1651 if err != nil { 1652 return err 1653 } 1654 1655 pendingPackets := p.multicastRouteTable.AddInstalledRoute(addresses, installedRoute) 1656 1657 for _, pkt := range pendingPackets { 1658 p.forwardPendingMulticastPacket(pkt, installedRoute) 1659 } 1660 return nil 1661 } 1662 1663 // RemoveMulticastRoute implements 1664 // stack.MulticastForwardingNetworkProtocol.RemoveMulticastRoute. 1665 func (p *protocol) RemoveMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1666 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1667 return err 1668 } 1669 1670 if removed := p.multicastRouteTable.RemoveInstalledRoute(addresses); !removed { 1671 return &tcpip.ErrHostUnreachable{} 1672 } 1673 1674 return nil 1675 } 1676 1677 // EnableMulticastForwarding implements 1678 // stack.MulticastForwardingNetworkProtocol.EnableMulticastForwarding. 1679 func (p *protocol) EnableMulticastForwarding(disp stack.MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 1680 p.mu.Lock() 1681 defer p.mu.Unlock() 1682 1683 if p.multicastForwardingDisp != nil { 1684 return true, nil 1685 } 1686 1687 if disp == nil { 1688 return false, &tcpip.ErrInvalidOptionValue{} 1689 } 1690 1691 p.multicastForwardingDisp = disp 1692 return false, nil 1693 } 1694 1695 // DisableMulticastForwarding implements 1696 // stack.MulticastForwardingNetworkProtocol.DisableMulticastForwarding. 
1697 func (p *protocol) DisableMulticastForwarding() { 1698 p.mu.Lock() 1699 defer p.mu.Unlock() 1700 1701 p.multicastForwardingDisp = nil 1702 p.multicastRouteTable.RemoveAllInstalledRoutes() 1703 } 1704 1705 // MulticastRouteLastUsedTime implements 1706 // stack.MulticastForwardingNetworkProtocol. 1707 func (p *protocol) MulticastRouteLastUsedTime(addresses stack.UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 1708 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1709 return tcpip.MonotonicTime{}, err 1710 } 1711 1712 timestamp, found := p.multicastRouteTable.GetLastUsedTimestamp(addresses) 1713 1714 if !found { 1715 return tcpip.MonotonicTime{}, &tcpip.ErrHostUnreachable{} 1716 } 1717 1718 return timestamp, nil 1719 } 1720 1721 func (p *protocol) forwardPendingMulticastPacket(pkt *stack.PacketBuffer, installedRoute *multicast.InstalledRoute) { 1722 defer pkt.DecRef() 1723 1724 // Attempt to forward the packet using the endpoint that it originally 1725 // arrived on. This ensures that the packet is only forwarded if it 1726 // matches the route's expected input interface (see 5a of RFC 1812 section 1727 // 5.2.1.3). 1728 ep, ok := p.getEndpointForNIC(pkt.NICID) 1729 1730 if !ok { 1731 // The endpoint that the packet arrived on no longer exists. Silently 1732 // drop the pkt. 
1733 return 1734 } 1735 1736 if !ep.MulticastForwarding() { 1737 return 1738 } 1739 1740 ep.handleForwardingError(ep.forwardValidatedMulticastPacket(pkt, installedRoute)) 1741 } 1742 1743 func (p *protocol) isUnicastAddress(addr tcpip.Address) bool { 1744 if addr.BitLen() != header.IPv4AddressSizeBits { 1745 return false 1746 } 1747 1748 if addr == header.IPv4Any || addr == header.IPv4Broadcast { 1749 return false 1750 } 1751 1752 if p.isSubnetLocalBroadcastAddress(addr) { 1753 return false 1754 } 1755 return !header.IsV4MulticastAddress(addr) 1756 } 1757 1758 func (p *protocol) isSubnetLocalBroadcastAddress(addr tcpip.Address) bool { 1759 p.mu.RLock() 1760 defer p.mu.RUnlock() 1761 1762 for _, e := range p.eps { 1763 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* createTemp */, stack.NeverPrimaryEndpoint, true /* readOnly */); addressEndpoint != nil { 1764 subnet := addressEndpoint.Subnet() 1765 if subnet.IsBroadcast(addr) { 1766 return true 1767 } 1768 } 1769 } 1770 return false 1771 } 1772 1773 // parseAndValidate parses the packet (including its transport layer header) and 1774 // returns the parsed IP header. 1775 // 1776 // Returns true if the IP header was successfully parsed. 1777 func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (*buffer.View, bool) { 1778 transProtoNum, hasTransportHdr, ok := p.Parse(pkt) 1779 if !ok { 1780 return nil, false 1781 } 1782 1783 h := header.IPv4(pkt.NetworkHeader().Slice()) 1784 // Do not include the link header's size when calculating the size of the IP 1785 // packet. 
1786 if !h.IsValid(pkt.Size() - len(pkt.LinkHeader().Slice())) { 1787 return nil, false 1788 } 1789 1790 if !pkt.RXChecksumValidated && !h.IsChecksumValid() { 1791 return nil, false 1792 } 1793 1794 if hasTransportHdr { 1795 p.parseTransport(pkt, transProtoNum) 1796 } 1797 1798 return pkt.NetworkHeader().View(), true 1799 } 1800 1801 func (p *protocol) parseTransport(pkt *stack.PacketBuffer, transProtoNum tcpip.TransportProtocolNumber) { 1802 if transProtoNum == header.ICMPv4ProtocolNumber { 1803 // The transport layer will handle transport layer parsing errors. 1804 _ = parse.ICMPv4(pkt) 1805 return 1806 } 1807 1808 switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { 1809 case stack.ParsedOK: 1810 case stack.UnknownTransportProtocol, stack.TransportLayerParseError: 1811 // The transport layer will handle unknown protocols and transport layer 1812 // parsing errors. 1813 default: 1814 panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) 1815 } 1816 } 1817 1818 // Parse implements stack.NetworkProtocol. 1819 func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { 1820 if ok := parse.IPv4(pkt); !ok { 1821 return 0, false, false 1822 } 1823 1824 ipHdr := header.IPv4(pkt.NetworkHeader().Slice()) 1825 return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true 1826 } 1827 1828 // allowICMPReply reports whether an ICMP reply with provided type and code may 1829 // be sent following the rate mask options and global ICMP rate limiter. 1830 func (p *protocol) allowICMPReply(icmpType header.ICMPv4Type, code header.ICMPv4Code) bool { 1831 // Mimic linux and never rate limit for PMTU discovery. 
1832 // https://github.com/torvalds/linux/blob/9e9fb7655ed585da8f468e29221f0ba194a5f613/net/ipv4/icmp.c#L288 1833 if icmpType == header.ICMPv4DstUnreachable && code == header.ICMPv4FragmentationNeeded { 1834 return true 1835 } 1836 p.mu.RLock() 1837 defer p.mu.RUnlock() 1838 1839 if _, ok := p.icmpRateLimitedTypes[icmpType]; ok { 1840 return p.stack.AllowICMPMessage() 1841 } 1842 return true 1843 } 1844 1845 // SendRejectionError implements stack.RejectIPv4WithHandler. 1846 func (p *protocol) SendRejectionError(pkt *stack.PacketBuffer, rejectWith stack.RejectIPv4WithICMPType, inputHook bool) tcpip.Error { 1847 switch rejectWith { 1848 case stack.RejectIPv4WithICMPNetUnreachable: 1849 return p.returnError(&icmpReasonNetworkUnreachable{}, pkt, inputHook) 1850 case stack.RejectIPv4WithICMPHostUnreachable: 1851 return p.returnError(&icmpReasonHostUnreachable{}, pkt, inputHook) 1852 case stack.RejectIPv4WithICMPPortUnreachable: 1853 return p.returnError(&icmpReasonPortUnreachable{}, pkt, inputHook) 1854 case stack.RejectIPv4WithICMPNetProhibited: 1855 return p.returnError(&icmpReasonNetworkProhibited{}, pkt, inputHook) 1856 case stack.RejectIPv4WithICMPHostProhibited: 1857 return p.returnError(&icmpReasonHostProhibited{}, pkt, inputHook) 1858 case stack.RejectIPv4WithICMPAdminProhibited: 1859 return p.returnError(&icmpReasonAdministrativelyProhibited{}, pkt, inputHook) 1860 default: 1861 panic(fmt.Sprintf("unhandled %[1]T = %[1]d", rejectWith)) 1862 } 1863 } 1864 1865 // calculateNetworkMTU calculates the network-layer payload MTU based on the 1866 // link-layer payload mtu. 
1867 func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, tcpip.Error) { 1868 if linkMTU < header.IPv4MinimumMTU { 1869 return 0, &tcpip.ErrInvalidEndpointState{} 1870 } 1871 1872 // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in 1873 // length: 1874 // The maximal internet header is 60 octets, and a typical internet header 1875 // is 20 octets, allowing a margin for headers of higher level protocols. 1876 if networkHeaderSize > header.IPv4MaximumHeaderSize { 1877 return 0, &tcpip.ErrMalformedHeader{} 1878 } 1879 1880 networkMTU := linkMTU 1881 if networkMTU > MaxTotalSize { 1882 networkMTU = MaxTotalSize 1883 } 1884 1885 return networkMTU - networkHeaderSize, nil 1886 } 1887 1888 func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool { 1889 payload := len(pkt.TransportHeader().Slice()) + pkt.Data().Size() 1890 return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU 1891 } 1892 1893 // addressToUint32 translates an IPv4 address into its little endian uint32 1894 // representation. 1895 // 1896 // This function does the same thing as binary.LittleEndian.Uint32 but operates 1897 // on a tcpip.Address (a string) without the need to convert it to a byte slice, 1898 // which would cause an allocation. 1899 func addressToUint32(addr tcpip.Address) uint32 { 1900 addrBytes := addr.As4() 1901 _ = addrBytes[3] // bounds check hint to compiler 1902 return uint32(addrBytes[0]) | uint32(addrBytes[1])<<8 | uint32(addrBytes[2])<<16 | uint32(addrBytes[3])<<24 1903 } 1904 1905 // hashRoute calculates a hash value for the given source/destination pair using 1906 // the addresses, transport protocol number and a 32-bit number to generate the 1907 // hash. 
1908 func hashRoute(srcAddr, dstAddr tcpip.Address, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 { 1909 a := addressToUint32(srcAddr) 1910 b := addressToUint32(dstAddr) 1911 return hash.Hash3Words(a, b, uint32(protocol), hashIV) 1912 } 1913 1914 // Options holds options to configure a new protocol. 1915 type Options struct { 1916 // IGMP holds options for IGMP. 1917 IGMP IGMPOptions 1918 1919 // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. 1920 // martian loopback packets) should be accepted. 1921 AllowExternalLoopbackTraffic bool 1922 } 1923 1924 // NewProtocolWithOptions returns an IPv4 network protocol. 1925 func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { 1926 ids := make([]atomicbitops.Uint32, buckets) 1927 1928 // Randomly initialize hashIV and the ids. 1929 r := hash.RandN32(1 + buckets) 1930 for i := range ids { 1931 ids[i] = atomicbitops.FromUint32(r[i]) 1932 } 1933 hashIV := r[buckets] 1934 1935 return func(s *stack.Stack) stack.NetworkProtocol { 1936 p := &protocol{ 1937 stack: s, 1938 ids: ids, 1939 hashIV: hashIV, 1940 defaultTTL: atomicbitops.FromUint32(DefaultTTL), 1941 options: opts, 1942 } 1943 p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) 1944 p.eps = make(map[tcpip.NICID]*endpoint) 1945 // Set ICMP rate limiting to Linux defaults. 1946 // See https://man7.org/linux/man-pages/man7/icmp.7.html. 
1947 p.icmpRateLimitedTypes = map[header.ICMPv4Type]struct{}{ 1948 header.ICMPv4DstUnreachable: {}, 1949 header.ICMPv4SrcQuench: {}, 1950 header.ICMPv4TimeExceeded: {}, 1951 header.ICMPv4ParamProblem: {}, 1952 } 1953 if err := p.multicastRouteTable.Init(multicast.DefaultConfig(s.Clock())); err != nil { 1954 panic(fmt.Sprintf("p.multicastRouteTable.Init(_): %s", err)) 1955 } 1956 return p 1957 } 1958 } 1959 1960 // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. 1961 func NewProtocol(s *stack.Stack) stack.NetworkProtocol { 1962 return NewProtocolWithOptions(Options{})(s) 1963 } 1964 1965 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) { 1966 fragPkt, offset, copied, more := pf.BuildNextFragment() 1967 fragPkt.NetworkProtocolNumber = ProtocolNumber 1968 1969 originalIPHeaderLength := len(originalIPHeader) 1970 nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength)) 1971 fragPkt.NetworkProtocolNumber = ProtocolNumber 1972 1973 if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) { 1974 panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength)) 1975 } 1976 1977 flags := originalIPHeader.Flags() 1978 if more { 1979 flags |= header.IPv4FlagMoreFragments 1980 } 1981 nextFragIPHeader.SetFlagsFragmentOffset(flags, uint16(offset)) 1982 nextFragIPHeader.SetTotalLength(uint16(nextFragIPHeader.HeaderLength()) + uint16(copied)) 1983 nextFragIPHeader.SetChecksum(0) 1984 nextFragIPHeader.SetChecksum(^nextFragIPHeader.CalculateChecksum()) 1985 1986 return fragPkt, more 1987 } 1988 1989 // optionAction describes possible actions that may be taken on an option 1990 // while processing it. 1991 type optionAction uint8 1992 1993 const ( 1994 // optionRemove says that the option should not be in the output option set. 
1995 optionRemove optionAction = iota 1996 1997 // optionProcess says that the option should be fully processed. 1998 optionProcess 1999 2000 // optionVerify says the option should be checked and passed unchanged. 2001 optionVerify 2002 2003 // optionPass says to pass the output set without checking. 2004 optionPass 2005 ) 2006 2007 // optionActions list what to do for each option in a given scenario. 2008 type optionActions struct { 2009 // timestamp controls what to do with a Timestamp option. 2010 timestamp optionAction 2011 2012 // recordRoute controls what to do with a Record Route option. 2013 recordRoute optionAction 2014 2015 // routerAlert controls what to do with a Router Alert option. 2016 routerAlert optionAction 2017 2018 // unknown controls what to do with an unknown option. 2019 unknown optionAction 2020 } 2021 2022 // optionsUsage specifies the ways options may be operated upon for a given 2023 // scenario during packet processing. 2024 type optionsUsage interface { 2025 actions() optionActions 2026 } 2027 2028 // optionUsageVerify implements optionsUsage for when we just want to check 2029 // fragments. Don't change anything, just check and reject if bad. No 2030 // replacement options are generated. 2031 type optionUsageVerify struct{} 2032 2033 // actions implements optionsUsage. 2034 func (*optionUsageVerify) actions() optionActions { 2035 return optionActions{ 2036 timestamp: optionVerify, 2037 recordRoute: optionVerify, 2038 routerAlert: optionVerify, 2039 unknown: optionRemove, 2040 } 2041 } 2042 2043 // optionUsageReceive implements optionsUsage for packets we will pass 2044 // to the transport layer (with the exception of Echo requests). 2045 type optionUsageReceive struct{} 2046 2047 // actions implements optionsUsage. 
2048 func (*optionUsageReceive) actions() optionActions { 2049 return optionActions{ 2050 timestamp: optionProcess, 2051 recordRoute: optionProcess, 2052 routerAlert: optionVerify, 2053 unknown: optionPass, 2054 } 2055 } 2056 2057 // optionUsageForward implements optionsUsage for packets about to be forwarded. 2058 // All options are passed on regardless of whether we recognise them, however 2059 // we do process the Timestamp and Record Route options. 2060 type optionUsageForward struct{} 2061 2062 // actions implements optionsUsage. 2063 func (*optionUsageForward) actions() optionActions { 2064 return optionActions{ 2065 timestamp: optionProcess, 2066 recordRoute: optionProcess, 2067 routerAlert: optionVerify, 2068 unknown: optionPass, 2069 } 2070 } 2071 2072 // optionUsageEcho implements optionsUsage for echo packet processing. 2073 // Only Timestamp and RecordRoute are processed and sent back. 2074 type optionUsageEcho struct{} 2075 2076 // actions implements optionsUsage. 2077 func (*optionUsageEcho) actions() optionActions { 2078 return optionActions{ 2079 timestamp: optionProcess, 2080 recordRoute: optionProcess, 2081 routerAlert: optionVerify, 2082 unknown: optionRemove, 2083 } 2084 } 2085 2086 // handleTimestamp does any required processing on a Timestamp option 2087 // in place. 
//
// tsOpt is validated (flags, pointer, length and overflow counter) and, when
// usage asks for the Timestamp option to be processed, updated via
// UpdateTimestamp with localAddress and clock.
//
// It returns nil if the option is acceptable, otherwise a parameter problem
// whose Pointer identifies the offending field within the option.
func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) *header.IPv4OptParameterProblem {
	flags := tsOpt.Flags()
	// The size of one entry in the data area depends on whether entries carry
	// an address alongside the timestamp.
	var entrySize uint8
	switch flags {
	case header.IPv4OptionTimestampOnlyFlag:
		entrySize = header.IPv4OptionTimestampSize
	case
		header.IPv4OptionTimestampWithIPFlag,
		header.IPv4OptionTimestampWithPredefinedIPFlag:
		entrySize = header.IPv4OptionTimestampWithAddrSize
	default:
		// Any other flags value is reported against the overflow/flags byte.
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSOFLWAndFLGOffset,
			NeedICMP: true,
		}
	}

	pointer := tsOpt.Pointer()
	// RFC 791 page 22 states: "The smallest legal value is 5."
	// Since the pointer is 1 based, and the header is 4 bytes long the
	// pointer must point beyond the header therefore 4 or less is bad.
	if pointer <= header.IPv4OptionTimestampHdrLength {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSPointerOffset,
			NeedICMP: true,
		}
	}
	// To simplify processing below, base further work on the array of timestamps
	// beyond the header, rather than on the whole option. Also to aid
	// calculations set 'nextSlot' to be 0 based as in the packet it is 1 based.
	nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1)
	optLen := tsOpt.Size()
	dataLength := optLen - header.IPv4OptionTimestampHdrLength

	// In the section below, we verify the pointer, length and overflow counter
	// fields of the option. The distinction is in which byte you return as being
	// in error in the ICMP packet. Offsets 1 (length), 2 (pointer)
	// or 3 (overflowed counter).
	//
	// The following RFC sections cover this section:
	//
	// RFC 791 (page 22):
	//    If there is some room but not enough room for a full timestamp
	//    to be inserted, or the overflow count itself overflows, the
	//    original datagram is considered to be in error and is discarded.
	//    In either case an ICMP parameter problem message may be sent to
	//    the source host [3].
	//
	// You can get this situation in two ways. Firstly if the data area is not
	// a multiple of the entry size or secondly, if the pointer is not at a
	// multiple of the entry size. The wording of the RFC suggests that
	// this is not an error until you actually run out of space.
	if pointer > optLen {
		// RFC 791 (page 22) says we should switch to using the overflow count.
		//    If the timestamp data area is already full (the pointer exceeds
		//    the length) the datagram is forwarded without inserting the
		//    timestamp, but the overflow count is incremented by one.
		if flags == header.IPv4OptionTimestampWithPredefinedIPFlag {
			// By definition we have nothing to do.
			return nil
		}

		// A non-zero result means the overflow counter could still be
		// incremented.
		if tsOpt.IncOverflow() != 0 {
			return nil
		}
		// The overflow count is also full.
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSOFLWAndFLGOffset,
			NeedICMP: true,
		}
	}
	if nextSlot+entrySize > dataLength {
		// The data area isn't full but there isn't room for a new entry.
		// Either Length or Pointer could be bad.
		//
		// The branch below is intentionally disabled; it is kept only to show
		// what a non Linux-compatible implementation would check.
		if false {
			// We must select Pointer for Linux compatibility, even if
			// only the length is bad.
			// The Linux code is at (in October 2020)
			// https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370
			//		if (optptr[2]+3 > optlen) {
			//			pp_ptr = optptr + 2;
			//			goto error;
			//		}
			// which doesn't distinguish between which of optptr[2] or optlen
			// is wrong, but just arbitrarily decides on optptr+2.
			if dataLength%entrySize != 0 {
				// The Data section size should be a multiple of the expected
				// timestamp entry size.
				return &header.IPv4OptParameterProblem{
					Pointer:  header.IPv4OptionLengthOffset,
					NeedICMP: false,
				}
			}
			// If the size is OK, the pointer must be corrupted.
		}
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSPointerOffset,
			NeedICMP: true,
		}
	}

	// Only write into the option when the usage scenario asks for processing;
	// otherwise the option has merely been verified.
	if usage.actions().timestamp == optionProcess {
		tsOpt.UpdateTimestamp(localAddress, clock)
	}
	return nil
}

// handleRecordRoute checks and processes a Record route option. It is much
// like the timestamp type 1 option, but without timestamps. The passed in
// address is stored in the option in the correct spot if possible.
//
// It returns nil if the option is acceptable (including when its data area is
// already full), otherwise a parameter problem whose Pointer identifies the
// offending field within the option. The address is not stored when usage
// only asks for the Record Route option to be verified.
func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) *header.IPv4OptParameterProblem {
	optlen := rrOpt.Size()

	// The option must be able to hold the header plus at least one address.
	if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptionLengthOffset,
			NeedICMP: true,
		}
	}

	pointer := rrOpt.Pointer()
	// RFC 791 page 20 states:
	//      The pointer is relative to this option, and the
	//      smallest legal value for the pointer is 4.
	// Since the pointer is 1 based, and the header is 3 bytes long the
	// pointer must point beyond the header therefore 3 or less is bad.
	if pointer <= header.IPv4OptionRecordRouteHdrLength {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptRRPointerOffset,
			NeedICMP: true,
		}
	}

	// RFC 791 page 21 says
	//       If the route data area is already full (the pointer exceeds the
	//       length) the datagram is forwarded without inserting the address
	//       into the recorded route. If there is some room but not enough
	//       room for a full address to be inserted, the original datagram is
	//       considered to be in error and is discarded.  In either case an
	//       ICMP parameter problem message may be sent to the source
	//       host.
	// The use of the words "In either case" suggests that a 'full' RR option
	// could generate an ICMP at every hop after it fills up. We chose to not
	// do this (as do most implementations). It is probable that the inclusion
	// of these words is a copy/paste error from the timestamp option where
	// there are two failure reasons given.
	if pointer > optlen {
		return nil
	}

	// The data area isn't full but there isn't room for a new entry.
	// Either Length or Pointer could be bad. We must select Pointer for Linux
	// compatibility, even if only the length is bad. NB. pointer is 1 based.
	if pointer+header.IPv4AddressSize > optlen+1 {
		// The branch below is intentionally disabled; it is kept only to show
		// what a non Linux-compatible implementation would check.
		if false {
			// This is what we would do if we were not being Linux compatible.
			// Check for bad pointer or length value. Must be a multiple of 4 after
			// accounting for the 3 byte header and not within that header.
			// RFC 791, page 20 says:
			//       The pointer is relative to this option, and the
			//       smallest legal value for the pointer is 4.
			//
			//       A recorded route is composed of a series of internet addresses.
			//       Each internet address is 32 bits or 4 octets.
			// Linux skips this test so we must too.  See Linux code at:
			// https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341
			//    if (optptr[2]+3 > optlen) {
			//      pp_ptr = optptr + 2;
			//      goto error;
			//    }
			if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 {
				// Length is bad, not on integral number of slots.
				return &header.IPv4OptParameterProblem{
					Pointer:  header.IPv4OptionLengthOffset,
					NeedICMP: true,
				}
			}
			// If not length, the fault must be with the pointer.
		}
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptRRPointerOffset,
			NeedICMP: true,
		}
	}
	// When only verifying, the option has been checked and must be left
	// unchanged.
	if usage.actions().recordRoute == optionVerify {
		return nil
	}
	rrOpt.StoreAddress(localAddress)
	return nil
}

// handleRouterAlert performs sanity checks on a Router Alert option.
//
// It returns nil if the option's value is the only accepted one, otherwise a
// parameter problem pointing at the value field.
func handleRouterAlert(raOpt header.IPv4OptionRouterAlert) *header.IPv4OptParameterProblem {
	// Only the zero value is acceptable, as per RFC 2113, section 2.1:
	//   Value:  A two octet code with the following values:
	//     0 - Router shall examine packet
	//     1-65535 - Reserved
	if raOpt.Value() != header.IPv4OptionRouterAlertValue {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptionRouterAlertValueOffset,
			NeedICMP: true,
		}
	}
	return nil
}

// optionTracker records which of the Timestamp, Record Route and Router Alert
// options were encountered while processing a packet's options.
type optionTracker struct {
	timestamp   bool
	recordRoute bool
	routerAlert bool
}

// processIPOptions parses the IPv4 options and produces a new set of options
// suitable for use in the next step of packet processing as informed by usage.
// The original will not be touched.
//
// If there were no errors during parsing, the new set of options is returned as
// a new buffer.
2306 func (e *endpoint) processIPOptions(pkt *stack.PacketBuffer, opts header.IPv4Options, usage optionsUsage) (header.IPv4Options, optionTracker, *header.IPv4OptParameterProblem) { 2307 stats := e.stats.ip 2308 optIter := opts.MakeIterator() 2309 2310 // Except NOP, each option must only appear at most once (RFC 791 section 3.1, 2311 // at the definition of every type). 2312 // Keep track of each option we find to enable duplicate option detection. 2313 var seenOptions [math.MaxUint8 + 1]bool 2314 2315 // TODO(https://gvisor.dev/issue/4586): This will need tweaking when we start 2316 // really forwarding packets as we may need to get two addresses, for rx and 2317 // tx interfaces. We will also have to take usage into account. 2318 localAddress := e.MainAddress().Address 2319 if localAddress.BitLen() == 0 { 2320 h := header.IPv4(pkt.NetworkHeader().Slice()) 2321 dstAddr := h.DestinationAddress() 2322 if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(dstAddr) { 2323 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2324 NeedICMP: false, 2325 } 2326 } 2327 localAddress = dstAddr 2328 } 2329 2330 var optionsProcessed optionTracker 2331 for { 2332 option, done, optProblem := optIter.Next() 2333 if done || optProblem != nil { 2334 return optIter.Finalize(), optionsProcessed, optProblem 2335 } 2336 optType := option.Type() 2337 if optType == header.IPv4OptionNOPType { 2338 optIter.PushNOPOrEnd(optType) 2339 continue 2340 } 2341 if optType == header.IPv4OptionListEndType { 2342 optIter.PushNOPOrEnd(optType) 2343 return optIter.Finalize(), optionsProcessed, nil 2344 } 2345 2346 // check for repeating options (multiple NOPs are OK) 2347 if seenOptions[optType] { 2348 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2349 Pointer: optIter.ErrCursor, 2350 NeedICMP: true, 2351 } 2352 } 2353 seenOptions[optType] = true 2354 2355 optLen, optProblem := func() (int, *header.IPv4OptParameterProblem) { 2356 switch option := 
option.(type) { 2357 case *header.IPv4OptionTimestamp: 2358 stats.OptionTimestampReceived.Increment() 2359 optionsProcessed.timestamp = true 2360 if usage.actions().timestamp != optionRemove { 2361 clock := e.protocol.stack.Clock() 2362 newBuffer := optIter.InitReplacement(option) 2363 optProblem := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage) 2364 return len(newBuffer), optProblem 2365 } 2366 2367 case *header.IPv4OptionRecordRoute: 2368 stats.OptionRecordRouteReceived.Increment() 2369 optionsProcessed.recordRoute = true 2370 if usage.actions().recordRoute != optionRemove { 2371 newBuffer := optIter.InitReplacement(option) 2372 optProblem := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), localAddress, usage) 2373 return len(newBuffer), optProblem 2374 } 2375 2376 case *header.IPv4OptionRouterAlert: 2377 stats.OptionRouterAlertReceived.Increment() 2378 optionsProcessed.routerAlert = true 2379 if usage.actions().routerAlert != optionRemove { 2380 newBuffer := optIter.InitReplacement(option) 2381 optProblem := handleRouterAlert(header.IPv4OptionRouterAlert(newBuffer)) 2382 return len(newBuffer), optProblem 2383 } 2384 2385 default: 2386 stats.OptionUnknownReceived.Increment() 2387 if usage.actions().unknown == optionPass { 2388 return len(optIter.InitReplacement(option)), nil 2389 } 2390 } 2391 return 0, nil 2392 }() 2393 2394 if optProblem != nil { 2395 optProblem.Pointer += optIter.ErrCursor 2396 return nil, optionTracker{}, optProblem 2397 } 2398 optIter.ConsumeBuffer(optLen) 2399 } 2400 }