github.com/sagernet/gvisor@v0.0.0-20240428053021-e691de28565f/pkg/tcpip/network/ipv4/ipv4.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ipv4 contains the implementation of the ipv4 network protocol. 16 package ipv4 17 18 import ( 19 "fmt" 20 "math" 21 "reflect" 22 "time" 23 24 "github.com/sagernet/gvisor/pkg/atomicbitops" 25 "github.com/sagernet/gvisor/pkg/buffer" 26 "github.com/sagernet/gvisor/pkg/sync" 27 "github.com/sagernet/gvisor/pkg/tcpip" 28 "github.com/sagernet/gvisor/pkg/tcpip/header" 29 "github.com/sagernet/gvisor/pkg/tcpip/header/parse" 30 "github.com/sagernet/gvisor/pkg/tcpip/network/hash" 31 "github.com/sagernet/gvisor/pkg/tcpip/network/internal/fragmentation" 32 "github.com/sagernet/gvisor/pkg/tcpip/network/internal/ip" 33 "github.com/sagernet/gvisor/pkg/tcpip/network/internal/multicast" 34 "github.com/sagernet/gvisor/pkg/tcpip/stack" 35 ) 36 37 const ( 38 // ReassembleTimeout is the time a packet stays in the reassembly 39 // system before being evicted. 40 // As per RFC 791 section 3.2: 41 // The current recommendation for the initial timer setting is 15 seconds. 42 // This may be changed as experience with this protocol accumulates. 
43 // 44 // Considering that it is an old recommendation, we use the same reassembly 45 // timeout that linux defines, which is 30 seconds: 46 // https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138 47 ReassembleTimeout = 30 * time.Second 48 49 // ProtocolNumber is the ipv4 protocol number. 50 ProtocolNumber = header.IPv4ProtocolNumber 51 52 // MaxTotalSize is maximum size that can be encoded in the 16-bit 53 // TotalLength field of the ipv4 header. 54 MaxTotalSize = 0xffff 55 56 // DefaultTTL is the default time-to-live value for this endpoint. 57 DefaultTTL = 64 58 59 // buckets is the number of identifier buckets. 60 buckets = 2048 61 62 // The size of a fragment block, in bytes, as per RFC 791 section 3.1, 63 // page 14. 64 fragmentblockSize = 8 65 ) 66 67 const ( 68 forwardingDisabled = 0 69 forwardingEnabled = 1 70 ) 71 72 var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix() 73 74 var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil) 75 var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil) 76 var _ stack.MulticastForwardingNetworkEndpoint = (*endpoint)(nil) 77 var _ stack.GroupAddressableEndpoint = (*endpoint)(nil) 78 var _ stack.AddressableEndpoint = (*endpoint)(nil) 79 var _ stack.NetworkEndpoint = (*endpoint)(nil) 80 var _ IGMPEndpoint = (*endpoint)(nil) 81 82 type endpoint struct { 83 nic stack.NetworkInterface 84 dispatcher stack.TransportDispatcher 85 protocol *protocol 86 stats sharedStats 87 88 // enabled is set to 1 when the endpoint is enabled and 0 when it is 89 // disabled. 90 enabled atomicbitops.Uint32 91 92 // forwarding is set to forwardingEnabled when the endpoint has forwarding 93 // enabled and forwardingDisabled when it is disabled. 94 forwarding atomicbitops.Uint32 95 96 // multicastForwarding is set to forwardingEnabled when the endpoint has 97 // forwarding enabled and forwardingDisabled when it is disabled. 
98 // 99 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 100 //forwarding. Currently, setting this value to true is a no-op. 101 multicastForwarding atomicbitops.Uint32 102 103 // mu protects below. 104 mu sync.RWMutex 105 106 // +checklocks:mu 107 addressableEndpointState stack.AddressableEndpointState 108 109 // +checklocks:mu 110 igmp igmpState 111 } 112 113 // SetIGMPVersion implements IGMPEndpoint. 114 func (e *endpoint) SetIGMPVersion(v IGMPVersion) IGMPVersion { 115 e.mu.Lock() 116 defer e.mu.Unlock() 117 return e.setIGMPVersionLocked(v) 118 } 119 120 // GetIGMPVersion implements IGMPEndpoint. 121 func (e *endpoint) GetIGMPVersion() IGMPVersion { 122 e.mu.RLock() 123 defer e.mu.RUnlock() 124 return e.getIGMPVersionLocked() 125 } 126 127 // +checklocks:e.mu 128 // +checklocksalias:e.igmp.ep.mu=e.mu 129 func (e *endpoint) setIGMPVersionLocked(v IGMPVersion) IGMPVersion { 130 return e.igmp.setVersion(v) 131 } 132 133 // +checklocksread:e.mu 134 // +checklocksalias:e.igmp.ep.mu=e.mu 135 func (e *endpoint) getIGMPVersionLocked() IGMPVersion { 136 return e.igmp.getVersion() 137 } 138 139 // HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint. 140 func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { 141 // If we are operating as a router, return an ICMP error to the original 142 // packet's sender. 143 if pkt.NetworkPacketInfo.IsForwardedPacket { 144 // TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP 145 // errors to local endpoints. 146 e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt, false /* deliveredLocally */) 147 e.stats.ip.Forwarding.Errors.Increment() 148 e.stats.ip.Forwarding.HostUnreachable.Increment() 149 return 150 } 151 // handleControl expects the entire offending packet to be in the packet 152 // buffer's data field. 
153 pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{ 154 Payload: pkt.ToBuffer(), 155 }) 156 defer pkt.DecRef() 157 pkt.NICID = e.nic.ID() 158 pkt.NetworkProtocolNumber = ProtocolNumber 159 // Use the same control type as an ICMPv4 destination host unreachable error 160 // since the host is considered unreachable if we cannot resolve the link 161 // address to the next hop. 162 e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt) 163 } 164 165 // NewEndpoint creates a new ipv4 endpoint. 166 func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { 167 e := &endpoint{ 168 nic: nic, 169 dispatcher: dispatcher, 170 protocol: p, 171 } 172 e.mu.Lock() 173 e.addressableEndpointState.Init(e, stack.AddressableEndpointStateOptions{HiddenWhileDisabled: false}) 174 e.igmp.init(e) 175 e.mu.Unlock() 176 177 tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem()) 178 179 stackStats := p.stack.Stats() 180 e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP) 181 e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V4) 182 e.stats.igmp.init(&e.stats.localStats.IGMP, &stackStats.IGMP) 183 184 p.mu.Lock() 185 p.eps[nic.ID()] = e 186 p.mu.Unlock() 187 188 return e 189 } 190 191 func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint { 192 p.mu.RLock() 193 defer p.mu.RUnlock() 194 195 for _, e := range p.eps { 196 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint, true /* readOnly */); addressEndpoint != nil { 197 return e 198 } 199 } 200 201 return nil 202 } 203 204 func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) { 205 p.mu.RLock() 206 defer p.mu.RUnlock() 207 ep, ok := p.eps[id] 208 return ep, ok 209 } 210 211 func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { 212 p.mu.Lock() 213 defer p.mu.Unlock() 214 delete(p.eps, nicID) 215 } 216 217 // Forwarding implements 
stack.ForwardingNetworkEndpoint. 218 func (e *endpoint) Forwarding() bool { 219 return e.forwarding.Load() == forwardingEnabled 220 } 221 222 // setForwarding sets the forwarding status for the endpoint. 223 // 224 // Returns the previous forwarding status. 225 func (e *endpoint) setForwarding(v bool) bool { 226 forwarding := uint32(forwardingDisabled) 227 if v { 228 forwarding = forwardingEnabled 229 } 230 231 return e.forwarding.Swap(forwarding) != forwardingDisabled 232 } 233 234 // SetForwarding implements stack.ForwardingNetworkEndpoint. 235 func (e *endpoint) SetForwarding(forwarding bool) bool { 236 e.mu.Lock() 237 defer e.mu.Unlock() 238 239 prevForwarding := e.setForwarding(forwarding) 240 if prevForwarding == forwarding { 241 return prevForwarding 242 } 243 244 if forwarding { 245 // There does not seem to be an RFC requirement for a node to join the all 246 // routers multicast address but 247 // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml 248 // specifies the address as a group for all routers on a subnet so we join 249 // the group here. 250 if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil { 251 // joinGroupLocked only returns an error if the group address is not a 252 // valid IPv4 multicast address. 253 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 254 } 255 256 return prevForwarding 257 } 258 259 switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) { 260 case nil: 261 case *tcpip.ErrBadLocalAddress: 262 // The endpoint may have already left the multicast group. 263 default: 264 panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 265 } 266 267 return prevForwarding 268 } 269 270 // MulticastForwarding implements stack.MulticastForwardingNetworkEndpoint. 
271 func (e *endpoint) MulticastForwarding() bool { 272 return e.multicastForwarding.Load() == forwardingEnabled 273 } 274 275 // SetMulticastForwarding implements stack.MulticastForwardingNetworkEndpoint. 276 func (e *endpoint) SetMulticastForwarding(forwarding bool) bool { 277 updatedForwarding := uint32(forwardingDisabled) 278 if forwarding { 279 updatedForwarding = forwardingEnabled 280 } 281 282 return e.multicastForwarding.Swap(updatedForwarding) != forwardingDisabled 283 } 284 285 // Enable implements stack.NetworkEndpoint. 286 func (e *endpoint) Enable() tcpip.Error { 287 e.mu.Lock() 288 defer e.mu.Unlock() 289 return e.enableLocked() 290 } 291 292 // +checklocks:e.mu 293 // +checklocksalias:e.igmp.ep.mu=e.mu 294 func (e *endpoint) enableLocked() tcpip.Error { 295 // If the NIC is not enabled, the endpoint can't do anything meaningful so 296 // don't enable the endpoint. 297 if !e.nic.Enabled() { 298 return &tcpip.ErrNotPermitted{} 299 } 300 301 // If the endpoint is already enabled, there is nothing for it to do. 302 if !e.setEnabled(true) { 303 return nil 304 } 305 306 // Must be called after Enabled has already been set. 307 e.addressableEndpointState.OnNetworkEndpointEnabledChanged() 308 309 // Create an endpoint to receive broadcast packets on this interface. 310 ep, err := e.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.AddressProperties{PEB: stack.NeverPrimaryEndpoint}) 311 if err != nil { 312 return err 313 } 314 // We have no need for the address endpoint. 315 ep.DecRef() 316 317 // Groups may have been joined while the endpoint was disabled, or the 318 // endpoint may have left groups from the perspective of IGMP when the 319 // endpoint was disabled. Either way, we need to let routers know to 320 // send us multicast traffic. 321 e.igmp.initializeAll() 322 323 // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts 324 // multicast group. 
Note, the IANA calls the all-hosts multicast group the 325 // all-systems multicast group. 326 if err := e.joinGroupLocked(header.IPv4AllSystems); err != nil { 327 // joinGroupLocked only returns an error if the group address is not a valid 328 // IPv4 multicast address. 329 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllSystems, err)) 330 } 331 332 return nil 333 } 334 335 // Enabled implements stack.NetworkEndpoint. 336 func (e *endpoint) Enabled() bool { 337 return e.nic.Enabled() && e.isEnabled() 338 } 339 340 // isEnabled returns true if the endpoint is enabled, regardless of the 341 // enabled status of the NIC. 342 func (e *endpoint) isEnabled() bool { 343 return e.enabled.Load() == 1 344 } 345 346 // setEnabled sets the enabled status for the endpoint. 347 // 348 // Returns true if the enabled status was updated. 349 func (e *endpoint) setEnabled(v bool) bool { 350 if v { 351 return e.enabled.Swap(1) == 0 352 } 353 return e.enabled.Swap(0) == 1 354 } 355 356 // Disable implements stack.NetworkEndpoint. 357 func (e *endpoint) Disable() { 358 e.mu.Lock() 359 defer e.mu.Unlock() 360 e.disableLocked() 361 } 362 363 // +checklocks:e.mu 364 // +checklocksalias:e.igmp.ep.mu=e.mu 365 func (e *endpoint) disableLocked() { 366 if !e.isEnabled() { 367 return 368 } 369 370 // The endpoint may have already left the multicast group. 371 switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) { 372 case nil, *tcpip.ErrBadLocalAddress: 373 default: 374 panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err)) 375 } 376 377 // Leave groups from the perspective of IGMP so that routers know that 378 // we are no longer interested in the group. 379 e.igmp.softLeaveAll() 380 381 // The address may have already been removed. 
382 switch err := e.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err.(type) { 383 case nil, *tcpip.ErrBadLocalAddress: 384 default: 385 panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err)) 386 } 387 388 // Reset the IGMP V1 present flag. 389 // 390 // If the node comes back up on the same network, it will re-learn that it 391 // needs to perform IGMPv1. 392 e.igmp.resetV1Present() 393 394 if !e.setEnabled(false) { 395 panic("should have only done work to disable the endpoint if it was enabled") 396 } 397 398 // Must be called after Enabled has been set. 399 e.addressableEndpointState.OnNetworkEndpointEnabledChanged() 400 } 401 402 // emitMulticastEvent emits a multicast forwarding event using the provided 403 // generator if a valid event dispatcher exists. 404 func (e *endpoint) emitMulticastEvent(eventGenerator func(stack.MulticastForwardingEventDispatcher)) { 405 e.protocol.mu.RLock() 406 defer e.protocol.mu.RUnlock() 407 408 if mcastDisp := e.protocol.multicastForwardingDisp; mcastDisp != nil { 409 eventGenerator(mcastDisp) 410 } 411 } 412 413 // DefaultTTL is the default time-to-live value for this endpoint. 414 func (e *endpoint) DefaultTTL() uint8 { 415 return e.protocol.DefaultTTL() 416 } 417 418 // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the 419 // network layer max header length. 420 func (e *endpoint) MTU() uint32 { 421 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize) 422 if err != nil { 423 return 0 424 } 425 return networkMTU 426 } 427 428 // MaxHeaderLength returns the maximum length needed by ipv4 headers (and 429 // underlying protocols). 430 func (e *endpoint) MaxHeaderLength() uint16 { 431 return e.nic.MaxHeaderLength() + header.IPv4MaximumHeaderSize 432 } 433 434 // NetworkProtocolNumber implements stack.NetworkEndpoint. 
435 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { 436 return e.protocol.Number() 437 } 438 439 // getID returns a random uint16 number (other than zero) to be used as ID in 440 // the IPv4 header. 441 func (e *endpoint) getID() uint16 { 442 rng := e.protocol.stack.SecureRNG() 443 id := rng.Uint16() 444 for id == 0 { 445 id = rng.Uint16() 446 } 447 return id 448 } 449 450 func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { 451 hdrLen := header.IPv4MinimumSize 452 var optLen int 453 if options != nil { 454 optLen = int(options.Length()) 455 } 456 hdrLen += optLen 457 if hdrLen > header.IPv4MaximumHeaderSize { 458 return &tcpip.ErrMessageTooLong{} 459 } 460 ipH := header.IPv4(pkt.NetworkHeader().Push(hdrLen)) 461 length := pkt.Size() 462 if length > math.MaxUint16 { 463 return &tcpip.ErrMessageTooLong{} 464 } 465 // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic 466 // datagrams. Since the DF bit is never being set here, all datagrams 467 // are non-atomic and need an ID. 468 ipH.Encode(&header.IPv4Fields{ 469 TotalLength: uint16(length), 470 ID: e.getID(), 471 TTL: params.TTL, 472 TOS: params.TOS, 473 Protocol: uint8(params.Protocol), 474 SrcAddr: srcAddr, 475 DstAddr: dstAddr, 476 Options: options, 477 }) 478 ipH.SetChecksum(^ipH.CalculateChecksum()) 479 pkt.NetworkProtocolNumber = ProtocolNumber 480 return nil 481 } 482 483 // handleFragments fragments pkt and calls the handler function on each 484 // fragment. It returns the number of fragments handled and the number of 485 // fragments left to be processed. The IP header must already be present in the 486 // original packet. 487 func (e *endpoint) handleFragments(_ *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { 488 // Round the MTU down to align to 8 bytes. 
489 fragmentPayloadSize := networkMTU &^ 7 490 networkHeader := header.IPv4(pkt.NetworkHeader().Slice()) 491 pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader)) 492 defer pf.Release() 493 494 var n int 495 for { 496 fragPkt, more := buildNextFragment(&pf, networkHeader) 497 err := handler(fragPkt) 498 fragPkt.DecRef() 499 if err != nil { 500 return n, pf.RemainingFragmentCount() + 1, err 501 } 502 n++ 503 if !more { 504 return n, pf.RemainingFragmentCount(), nil 505 } 506 } 507 } 508 509 // WritePacket writes a packet to the given destination address and protocol. 510 func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { 511 if err := e.addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* options */); err != nil { 512 return err 513 } 514 515 return e.writePacket(r, pkt) 516 } 517 518 func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 519 netHeader := header.IPv4(pkt.NetworkHeader().Slice()) 520 dstAddr := netHeader.DestinationAddress() 521 522 // iptables filtering. All packets that reach here are locally 523 // generated. 524 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 525 if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { 526 // iptables is telling us to drop the packet. 527 e.stats.ip.IPTablesOutputDropped.Increment() 528 return nil 529 } 530 531 // If the packet is manipulated as per DNAT Output rules, handle packet 532 // based on destination address and do not send the packet to link 533 // layer. 534 // 535 // We should do this for every packet, rather than only DNATted packets, but 536 // removing this check short circuits broadcasts before they are sent out to 537 // other hosts. 
538 if newDstAddr := netHeader.DestinationAddress(); dstAddr != newDstAddr { 539 if ep := e.protocol.findEndpointWithAddress(newDstAddr); ep != nil { 540 // Since we rewrote the packet but it is being routed back to us, we 541 // can safely assume the checksum is valid. 542 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 543 return nil 544 } 545 } 546 547 return e.writePacketPostRouting(r, pkt, false /* headerIncluded */) 548 } 549 550 func (e *endpoint) writePacketPostRouting(r *stack.Route, pkt *stack.PacketBuffer, headerIncluded bool) tcpip.Error { 551 if r.Loop()&stack.PacketLoop != 0 { 552 // If the packet was generated by the stack (not a raw/packet endpoint 553 // where a packet may be written with the header included), then we can 554 // safely assume the checksum is valid. 555 e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) 556 } 557 if r.Loop()&stack.PacketOut == 0 { 558 return nil 559 } 560 561 // Postrouting NAT can only change the source address, and does not alter the 562 // route or outgoing interface of the packet. 563 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 564 if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { 565 // iptables is telling us to drop the packet. 566 e.stats.ip.IPTablesPostroutingDropped.Increment() 567 return nil 568 } 569 570 stats := e.stats.ip 571 572 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(len(pkt.NetworkHeader().Slice()))) 573 if err != nil { 574 stats.OutgoingPacketErrors.Increment() 575 return err 576 } 577 578 if packetMustBeFragmented(pkt, networkMTU) { 579 h := header.IPv4(pkt.NetworkHeader().Slice()) 580 if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { 581 // TODO(gvisor.dev/issue/5919): Handle error condition in which DontFragment 582 // is set but the packet must be fragmented for the non-forwarding case. 
583 return &tcpip.ErrMessageTooLong{} 584 } 585 sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { 586 // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each 587 // fragment one by one using WritePacket() (current strategy) or if we 588 // want to create a PacketBufferList from the fragments and feed it to 589 // WritePackets(). It'll be faster but cost more memory. 590 return e.nic.WritePacket(r, fragPkt) 591 }) 592 stats.PacketsSent.IncrementBy(uint64(sent)) 593 stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) 594 return err 595 } 596 597 if err := e.nic.WritePacket(r, pkt); err != nil { 598 stats.OutgoingPacketErrors.Increment() 599 return err 600 } 601 stats.PacketsSent.Increment() 602 return nil 603 } 604 605 // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. 606 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 607 // The packet already has an IP header, but there are a few required 608 // checks. 609 h, ok := pkt.Data().PullUp(header.IPv4MinimumSize) 610 if !ok { 611 return &tcpip.ErrMalformedHeader{} 612 } 613 614 hdrLen := header.IPv4(h).HeaderLength() 615 if hdrLen < header.IPv4MinimumSize { 616 return &tcpip.ErrMalformedHeader{} 617 } 618 619 h, ok = pkt.Data().PullUp(int(hdrLen)) 620 if !ok { 621 return &tcpip.ErrMalformedHeader{} 622 } 623 ipH := header.IPv4(h) 624 625 // Always set the total length. 626 pktSize := pkt.Data().Size() 627 ipH.SetTotalLength(uint16(pktSize)) 628 629 // Set the source address when zero. 630 if ipH.SourceAddress() == header.IPv4Any { 631 ipH.SetSourceAddress(r.LocalAddress()) 632 } 633 634 // Set the packet ID when zero. 635 if ipH.ID() == 0 { 636 // RFC 6864 section 4.3 mandates uniqueness of ID values for 637 // non-atomic datagrams, so assign an ID to all such datagrams 638 // according to the definition given in RFC 6864 section 4. 
639 if ipH.Flags()&header.IPv4FlagDontFragment == 0 || ipH.Flags()&header.IPv4FlagMoreFragments != 0 || ipH.FragmentOffset() > 0 { 640 ipH.SetID(e.getID()) 641 } 642 } 643 644 // Always set the checksum. 645 ipH.SetChecksum(0) 646 ipH.SetChecksum(^ipH.CalculateChecksum()) 647 648 // Populate the packet buffer's network header and don't allow an invalid 649 // packet to be sent. 650 // 651 // Note that parsing only makes sure that the packet is well formed as per the 652 // wire format. We also want to check if the header's fields are valid before 653 // sending the packet. 654 if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().Slice()).IsValid(pktSize) { 655 return &tcpip.ErrMalformedHeader{} 656 } 657 658 return e.writePacketPostRouting(r, pkt, true /* headerIncluded */) 659 } 660 661 // forwardPacketWithRoute emits the pkt using the provided route. 662 // 663 // If updateOptions is true, then the IP options will be updated in the copied 664 // pkt using the outgoing endpoint. Otherwise, the caller is responsible for 665 // updating the options. 666 // 667 // This method should be invoked by the endpoint that received the pkt. 668 func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt *stack.PacketBuffer, updateOptions bool) ip.ForwardingError { 669 h := header.IPv4(pkt.NetworkHeader().Slice()) 670 stk := e.protocol.stack 671 672 inNicName := stk.FindNICNameFromID(e.nic.ID()) 673 outNicName := stk.FindNICNameFromID(route.NICID()) 674 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 675 // iptables is telling us to drop the packet. 676 e.stats.ip.IPTablesForwardDropped.Increment() 677 return nil 678 } 679 680 // We need to do a deep copy of the IP packet because 681 // WriteHeaderIncludedPacket may modify the packet buffer, but we do 682 // not own it. 683 // 684 // TODO(https://gvisor.dev/issue/7473): For multicast, only create one deep 685 // copy and then clone. 
686 newPkt := pkt.DeepCopyForForwarding(int(route.MaxHeaderLength())) 687 newHdr := header.IPv4(newPkt.NetworkHeader().Slice()) 688 defer newPkt.DecRef() 689 690 forwardToEp, ok := e.protocol.getEndpointForNIC(route.NICID()) 691 if !ok { 692 return &ip.ErrUnknownOutputEndpoint{} 693 } 694 695 if updateOptions { 696 if err := forwardToEp.updateOptionsForForwarding(newPkt); err != nil { 697 return err 698 } 699 } 700 701 ttl := h.TTL() 702 // As per RFC 791 page 30, Time to Live, 703 // 704 // This field must be decreased at each point that the internet header 705 // is processed to reflect the time spent processing the datagram. 706 // Even if no local information is available on the time actually 707 // spent, the field must be decremented by 1. 708 newHdr.SetTTL(ttl - 1) 709 // We perform a full checksum as we may have updated options above. The IP 710 // header is relatively small so this is not expected to be an expensive 711 // operation. 712 newHdr.SetChecksum(0) 713 newHdr.SetChecksum(^newHdr.CalculateChecksum()) 714 715 switch err := forwardToEp.writePacketPostRouting(route, newPkt, true /* headerIncluded */); err.(type) { 716 case nil: 717 return nil 718 case *tcpip.ErrMessageTooLong: 719 // As per RFC 792, page 4, Destination Unreachable: 720 // 721 // Another case is when a datagram must be fragmented to be forwarded by a 722 // gateway yet the Don't Fragment flag is on. In this case the gateway must 723 // discard the datagram and may return a destination unreachable message. 724 // 725 // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit 726 // while sending the packet and returns this error iff fragmentation is 727 // necessary and the bit is also set. 
728 _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt, false /* deliveredLocally */) 729 return &ip.ErrMessageTooLong{} 730 case *tcpip.ErrNoBufferSpace: 731 return &ip.ErrOutgoingDeviceNoBufferSpace{} 732 default: 733 return &ip.ErrOther{Err: err} 734 } 735 } 736 737 // forwardUnicastPacket attempts to forward a packet to its final destination. 738 func (e *endpoint) forwardUnicastPacket(pkt *stack.PacketBuffer) ip.ForwardingError { 739 hView := pkt.NetworkHeader().View() 740 defer hView.Release() 741 h := header.IPv4(hView.AsSlice()) 742 743 dstAddr := h.DestinationAddress() 744 745 if err := validateAddressesForForwarding(h); err != nil { 746 return err 747 } 748 749 ttl := h.TTL() 750 if ttl == 0 { 751 // As per RFC 792 page 6, Time Exceeded Message, 752 // 753 // If the gateway processing a datagram finds the time to live field 754 // is zero it must discard the datagram. The gateway may also notify 755 // the source host via the time exceeded message. 756 // 757 // We return the original error rather than the result of returning 758 // the ICMP packet because the original error is more relevant to 759 // the caller. 760 _ = e.protocol.returnError(&icmpReasonTTLExceeded{}, pkt, false /* deliveredLocally */) 761 return &ip.ErrTTLExceeded{} 762 } 763 764 if err := e.updateOptionsForForwarding(pkt); err != nil { 765 return err 766 } 767 768 stk := e.protocol.stack 769 770 // Check if the destination is owned by the stack. 771 if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { 772 inNicName := stk.FindNICNameFromID(e.nic.ID()) 773 outNicName := stk.FindNICNameFromID(ep.nic.ID()) 774 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 775 // iptables is telling us to drop the packet. 776 e.stats.ip.IPTablesForwardDropped.Increment() 777 return nil 778 } 779 780 // The packet originally arrived on e so provide its NIC as the input NIC. 
781 ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 782 return nil 783 } 784 785 r, err := stk.FindRoute(0, tcpip.Address{}, dstAddr, ProtocolNumber, false /* multicastLoop */) 786 switch err.(type) { 787 case nil: 788 // TODO(https://gvisor.dev/issues/8105): We should not observe ErrHostUnreachable from route 789 // lookups. 790 case *tcpip.ErrHostUnreachable, *tcpip.ErrNetworkUnreachable: 791 // We return the original error rather than the result of returning 792 // the ICMP packet because the original error is more relevant to 793 // the caller. 794 _ = e.protocol.returnError(&icmpReasonNetworkUnreachable{}, pkt, false /* deliveredLocally */) 795 return &ip.ErrHostUnreachable{} 796 default: 797 return &ip.ErrOther{Err: err} 798 } 799 defer r.Release() 800 801 // TODO(https://gvisor.dev/issue/7472): Unicast IP options should be updated 802 // using the output endpoint (instead of the input endpoint). In particular, 803 // RFC 1812 section 5.2.1 states the following: 804 // 805 // Processing of certain IP options requires that the router insert its IP 806 // address into the option. As noted in Section [5.2.4], the address 807 // inserted MUST be the address of the logical interface on which the 808 // packet is sent or the router's router-id if the packet is sent over an 809 // unnumbered interface. Thus, processing of these options cannot be 810 // completed until after the output interface is chosen. 811 return e.forwardPacketWithRoute(r, pkt, false /* updateOptions */) 812 } 813 814 // HandlePacket is called by the link layer when new ipv4 packets arrive for 815 // this endpoint. 
816 func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { 817 stats := e.stats.ip 818 819 stats.PacketsReceived.Increment() 820 821 if !e.isEnabled() { 822 stats.DisabledPacketsReceived.Increment() 823 return 824 } 825 826 hView, ok := e.protocol.parseAndValidate(pkt) 827 if !ok { 828 stats.MalformedPacketsReceived.Increment() 829 return 830 } 831 h := header.IPv4(hView.AsSlice()) 832 defer hView.Release() 833 834 if !e.nic.IsLoopback() { 835 if !e.protocol.options.AllowExternalLoopbackTraffic { 836 if header.IsV4LoopbackAddress(h.SourceAddress()) { 837 stats.InvalidSourceAddressesReceived.Increment() 838 return 839 } 840 841 if header.IsV4LoopbackAddress(h.DestinationAddress()) { 842 stats.InvalidDestinationAddressesReceived.Increment() 843 return 844 } 845 } 846 847 if e.protocol.stack.HandleLocal() { 848 addressEndpoint := e.AcquireAssignedAddress(header.IPv4(pkt.NetworkHeader().Slice()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint, true /* readOnly */) 849 if addressEndpoint != nil { 850 // The source address is one of our own, so we never should have gotten 851 // a packet like this unless HandleLocal is false or our NIC is the 852 // loopback interface. 853 stats.InvalidSourceAddressesReceived.Increment() 854 return 855 } 856 } 857 858 // Loopback traffic skips the prerouting chain. 859 inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 860 if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { 861 // iptables is telling us to drop the packet. 862 stats.IPTablesPreroutingDropped.Increment() 863 return 864 } 865 } 866 867 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 868 } 869 870 // handleLocalPacket is like HandlePacket except it does not perform the 871 // prerouting iptables hook or check for loopback traffic that originated from 872 // outside of the netstack (i.e. martian loopback packets). 
func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) {
	stats := e.stats.ip
	stats.PacketsReceived.Increment()

	// Work on an inbound clone of the packet; the clone's reference is dropped
	// when this function returns.
	pkt = pkt.CloneToInbound()
	defer pkt.DecRef()
	pkt.RXChecksumValidated = canSkipRXChecksum

	hView, ok := e.protocol.parseAndValidate(pkt)
	if !ok {
		stats.MalformedPacketsReceived.Increment()
		return
	}
	h := header.IPv4(hView.AsSlice())
	defer hView.Release()

	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
}

// validateAddressesForForwarding checks whether the source and destination
// addresses in h permit the packet to be forwarded.
//
// It returns ip.ErrInitializingSourceAddress when the source lies in
// header.IPv4CurrentNetworkSubnet, ip.ErrLinkLocalSourceAddress when the source
// is a link-local unicast address, ip.ErrLinkLocalDestinationAddress when the
// destination is a link-local unicast or link-local multicast address, and nil
// otherwise.
func validateAddressesForForwarding(h header.IPv4) ip.ForwardingError {
	srcAddr := h.SourceAddress()

	// As per RFC 5735 section 3,
	//
	//   0.0.0.0/8 - Addresses in this block refer to source hosts on "this"
	//   network. Address 0.0.0.0/32 may be used as a source address for this
	//   host on this network; other addresses within 0.0.0.0/8 may be used to
	//   refer to specified hosts on this network ([RFC1122], Section 3.2.1.3).
	//
	// And RFC 6890 section 2.2.2,
	//
	//   +----------------------+----------------------------+
	//   | Attribute            | Value                      |
	//   +----------------------+----------------------------+
	//   | Address Block        | 0.0.0.0/8                  |
	//   | Name                 | "This host on this network"|
	//   | RFC                  | [RFC1122], Section 3.2.1.3 |
	//   | Allocation Date      | September 1981             |
	//   | Termination Date     | N/A                        |
	//   | Source               | True                       |
	//   | Destination          | False                      |
	//   | Forwardable          | False                      |
	//   | Global               | False                      |
	//   | Reserved-by-Protocol | True                       |
	//   +----------------------+----------------------------+
	if header.IPv4CurrentNetworkSubnet.Contains(srcAddr) {
		return &ip.ErrInitializingSourceAddress{}
	}

	// As per RFC 3927 section 7,
	//
	//   A router MUST NOT forward a packet with an IPv4 Link-Local source or
	//   destination address, irrespective of the router's default route
	//   configuration or routes obtained from dynamic routing protocols.
	//
	//   A router which receives a packet with an IPv4 Link-Local source or
	//   destination address MUST NOT forward the packet. This prevents
	//   forwarding of packets back onto the network segment from which they
	//   originated, or to any other segment.
	if header.IsV4LinkLocalUnicastAddress(srcAddr) {
		return &ip.ErrLinkLocalSourceAddress{}
	}
	if dstAddr := h.DestinationAddress(); header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) {
		return &ip.ErrLinkLocalDestinationAddress{}
	}
	return nil
}

// forwardMulticastPacket validates a multicast pkt and attempts to forward it.
//
// This method should be invoked for incoming multicast packets using the
// endpoint that received the packet.
//
// When no installed route exists for the packet's (source, destination) pair,
// the packet is handed to the multicast route table's pending queue and
// ip.ErrHostUnreachable is returned.
func (e *endpoint) forwardMulticastPacket(h header.IPv4, pkt *stack.PacketBuffer) ip.ForwardingError {
	if err := validateAddressesForForwarding(h); err != nil {
		return err
	}

	if opts := h.Options(); len(opts) != 0 {
		// Check if the options are valid, but don't mutate them. This corresponds
		// to step 3 of RFC 1812 section 5.2.1.1.
		if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil {
			// Per RFC 1812 section 4.3.2.7, an ICMP error message should not be
			// sent for:
			//
			//   A packet destined to an IP broadcast or IP multicast address.
			//
			// Note that protocol.returnError also enforces this requirement.
			// However, we intentionally omit it here since this path is multicast
			// only.
			return &ip.ErrParameterProblem{}
		}
	}

	routeKey := stack.UnicastSourceAndMulticastDestination{
		Source:      h.SourceAddress(),
		Destination: h.DestinationAddress(),
	}

	// The pkt has been validated. Consequently, if a route is not found, then
	// the pkt can safely be queued.
	result, hasBufferSpace := e.protocol.multicastRouteTable.GetRouteOrInsertPending(routeKey, pkt)

	if !hasBufferSpace {
		// Unable to queue the pkt. Silently drop it.
		return &ip.ErrNoMulticastPendingQueueBufferSpace{}
	}

	switch result.GetRouteResultState {
	case multicast.InstalledRouteFound:
		// Attempt to forward the pkt using an existing route.
		return e.forwardValidatedMulticastPacket(pkt, result.InstalledRoute)
	case multicast.NoRouteFoundAndPendingInserted:
		// This is the first packet queued for a route that is not installed, so
		// notify the event dispatcher about the missing route.
		e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) {
			disp.OnMissingRoute(stack.MulticastPacketContext{
				stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()},
				e.nic.ID(),
			})
		})
	case multicast.PacketQueuedInPendingRoute:
		// The packet joined an already pending route; no event is emitted.
	default:
		panic(fmt.Sprintf("unexpected GetRouteResultState: %s", result.GetRouteResultState))
	}
	// Both "no installed route" outcomes above are reported as unreachable.
	return &ip.ErrHostUnreachable{}
}

// updateOptionsForForwarding processes the IP options of pkt for forwarding and
// writes the processed options back into the packet's network header in place.
//
// If option processing reports a problem, ip.ErrParameterProblem is returned;
// an ICMP parameter problem is requested first when the problem asks for one.
// Packets without options are left untouched and nil is returned.
func (e *endpoint) updateOptionsForForwarding(pkt *stack.PacketBuffer) ip.ForwardingError {
	h := header.IPv4(pkt.NetworkHeader().Slice())
	if opts := h.Options(); len(opts) != 0 {
		newOpts, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageForward{})
		if optProblem != nil {
			if optProblem.NeedICMP {
				// Note that this will not emit an ICMP error if the destination is
				// multicast.
				_ = e.protocol.returnError(&icmpReasonParamProblem{
					pointer: optProblem.Pointer,
				}, pkt, false /* deliveredLocally */)
			}
			return &ip.ErrParameterProblem{}
		}
		// newOpts must fit entirely inside the existing options region.
		copied := copy(opts, newOpts)
		if copied != len(newOpts) {
			panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts)))
		}
		// Since in forwarding we handle all options, including copying those we
		// do not recognise, the options region should remain the same size which
		// simplifies processing. As we MAY receive a packet with a lot of padded
		// bytes after the "end of options list" byte, make sure we copy
		// them as the legal padding value (0).
		for i := copied; i < len(opts); i++ {
			// Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero".
			opts[i] = byte(header.IPv4OptionListEndType)
		}
	}
	return nil
}

// forwardValidatedMulticastPacket attempts to forward the pkt using the
// provided installedRoute.
//
// This method should be invoked by the endpoint that received the pkt.
//
// Failures on individual outgoing interfaces are passed to
// handleForwardingError and do not stop forwarding to the remaining
// interfaces; nil is returned once every outgoing interface has been tried.
func (e *endpoint) forwardValidatedMulticastPacket(pkt *stack.PacketBuffer, installedRoute *multicast.InstalledRoute) ip.ForwardingError {
	// Per RFC 1812 section 5.2.1.3,
	//
	//   Based on the IP source and destination addresses found in the datagram
	//   header, the router determines whether the datagram has been received
	//   on the proper interface for forwarding. If not, the datagram is
	//   dropped silently.
	if e.nic.ID() != installedRoute.ExpectedInputInterface {
		h := header.IPv4(pkt.NetworkHeader().Slice())
		e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) {
			disp.OnUnexpectedInputInterface(stack.MulticastPacketContext{
				stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()},
				e.nic.ID(),
			}, installedRoute.ExpectedInputInterface)
		})
		return &ip.ErrUnexpectedMulticastInputInterface{}
	}

	for _, outgoingInterface := range installedRoute.OutgoingInterfaces {
		if err := e.forwardMulticastPacketForOutgoingInterface(pkt, outgoingInterface); err != nil {
			e.handleForwardingError(err)
			continue
		}
		// The pkt was successfully forwarded. Mark the route as used.
		installedRoute.SetLastUsedTimestamp(e.protocol.stack.Clock().NowMonotonic())
	}
	return nil
}

// forwardMulticastPacketForOutgoingInterface attempts to forward the pkt out
// of the provided outgoingInterface.
//
// This method should be invoked by the endpoint that received the pkt.
func (e *endpoint) forwardMulticastPacketForOutgoingInterface(pkt *stack.PacketBuffer, outgoingInterface stack.MulticastRouteOutgoingInterface) ip.ForwardingError {
	h := header.IPv4(pkt.NetworkHeader().Slice())

	// Per RFC 1812 section 5.2.1.3,
	//
	//   A copy of the multicast datagram is forwarded out each outgoing
	//   interface whose minimum TTL value is less than or equal to the TTL
	//   value in the datagram header.
	//
	// Copying of the packet is deferred to forwardPacketWithRoute since unicast
	// and multicast both require a copy.
	if outgoingInterface.MinTTL > h.TTL() {
		return &ip.ErrTTLExceeded{}
	}

	route := e.protocol.stack.NewRouteForMulticast(outgoingInterface.ID, h.DestinationAddress(), e.NetworkProtocolNumber())

	if route == nil {
		// Failed to convert to a stack.Route. This likely means that the outgoing
		// endpoint no longer exists.
		return &ip.ErrHostUnreachable{}
	}
	defer route.Release()

	return e.forwardPacketWithRoute(route, pkt, true /* updateOptions */)
}

// handleValidatedPacket dispatches a packet whose IPv4 header h has already
// been parsed and validated.
//
// It records the receiving NIC on pkt, hands unfragmented packets to raw
// sockets, rejects packets with an invalid source address, and then either
// delivers the packet locally, forwards it, or counts it as having an invalid
// destination address. inNICName is passed through to local delivery.
func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) {
	pkt.NICID = e.nic.ID()

	// Raw socket packets are delivered based solely on the transport protocol
	// number. We only require that the packet be valid IPv4, and that they not
	// be fragmented.
	if !h.More() && h.FragmentOffset() == 0 {
		e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt)
	}

	stats := e.stats
	stats.ip.ValidPacketsReceived.Increment()

	srcAddr := h.SourceAddress()
	dstAddr := h.DestinationAddress()

	// As per RFC 1122 section 3.2.1.3:
	//   When a host sends any datagram, the IP source address MUST
	//   be one of its own IP addresses (but not a broadcast or
	//   multicast address).
	if srcAddr == header.IPv4Broadcast || header.IsV4MulticastAddress(srcAddr) {
		stats.ip.InvalidSourceAddressesReceived.Increment()
		return
	}
	// Make sure the source address is not a subnet-local broadcast address.
	if addressEndpoint := e.AcquireAssignedAddress(srcAddr, false /* allowTemp */, stack.NeverPrimaryEndpoint, true /* readOnly */); addressEndpoint != nil {
		subnet := addressEndpoint.Subnet()
		if subnet.IsBroadcast(srcAddr) {
			stats.ip.InvalidSourceAddressesReceived.Increment()
			return
		}
	}

	if header.IsV4MulticastAddress(dstAddr) {
		// Handle all packets destined to a multicast address separately. Unlike
		// unicast, these packets can be both delivered locally and forwarded. See
		// RFC 1812 section 5.2.3 for details regarding the forwarding/local
		// delivery decision.

		// Forwarding requires it to be enabled on both this endpoint and the
		// protocol.
		multicastForwarding := e.MulticastForwarding() && e.protocol.multicastForwarding()

		if multicastForwarding {
			e.handleForwardingError(e.forwardMulticastPacket(h, pkt))
		}

		if e.IsInGroup(dstAddr) {
			e.deliverPacketLocally(h, pkt, inNICName)
			return
		}

		if !multicastForwarding {
			// Only consider the destination address invalid if we didn't attempt to
			// forward the pkt and it was not delivered locally.
			stats.ip.InvalidDestinationAddressesReceived.Increment()
		}
		return
	}

	// Before we do any processing, check if the packet was received as some
	// sort of broadcast.
	//
	// If the packet is destined for this device, then it should be delivered
	// locally. Otherwise, if forwarding is enabled, it should be forwarded.
	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint, true /* readOnly */); addressEndpoint != nil {
		subnet := addressEndpoint.AddressWithPrefix().Subnet()
		pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast
		e.deliverPacketLocally(h, pkt, inNICName)
	} else if e.Forwarding() {
		e.handleForwardingError(e.forwardUnicastPacket(pkt))
	} else {
		stats.ip.InvalidDestinationAddressesReceived.Increment()
	}
}

// handleForwardingError processes the provided err and increments any relevant
// counters.
func (e *endpoint) handleForwardingError(err ip.ForwardingError) {
	stats := e.stats.ip
	switch err := err.(type) {
	case nil:
		// Not an error: no counter, including the aggregate one, is touched.
		return
	case *ip.ErrInitializingSourceAddress:
		stats.Forwarding.InitializingSource.Increment()
	case *ip.ErrLinkLocalSourceAddress:
		stats.Forwarding.LinkLocalSource.Increment()
	case *ip.ErrLinkLocalDestinationAddress:
		stats.Forwarding.LinkLocalDestination.Increment()
	case *ip.ErrTTLExceeded:
		stats.Forwarding.ExhaustedTTL.Increment()
	case *ip.ErrHostUnreachable:
		stats.Forwarding.Unrouteable.Increment()
	case *ip.ErrParameterProblem:
		stats.MalformedPacketsReceived.Increment()
	case *ip.ErrMessageTooLong:
		stats.Forwarding.PacketTooBig.Increment()
	case *ip.ErrNoMulticastPendingQueueBufferSpace:
		stats.Forwarding.NoMulticastPendingQueueBufferSpace.Increment()
	case *ip.ErrUnexpectedMulticastInputInterface:
		stats.Forwarding.UnexpectedMulticastInputInterface.Increment()
	case *ip.ErrUnknownOutputEndpoint:
		stats.Forwarding.UnknownOutputEndpoint.Increment()
	case *ip.ErrOutgoingDeviceNoBufferSpace:
		stats.Forwarding.OutgoingDeviceNoBufferSpace.Increment()
	default:
		panic(fmt.Sprintf("unrecognized forwarding error: %s", err))
	}
	// Every recognized non-nil error also counts towards the aggregate.
	stats.Forwarding.Errors.Increment()
}

// deliverPacketLocally hands pkt, whose IPv4 header is h, to the local
// consumers on this endpoint.
//
// The packet first goes through the iptables input hook using inNICName.
// Fragments are validated and passed to the reassembler; processing continues
// only once a complete datagram is available. The resulting packet is then
// given to ICMP, IGMP or the transport dispatcher depending on its protocol
// number.
func (e *endpoint) deliverPacketLocally(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) {
	stats := e.stats
	// iptables filtering. All packets that reach here are intended for
	// this machine and will not be forwarded.
	if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok {
		// iptables is telling us to drop the packet.
		stats.ip.IPTablesInputDropped.Increment()
		return
	}

	if h.More() || h.FragmentOffset() != 0 {
		if pkt.Data().Size()+len(pkt.TransportHeader().Slice()) == 0 {
			// Drop the packet as it's marked as a fragment but has
			// no payload.
			stats.ip.MalformedPacketsReceived.Increment()
			stats.ip.MalformedFragmentsReceived.Increment()
			return
		}
		if opts := h.Options(); len(opts) != 0 {
			// If there are options we need to check them before we do assembly
			// or we could be assembling errant packets. However we do not change the
			// options as that could lead to double processing later.
			if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil {
				if optProblem.NeedICMP {
					_ = e.protocol.returnError(&icmpReasonParamProblem{
						pointer: optProblem.Pointer,
					}, pkt, true /* deliveredLocally */)
					e.stats.ip.MalformedPacketsReceived.Increment()
				}
				return
			}
		}
		// The packet is a fragment, let's try to reassemble it.
		start := h.FragmentOffset()
		// Drop the fragment if the size of the reassembled payload would exceed the
		// maximum payload size.
		//
		// Note that this addition doesn't overflow even on 32bit architecture
		// because pkt.Data().Size() should not exceed 65535 (the max IP datagram
		// size). Otherwise the packet would've been rejected as invalid before
		// reaching here.
		if int(start)+pkt.Data().Size() > header.IPv4MaximumPayloadSize {
			stats.ip.MalformedPacketsReceived.Increment()
			stats.ip.MalformedFragmentsReceived.Increment()
			return
		}

		proto := h.Protocol()
		resPkt, transProtoNum, ready, err := e.protocol.fragmentation.Process(
			// As per RFC 791 section 2.3, the identification value is unique
			// for a source-destination pair and protocol.
			fragmentation.FragmentID{
				Source:      h.SourceAddress(),
				Destination: h.DestinationAddress(),
				ID:          uint32(h.ID()),
				Protocol:    proto,
			},
			start,
			// Offset of the last payload byte carried by this fragment.
			start+uint16(pkt.Data().Size())-1,
			h.More(),
			proto,
			pkt,
		)
		if err != nil {
			stats.ip.MalformedPacketsReceived.Increment()
			stats.ip.MalformedFragmentsReceived.Increment()
			return
		}
		if !ready {
			// More fragments are needed before the datagram is complete.
			return
		}
		defer resPkt.DecRef()
		// From here on, operate on the reassembled packet and its header.
		pkt = resPkt
		h = header.IPv4(pkt.NetworkHeader().Slice())

		// The reassembler doesn't take care of fixing up the header, so we need
		// to do it here.
		h.SetTotalLength(uint16(pkt.Data().Size() + len(h)))
		h.SetFlagsFragmentOffset(0, 0)

		e.protocol.parseTransport(pkt, tcpip.TransportProtocolNumber(transProtoNum))

		// Now that the packet is reassembled, it can be sent to raw sockets.
		e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt)
	}
	stats.ip.PacketsDelivered.Increment()

	p := h.TransportProtocol()
	if p == header.ICMPv4ProtocolNumber {
		// TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport
		// headers, the setting of the transport number here should be
		// unnecessary and removed.
		pkt.TransportProtocolNumber = p
		e.handleICMP(pkt)
		return
	}
	// ICMP handles options itself but do it here for all remaining destinations.
	var hasRouterAlertOption bool
	if opts := h.Options(); len(opts) != 0 {
		newOpts, processedOpts, optProblem := e.processIPOptions(pkt, opts, &optionUsageReceive{})
		if optProblem != nil {
			if optProblem.NeedICMP {
				_ = e.protocol.returnError(&icmpReasonParamProblem{
					pointer: optProblem.Pointer,
				}, pkt, true /* deliveredLocally */)
				stats.ip.MalformedPacketsReceived.Increment()
			}
			return
		}
		hasRouterAlertOption = processedOpts.routerAlert
		// Write the processed options back over the originals; they must fit.
		copied := copy(opts, newOpts)
		if copied != len(newOpts) {
			panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts)))
		}
		for i := copied; i < len(opts); i++ {
			// Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero".
			opts[i] = byte(header.IPv4OptionListEndType)
		}
	}
	if p == header.IGMPProtocolNumber {
		e.mu.Lock()
		e.igmp.handleIGMP(pkt, hasRouterAlertOption) // +checklocksforce: e == e.igmp.ep.
		e.mu.Unlock()
		return
	}

	switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res {
	case stack.TransportPacketHandled:
	case stack.TransportPacketDestinationPortUnreachable:
		// As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination
		// Unreachable messages with code:
		//   3 (Port Unreachable), when the designated transport protocol
		//   (e.g., UDP) is unable to demultiplex the datagram but has no
		//   protocol mechanism to inform the sender.
		_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */)
	case stack.TransportPacketProtocolUnreachable:
		// As per RFC: 1122 Section 3.2.2.1
		//   A host SHOULD generate Destination Unreachable messages with code:
		//   2 (Protocol Unreachable), when the designated transport protocol
		//   is not supported
		_ = e.protocol.returnError(&icmpReasonProtoUnreachable{}, pkt, true /* deliveredLocally */)
	default:
		panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
	}
}

// Close cleans up resources associated with the endpoint.
//
// The endpoint is disabled and its address state cleaned up while holding
// e.mu; the protocol is told to forget the endpoint after the lock is released.
func (e *endpoint) Close() {
	e.mu.Lock()
	e.disableLocked()
	e.addressableEndpointState.Cleanup()
	e.mu.Unlock()

	e.protocol.forgetEndpoint(e.nic.ID())
}

// AddAndAcquirePermanentAddress implements stack.AddressableEndpoint.
//
// When the address is added successfully, any queued IGMP reports are sent
// before returning.
func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) {
	e.mu.Lock()
	defer e.mu.Unlock()

	ep, err := e.addressableEndpointState.AddAndAcquireAddress(addr, properties, stack.Permanent)
	if err == nil {
		e.sendQueuedReports()
	}
	return ep, err
}

// sendQueuedReports sends queued igmp reports.
//
// +checklocks:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) sendQueuedReports() {
	e.igmp.sendQueuedReports()
}

// RemovePermanentAddress implements stack.AddressableEndpoint.
func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error {
	e.mu.RLock()
	defer e.mu.RUnlock()
	return e.addressableEndpointState.RemovePermanentAddress(addr)
}

// SetDeprecated implements stack.AddressableEndpoint.
1386 func (e *endpoint) SetDeprecated(addr tcpip.Address, deprecated bool) tcpip.Error { 1387 e.mu.RLock() 1388 defer e.mu.RUnlock() 1389 return e.addressableEndpointState.SetDeprecated(addr, deprecated) 1390 } 1391 1392 // SetLifetimes implements stack.AddressableEndpoint. 1393 func (e *endpoint) SetLifetimes(addr tcpip.Address, lifetimes stack.AddressLifetimes) tcpip.Error { 1394 e.mu.RLock() 1395 defer e.mu.RUnlock() 1396 return e.addressableEndpointState.SetLifetimes(addr, lifetimes) 1397 } 1398 1399 // MainAddress implements stack.AddressableEndpoint. 1400 func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { 1401 e.mu.RLock() 1402 defer e.mu.RUnlock() 1403 return e.addressableEndpointState.MainAddress() 1404 } 1405 1406 // AcquireAssignedAddress implements stack.AddressableEndpoint. 1407 func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior, readOnly bool) stack.AddressEndpoint { 1408 e.mu.RLock() 1409 defer e.mu.RUnlock() 1410 1411 loopback := e.nic.IsLoopback() 1412 return e.addressableEndpointState.AcquireAssignedAddressOrMatching(localAddr, func(addressEndpoint stack.AddressEndpoint) bool { 1413 subnet := addressEndpoint.Subnet() 1414 // IPv4 has a notion of a subnet broadcast address and considers the 1415 // loopback interface bound to an address's whole subnet (on linux). 1416 return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr)) 1417 }, allowTemp, tempPEB, readOnly) 1418 } 1419 1420 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. 
1421 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr, srcHint tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1422 e.mu.RLock() 1423 defer e.mu.RUnlock() 1424 return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, srcHint, allowExpired) 1425 } 1426 1427 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress 1428 // but with locking requirements 1429 // 1430 // +checklocksread:e.mu 1431 func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr, srcHint tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1432 return e.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, srcHint, allowExpired) 1433 } 1434 1435 // PrimaryAddresses implements stack.AddressableEndpoint. 1436 func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { 1437 e.mu.RLock() 1438 defer e.mu.RUnlock() 1439 return e.addressableEndpointState.PrimaryAddresses() 1440 } 1441 1442 // PermanentAddresses implements stack.AddressableEndpoint. 1443 func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { 1444 e.mu.RLock() 1445 defer e.mu.RUnlock() 1446 return e.addressableEndpointState.PermanentAddresses() 1447 } 1448 1449 // JoinGroup implements stack.GroupAddressableEndpoint. 1450 func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { 1451 e.mu.Lock() 1452 defer e.mu.Unlock() 1453 return e.joinGroupLocked(addr) 1454 } 1455 1456 // joinGroupLocked is like JoinGroup but with locking requirements. 1457 // 1458 // +checklocks:e.mu 1459 // +checklocksalias:e.igmp.ep.mu=e.mu 1460 func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { 1461 if !header.IsV4MulticastAddress(addr) { 1462 return &tcpip.ErrBadAddress{} 1463 } 1464 1465 e.igmp.joinGroup(addr) 1466 return nil 1467 } 1468 1469 // LeaveGroup implements stack.GroupAddressableEndpoint. 
1470 func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { 1471 e.mu.Lock() 1472 defer e.mu.Unlock() 1473 return e.leaveGroupLocked(addr) 1474 } 1475 1476 // leaveGroupLocked is like LeaveGroup but with locking requirements. 1477 // 1478 // +checklocks:e.mu 1479 // +checklocksalias:e.igmp.ep.mu=e.mu 1480 func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { 1481 return e.igmp.leaveGroup(addr) 1482 } 1483 1484 // IsInGroup implements stack.GroupAddressableEndpoint. 1485 func (e *endpoint) IsInGroup(addr tcpip.Address) bool { 1486 e.mu.RLock() 1487 defer e.mu.RUnlock() 1488 return e.igmp.isInGroup(addr) // +checklocksforce: e.mu==e.igmp.ep.mu. 1489 } 1490 1491 // Stats implements stack.NetworkEndpoint. 1492 func (e *endpoint) Stats() stack.NetworkEndpointStats { 1493 return &e.stats.localStats 1494 } 1495 1496 var _ stack.NetworkProtocol = (*protocol)(nil) 1497 var _ stack.MulticastForwardingNetworkProtocol = (*protocol)(nil) 1498 var _ stack.RejectIPv4WithHandler = (*protocol)(nil) 1499 var _ fragmentation.TimeoutHandler = (*protocol)(nil) 1500 1501 type protocol struct { 1502 stack *stack.Stack 1503 1504 // mu protects annotated fields below. 1505 mu sync.RWMutex 1506 1507 // eps is keyed by NICID to allow protocol methods to retrieve an endpoint 1508 // when handling a packet, by looking at which NIC handled the packet. 1509 // +checklocks:mu 1510 eps map[tcpip.NICID]*endpoint 1511 1512 // ICMP types for which the stack's global rate limiting must apply. 1513 // +checklocks:mu 1514 icmpRateLimitedTypes map[header.ICMPv4Type]struct{} 1515 1516 // defaultTTL is the current default TTL for the protocol. Only the 1517 // uint8 portion of it is meaningful. 1518 defaultTTL atomicbitops.Uint32 1519 1520 ids []atomicbitops.Uint32 1521 hashIV uint32 1522 // idTS is the unix timestamp in milliseconds 'ids' was last accessed. 
1523 idTS atomicbitops.Int64 1524 1525 fragmentation *fragmentation.Fragmentation 1526 1527 options Options 1528 1529 multicastRouteTable multicast.RouteTable 1530 // multicastForwardingDisp is the multicast forwarding event dispatcher that 1531 // an integrator can provide to receive multicast forwarding events. Note 1532 // that multicast packets will only be forwarded if this is non-nil. 1533 // +checklocks:mu 1534 multicastForwardingDisp stack.MulticastForwardingEventDispatcher 1535 } 1536 1537 // Number returns the ipv4 protocol number. 1538 func (p *protocol) Number() tcpip.NetworkProtocolNumber { 1539 return ProtocolNumber 1540 } 1541 1542 // MinimumPacketSize returns the minimum valid ipv4 packet size. 1543 func (p *protocol) MinimumPacketSize() int { 1544 return header.IPv4MinimumSize 1545 } 1546 1547 // ParseAddresses implements stack.NetworkProtocol. 1548 func (*protocol) ParseAddresses(v []byte) (src, dst tcpip.Address) { 1549 h := header.IPv4(v) 1550 return h.SourceAddress(), h.DestinationAddress() 1551 } 1552 1553 // SetOption implements stack.NetworkProtocol. 1554 func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { 1555 switch v := option.(type) { 1556 case *tcpip.DefaultTTLOption: 1557 p.SetDefaultTTL(uint8(*v)) 1558 return nil 1559 default: 1560 return &tcpip.ErrUnknownProtocolOption{} 1561 } 1562 } 1563 1564 // Option implements stack.NetworkProtocol. 1565 func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { 1566 switch v := option.(type) { 1567 case *tcpip.DefaultTTLOption: 1568 *v = tcpip.DefaultTTLOption(p.DefaultTTL()) 1569 return nil 1570 default: 1571 return &tcpip.ErrUnknownProtocolOption{} 1572 } 1573 } 1574 1575 // SetDefaultTTL sets the default TTL for endpoints created with this protocol. 1576 func (p *protocol) SetDefaultTTL(ttl uint8) { 1577 p.defaultTTL.Store(uint32(ttl)) 1578 } 1579 1580 // DefaultTTL returns the default TTL for endpoints created with this protocol. 
1581 func (p *protocol) DefaultTTL() uint8 { 1582 return uint8(p.defaultTTL.Load()) 1583 } 1584 1585 // Close implements stack.TransportProtocol. 1586 func (p *protocol) Close() { 1587 p.fragmentation.Release() 1588 p.multicastRouteTable.Close() 1589 } 1590 1591 // Wait implements stack.TransportProtocol. 1592 func (*protocol) Wait() {} 1593 1594 func (p *protocol) validateUnicastSourceAndMulticastDestination(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1595 if !p.isUnicastAddress(addresses.Source) || header.IsV4LinkLocalUnicastAddress(addresses.Source) { 1596 return &tcpip.ErrBadAddress{} 1597 } 1598 1599 if !header.IsV4MulticastAddress(addresses.Destination) || header.IsV4LinkLocalMulticastAddress(addresses.Destination) { 1600 return &tcpip.ErrBadAddress{} 1601 } 1602 1603 return nil 1604 } 1605 1606 func (p *protocol) multicastForwarding() bool { 1607 p.mu.RLock() 1608 defer p.mu.RUnlock() 1609 return p.multicastForwardingDisp != nil 1610 } 1611 1612 func (p *protocol) newInstalledRoute(route stack.MulticastRoute) (*multicast.InstalledRoute, tcpip.Error) { 1613 if len(route.OutgoingInterfaces) == 0 { 1614 return nil, &tcpip.ErrMissingRequiredFields{} 1615 } 1616 1617 if !p.stack.HasNIC(route.ExpectedInputInterface) { 1618 return nil, &tcpip.ErrUnknownNICID{} 1619 } 1620 1621 for _, outgoingInterface := range route.OutgoingInterfaces { 1622 if route.ExpectedInputInterface == outgoingInterface.ID { 1623 return nil, &tcpip.ErrMulticastInputCannotBeOutput{} 1624 } 1625 1626 if !p.stack.HasNIC(outgoingInterface.ID) { 1627 return nil, &tcpip.ErrUnknownNICID{} 1628 } 1629 } 1630 return p.multicastRouteTable.NewInstalledRoute(route), nil 1631 } 1632 1633 // AddMulticastRoute implements stack.MulticastForwardingNetworkProtocol. 
1634 func (p *protocol) AddMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination, route stack.MulticastRoute) tcpip.Error { 1635 if !p.multicastForwarding() { 1636 return &tcpip.ErrNotPermitted{} 1637 } 1638 1639 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1640 return err 1641 } 1642 1643 installedRoute, err := p.newInstalledRoute(route) 1644 if err != nil { 1645 return err 1646 } 1647 1648 pendingPackets := p.multicastRouteTable.AddInstalledRoute(addresses, installedRoute) 1649 1650 for _, pkt := range pendingPackets { 1651 p.forwardPendingMulticastPacket(pkt, installedRoute) 1652 } 1653 return nil 1654 } 1655 1656 // RemoveMulticastRoute implements 1657 // stack.MulticastForwardingNetworkProtocol.RemoveMulticastRoute. 1658 func (p *protocol) RemoveMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1659 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1660 return err 1661 } 1662 1663 if removed := p.multicastRouteTable.RemoveInstalledRoute(addresses); !removed { 1664 return &tcpip.ErrHostUnreachable{} 1665 } 1666 1667 return nil 1668 } 1669 1670 // EnableMulticastForwarding implements 1671 // stack.MulticastForwardingNetworkProtocol.EnableMulticastForwarding. 1672 func (p *protocol) EnableMulticastForwarding(disp stack.MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 1673 p.mu.Lock() 1674 defer p.mu.Unlock() 1675 1676 if p.multicastForwardingDisp != nil { 1677 return true, nil 1678 } 1679 1680 if disp == nil { 1681 return false, &tcpip.ErrInvalidOptionValue{} 1682 } 1683 1684 p.multicastForwardingDisp = disp 1685 return false, nil 1686 } 1687 1688 // DisableMulticastForwarding implements 1689 // stack.MulticastForwardingNetworkProtocol.DisableMulticastForwarding. 
1690 func (p *protocol) DisableMulticastForwarding() { 1691 p.mu.Lock() 1692 defer p.mu.Unlock() 1693 1694 p.multicastForwardingDisp = nil 1695 p.multicastRouteTable.RemoveAllInstalledRoutes() 1696 } 1697 1698 // MulticastRouteLastUsedTime implements 1699 // stack.MulticastForwardingNetworkProtocol. 1700 func (p *protocol) MulticastRouteLastUsedTime(addresses stack.UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 1701 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1702 return tcpip.MonotonicTime{}, err 1703 } 1704 1705 timestamp, found := p.multicastRouteTable.GetLastUsedTimestamp(addresses) 1706 1707 if !found { 1708 return tcpip.MonotonicTime{}, &tcpip.ErrHostUnreachable{} 1709 } 1710 1711 return timestamp, nil 1712 } 1713 1714 func (p *protocol) forwardPendingMulticastPacket(pkt *stack.PacketBuffer, installedRoute *multicast.InstalledRoute) { 1715 defer pkt.DecRef() 1716 1717 // Attempt to forward the packet using the endpoint that it originally 1718 // arrived on. This ensures that the packet is only forwarded if it 1719 // matches the route's expected input interface (see 5a of RFC 1812 section 1720 // 5.2.1.3). 1721 ep, ok := p.getEndpointForNIC(pkt.NICID) 1722 1723 if !ok { 1724 // The endpoint that the packet arrived on no longer exists. Silently 1725 // drop the pkt. 
1726 return 1727 } 1728 1729 if !ep.MulticastForwarding() { 1730 return 1731 } 1732 1733 ep.handleForwardingError(ep.forwardValidatedMulticastPacket(pkt, installedRoute)) 1734 } 1735 1736 func (p *protocol) isUnicastAddress(addr tcpip.Address) bool { 1737 if addr.BitLen() != header.IPv4AddressSizeBits { 1738 return false 1739 } 1740 1741 if addr == header.IPv4Any || addr == header.IPv4Broadcast { 1742 return false 1743 } 1744 1745 if p.isSubnetLocalBroadcastAddress(addr) { 1746 return false 1747 } 1748 return !header.IsV4MulticastAddress(addr) 1749 } 1750 1751 func (p *protocol) isSubnetLocalBroadcastAddress(addr tcpip.Address) bool { 1752 p.mu.RLock() 1753 defer p.mu.RUnlock() 1754 1755 for _, e := range p.eps { 1756 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* createTemp */, stack.NeverPrimaryEndpoint, true /* readOnly */); addressEndpoint != nil { 1757 subnet := addressEndpoint.Subnet() 1758 if subnet.IsBroadcast(addr) { 1759 return true 1760 } 1761 } 1762 } 1763 return false 1764 } 1765 1766 // parseAndValidate parses the packet (including its transport layer header) and 1767 // returns the parsed IP header. 1768 // 1769 // Returns true if the IP header was successfully parsed. 1770 func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (*buffer.View, bool) { 1771 transProtoNum, hasTransportHdr, ok := p.Parse(pkt) 1772 if !ok { 1773 return nil, false 1774 } 1775 1776 h := header.IPv4(pkt.NetworkHeader().Slice()) 1777 // Do not include the link header's size when calculating the size of the IP 1778 // packet. 
1779 if !h.IsValid(pkt.Size() - len(pkt.LinkHeader().Slice())) { 1780 return nil, false 1781 } 1782 1783 if !pkt.RXChecksumValidated && !h.IsChecksumValid() { 1784 return nil, false 1785 } 1786 1787 if hasTransportHdr { 1788 p.parseTransport(pkt, transProtoNum) 1789 } 1790 1791 return pkt.NetworkHeader().View(), true 1792 } 1793 1794 func (p *protocol) parseTransport(pkt *stack.PacketBuffer, transProtoNum tcpip.TransportProtocolNumber) { 1795 if transProtoNum == header.ICMPv4ProtocolNumber { 1796 // The transport layer will handle transport layer parsing errors. 1797 _ = parse.ICMPv4(pkt) 1798 return 1799 } 1800 1801 switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { 1802 case stack.ParsedOK: 1803 case stack.UnknownTransportProtocol, stack.TransportLayerParseError: 1804 // The transport layer will handle unknown protocols and transport layer 1805 // parsing errors. 1806 default: 1807 panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) 1808 } 1809 } 1810 1811 // Parse implements stack.NetworkProtocol. 1812 func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { 1813 if ok := parse.IPv4(pkt); !ok { 1814 return 0, false, false 1815 } 1816 1817 ipHdr := header.IPv4(pkt.NetworkHeader().Slice()) 1818 return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true 1819 } 1820 1821 // allowICMPReply reports whether an ICMP reply with provided type and code may 1822 // be sent following the rate mask options and global ICMP rate limiter. 1823 func (p *protocol) allowICMPReply(icmpType header.ICMPv4Type, code header.ICMPv4Code) bool { 1824 // Mimic linux and never rate limit for PMTU discovery. 
1825 // https://github.com/torvalds/linux/blob/9e9fb7655ed585da8f468e29221f0ba194a5f613/net/ipv4/icmp.c#L288 1826 if icmpType == header.ICMPv4DstUnreachable && code == header.ICMPv4FragmentationNeeded { 1827 return true 1828 } 1829 p.mu.RLock() 1830 defer p.mu.RUnlock() 1831 1832 if _, ok := p.icmpRateLimitedTypes[icmpType]; ok { 1833 return p.stack.AllowICMPMessage() 1834 } 1835 return true 1836 } 1837 1838 // SendRejectionError implements stack.RejectIPv4WithHandler. 1839 func (p *protocol) SendRejectionError(pkt *stack.PacketBuffer, rejectWith stack.RejectIPv4WithICMPType, inputHook bool) tcpip.Error { 1840 switch rejectWith { 1841 case stack.RejectIPv4WithICMPNetUnreachable: 1842 return p.returnError(&icmpReasonNetworkUnreachable{}, pkt, inputHook) 1843 case stack.RejectIPv4WithICMPHostUnreachable: 1844 return p.returnError(&icmpReasonHostUnreachable{}, pkt, inputHook) 1845 case stack.RejectIPv4WithICMPPortUnreachable: 1846 return p.returnError(&icmpReasonPortUnreachable{}, pkt, inputHook) 1847 case stack.RejectIPv4WithICMPNetProhibited: 1848 return p.returnError(&icmpReasonNetworkProhibited{}, pkt, inputHook) 1849 case stack.RejectIPv4WithICMPHostProhibited: 1850 return p.returnError(&icmpReasonHostProhibited{}, pkt, inputHook) 1851 case stack.RejectIPv4WithICMPAdminProhibited: 1852 return p.returnError(&icmpReasonAdministrativelyProhibited{}, pkt, inputHook) 1853 default: 1854 panic(fmt.Sprintf("unhandled %[1]T = %[1]d", rejectWith)) 1855 } 1856 } 1857 1858 // calculateNetworkMTU calculates the network-layer payload MTU based on the 1859 // link-layer payload mtu. 
1860 func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, tcpip.Error) { 1861 if linkMTU < header.IPv4MinimumMTU { 1862 return 0, &tcpip.ErrInvalidEndpointState{} 1863 } 1864 1865 // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in 1866 // length: 1867 // The maximal internet header is 60 octets, and a typical internet header 1868 // is 20 octets, allowing a margin for headers of higher level protocols. 1869 if networkHeaderSize > header.IPv4MaximumHeaderSize { 1870 return 0, &tcpip.ErrMalformedHeader{} 1871 } 1872 1873 networkMTU := linkMTU 1874 if networkMTU > MaxTotalSize { 1875 networkMTU = MaxTotalSize 1876 } 1877 1878 return networkMTU - networkHeaderSize, nil 1879 } 1880 1881 func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool { 1882 payload := len(pkt.TransportHeader().Slice()) + pkt.Data().Size() 1883 return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU 1884 } 1885 1886 // addressToUint32 translates an IPv4 address into its little endian uint32 1887 // representation. 1888 // 1889 // This function does the same thing as binary.LittleEndian.Uint32 but operates 1890 // on a tcpip.Address (a string) without the need to convert it to a byte slice, 1891 // which would cause an allocation. 1892 func addressToUint32(addr tcpip.Address) uint32 { 1893 addrBytes := addr.As4() 1894 _ = addrBytes[3] // bounds check hint to compiler 1895 return uint32(addrBytes[0]) | uint32(addrBytes[1])<<8 | uint32(addrBytes[2])<<16 | uint32(addrBytes[3])<<24 1896 } 1897 1898 // hashRoute calculates a hash value for the given source/destination pair using 1899 // the addresses, transport protocol number and a 32-bit number to generate the 1900 // hash. 
1901 func hashRoute(srcAddr, dstAddr tcpip.Address, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 { 1902 a := addressToUint32(srcAddr) 1903 b := addressToUint32(dstAddr) 1904 return hash.Hash3Words(a, b, uint32(protocol), hashIV) 1905 } 1906 1907 // Options holds options to configure a new protocol. 1908 type Options struct { 1909 // IGMP holds options for IGMP. 1910 IGMP IGMPOptions 1911 1912 // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. 1913 // martian loopback packets) should be accepted. 1914 AllowExternalLoopbackTraffic bool 1915 } 1916 1917 // NewProtocolWithOptions returns an IPv4 network protocol. 1918 func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { 1919 ids := make([]atomicbitops.Uint32, buckets) 1920 1921 // Randomly initialize hashIV and the ids. 1922 r := hash.RandN32(1 + buckets) 1923 for i := range ids { 1924 ids[i] = atomicbitops.FromUint32(r[i]) 1925 } 1926 hashIV := r[buckets] 1927 1928 return func(s *stack.Stack) stack.NetworkProtocol { 1929 p := &protocol{ 1930 stack: s, 1931 ids: ids, 1932 hashIV: hashIV, 1933 defaultTTL: atomicbitops.FromUint32(DefaultTTL), 1934 options: opts, 1935 } 1936 p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) 1937 p.eps = make(map[tcpip.NICID]*endpoint) 1938 // Set ICMP rate limiting to Linux defaults. 1939 // See https://man7.org/linux/man-pages/man7/icmp.7.html. 
1940 p.icmpRateLimitedTypes = map[header.ICMPv4Type]struct{}{ 1941 header.ICMPv4DstUnreachable: {}, 1942 header.ICMPv4SrcQuench: {}, 1943 header.ICMPv4TimeExceeded: {}, 1944 header.ICMPv4ParamProblem: {}, 1945 } 1946 if err := p.multicastRouteTable.Init(multicast.DefaultConfig(s.Clock())); err != nil { 1947 panic(fmt.Sprintf("p.multicastRouteTable.Init(_): %s", err)) 1948 } 1949 return p 1950 } 1951 } 1952 1953 // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. 1954 func NewProtocol(s *stack.Stack) stack.NetworkProtocol { 1955 return NewProtocolWithOptions(Options{})(s) 1956 } 1957 1958 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) { 1959 fragPkt, offset, copied, more := pf.BuildNextFragment() 1960 fragPkt.NetworkProtocolNumber = ProtocolNumber 1961 1962 originalIPHeaderLength := len(originalIPHeader) 1963 nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength)) 1964 fragPkt.NetworkProtocolNumber = ProtocolNumber 1965 1966 if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) { 1967 panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength)) 1968 } 1969 1970 flags := originalIPHeader.Flags() 1971 if more { 1972 flags |= header.IPv4FlagMoreFragments 1973 } 1974 nextFragIPHeader.SetFlagsFragmentOffset(flags, uint16(offset)) 1975 nextFragIPHeader.SetTotalLength(uint16(nextFragIPHeader.HeaderLength()) + uint16(copied)) 1976 nextFragIPHeader.SetChecksum(0) 1977 nextFragIPHeader.SetChecksum(^nextFragIPHeader.CalculateChecksum()) 1978 1979 return fragPkt, more 1980 } 1981 1982 // optionAction describes possible actions that may be taken on an option 1983 // while processing it. 1984 type optionAction uint8 1985 1986 const ( 1987 // optionRemove says that the option should not be in the output option set. 
1988 optionRemove optionAction = iota 1989 1990 // optionProcess says that the option should be fully processed. 1991 optionProcess 1992 1993 // optionVerify says the option should be checked and passed unchanged. 1994 optionVerify 1995 1996 // optionPass says to pass the output set without checking. 1997 optionPass 1998 ) 1999 2000 // optionActions list what to do for each option in a given scenario. 2001 type optionActions struct { 2002 // timestamp controls what to do with a Timestamp option. 2003 timestamp optionAction 2004 2005 // recordRoute controls what to do with a Record Route option. 2006 recordRoute optionAction 2007 2008 // routerAlert controls what to do with a Router Alert option. 2009 routerAlert optionAction 2010 2011 // unknown controls what to do with an unknown option. 2012 unknown optionAction 2013 } 2014 2015 // optionsUsage specifies the ways options may be operated upon for a given 2016 // scenario during packet processing. 2017 type optionsUsage interface { 2018 actions() optionActions 2019 } 2020 2021 // optionUsageVerify implements optionsUsage for when we just want to check 2022 // fragments. Don't change anything, just check and reject if bad. No 2023 // replacement options are generated. 2024 type optionUsageVerify struct{} 2025 2026 // actions implements optionsUsage. 2027 func (*optionUsageVerify) actions() optionActions { 2028 return optionActions{ 2029 timestamp: optionVerify, 2030 recordRoute: optionVerify, 2031 routerAlert: optionVerify, 2032 unknown: optionRemove, 2033 } 2034 } 2035 2036 // optionUsageReceive implements optionsUsage for packets we will pass 2037 // to the transport layer (with the exception of Echo requests). 2038 type optionUsageReceive struct{} 2039 2040 // actions implements optionsUsage. 
2041 func (*optionUsageReceive) actions() optionActions { 2042 return optionActions{ 2043 timestamp: optionProcess, 2044 recordRoute: optionProcess, 2045 routerAlert: optionVerify, 2046 unknown: optionPass, 2047 } 2048 } 2049 2050 // optionUsageForward implements optionsUsage for packets about to be forwarded. 2051 // All options are passed on regardless of whether we recognise them, however 2052 // we do process the Timestamp and Record Route options. 2053 type optionUsageForward struct{} 2054 2055 // actions implements optionsUsage. 2056 func (*optionUsageForward) actions() optionActions { 2057 return optionActions{ 2058 timestamp: optionProcess, 2059 recordRoute: optionProcess, 2060 routerAlert: optionVerify, 2061 unknown: optionPass, 2062 } 2063 } 2064 2065 // optionUsageEcho implements optionsUsage for echo packet processing. 2066 // Only Timestamp and RecordRoute are processed and sent back. 2067 type optionUsageEcho struct{} 2068 2069 // actions implements optionsUsage. 2070 func (*optionUsageEcho) actions() optionActions { 2071 return optionActions{ 2072 timestamp: optionProcess, 2073 recordRoute: optionProcess, 2074 routerAlert: optionVerify, 2075 unknown: optionRemove, 2076 } 2077 } 2078 2079 // handleTimestamp does any required processing on a Timestamp option 2080 // in place. 
2081 func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) *header.IPv4OptParameterProblem { 2082 flags := tsOpt.Flags() 2083 var entrySize uint8 2084 switch flags { 2085 case header.IPv4OptionTimestampOnlyFlag: 2086 entrySize = header.IPv4OptionTimestampSize 2087 case 2088 header.IPv4OptionTimestampWithIPFlag, 2089 header.IPv4OptionTimestampWithPredefinedIPFlag: 2090 entrySize = header.IPv4OptionTimestampWithAddrSize 2091 default: 2092 return &header.IPv4OptParameterProblem{ 2093 Pointer: header.IPv4OptTSOFLWAndFLGOffset, 2094 NeedICMP: true, 2095 } 2096 } 2097 2098 pointer := tsOpt.Pointer() 2099 // RFC 791 page 22 states: "The smallest legal value is 5." 2100 // Since the pointer is 1 based, and the header is 4 bytes long the 2101 // pointer must point beyond the header therefore 4 or less is bad. 2102 if pointer <= header.IPv4OptionTimestampHdrLength { 2103 return &header.IPv4OptParameterProblem{ 2104 Pointer: header.IPv4OptTSPointerOffset, 2105 NeedICMP: true, 2106 } 2107 } 2108 // To simplify processing below, base further work on the array of timestamps 2109 // beyond the header, rather than on the whole option. Also to aid 2110 // calculations set 'nextSlot' to be 0 based as in the packet it is 1 based. 2111 nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1) 2112 optLen := tsOpt.Size() 2113 dataLength := optLen - header.IPv4OptionTimestampHdrLength 2114 2115 // In the section below, we verify the pointer, length and overflow counter 2116 // fields of the option. The distinction is in which byte you return as being 2117 // in error in the ICMP packet. Offsets 1 (length), 2 pointer) 2118 // or 3 (overflowed counter). 
2119 // 2120 // The following RFC sections cover this section: 2121 // 2122 // RFC 791 (page 22): 2123 // If there is some room but not enough room for a full timestamp 2124 // to be inserted, or the overflow count itself overflows, the 2125 // original datagram is considered to be in error and is discarded. 2126 // In either case an ICMP parameter problem message may be sent to 2127 // the source host [3]. 2128 // 2129 // You can get this situation in two ways. Firstly if the data area is not 2130 // a multiple of the entry size or secondly, if the pointer is not at a 2131 // multiple of the entry size. The wording of the RFC suggests that 2132 // this is not an error until you actually run out of space. 2133 if pointer > optLen { 2134 // RFC 791 (page 22) says we should switch to using the overflow count. 2135 // If the timestamp data area is already full (the pointer exceeds 2136 // the length) the datagram is forwarded without inserting the 2137 // timestamp, but the overflow count is incremented by one. 2138 if flags == header.IPv4OptionTimestampWithPredefinedIPFlag { 2139 // By definition we have nothing to do. 2140 return nil 2141 } 2142 2143 if tsOpt.IncOverflow() != 0 { 2144 return nil 2145 } 2146 // The overflow count is also full. 2147 return &header.IPv4OptParameterProblem{ 2148 Pointer: header.IPv4OptTSOFLWAndFLGOffset, 2149 NeedICMP: true, 2150 } 2151 } 2152 if nextSlot+entrySize > dataLength { 2153 // The data area isn't full but there isn't room for a new entry. 2154 // Either Length or Pointer could be bad. 2155 if false { 2156 // We must select Pointer for Linux compatibility, even if 2157 // only the length is bad. 
2158 // The Linux code is at (in October 2020) 2159 // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370 2160 // if (optptr[2]+3 > optlen) { 2161 // pp_ptr = optptr + 2; 2162 // goto error; 2163 // } 2164 // which doesn't distinguish between which of optptr[2] or optlen 2165 // is wrong, but just arbitrarily decides on optptr+2. 2166 if dataLength%entrySize != 0 { 2167 // The Data section size should be a multiple of the expected 2168 // timestamp entry size. 2169 return &header.IPv4OptParameterProblem{ 2170 Pointer: header.IPv4OptionLengthOffset, 2171 NeedICMP: false, 2172 } 2173 } 2174 // If the size is OK, the pointer must be corrupted. 2175 } 2176 return &header.IPv4OptParameterProblem{ 2177 Pointer: header.IPv4OptTSPointerOffset, 2178 NeedICMP: true, 2179 } 2180 } 2181 2182 if usage.actions().timestamp == optionProcess { 2183 tsOpt.UpdateTimestamp(localAddress, clock) 2184 } 2185 return nil 2186 } 2187 2188 // handleRecordRoute checks and processes a Record route option. It is much 2189 // like the timestamp type 1 option, but without timestamps. The passed in 2190 // address is stored in the option in the correct spot if possible. 2191 func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) *header.IPv4OptParameterProblem { 2192 optlen := rrOpt.Size() 2193 2194 if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength { 2195 return &header.IPv4OptParameterProblem{ 2196 Pointer: header.IPv4OptionLengthOffset, 2197 NeedICMP: true, 2198 } 2199 } 2200 2201 pointer := rrOpt.Pointer() 2202 // RFC 791 page 20 states: 2203 // The pointer is relative to this option, and the 2204 // smallest legal value for the pointer is 4. 2205 // Since the pointer is 1 based, and the header is 3 bytes long the 2206 // pointer must point beyond the header therefore 3 or less is bad. 
2207 if pointer <= header.IPv4OptionRecordRouteHdrLength { 2208 return &header.IPv4OptParameterProblem{ 2209 Pointer: header.IPv4OptRRPointerOffset, 2210 NeedICMP: true, 2211 } 2212 } 2213 2214 // RFC 791 page 21 says 2215 // If the route data area is already full (the pointer exceeds the 2216 // length) the datagram is forwarded without inserting the address 2217 // into the recorded route. If there is some room but not enough 2218 // room for a full address to be inserted, the original datagram is 2219 // considered to be in error and is discarded. In either case an 2220 // ICMP parameter problem message may be sent to the source 2221 // host. 2222 // The use of the words "In either case" suggests that a 'full' RR option 2223 // could generate an ICMP at every hop after it fills up. We chose to not 2224 // do this (as do most implementations). It is probable that the inclusion 2225 // of these words is a copy/paste error from the timestamp option where 2226 // there are two failure reasons given. 2227 if pointer > optlen { 2228 return nil 2229 } 2230 2231 // The data area isn't full but there isn't room for a new entry. 2232 // Either Length or Pointer could be bad. We must select Pointer for Linux 2233 // compatibility, even if only the length is bad. NB. pointer is 1 based. 2234 if pointer+header.IPv4AddressSize > optlen+1 { 2235 if false { 2236 // This is what we would do if we were not being Linux compatible. 2237 // Check for bad pointer or length value. Must be a multiple of 4 after 2238 // accounting for the 3 byte header and not within that header. 2239 // RFC 791, page 20 says: 2240 // The pointer is relative to this option, and the 2241 // smallest legal value for the pointer is 4. 2242 // 2243 // A recorded route is composed of a series of internet addresses. 2244 // Each internet address is 32 bits or 4 octets. 2245 // Linux skips this test so we must too. 
See Linux code at: 2246 // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341 2247 // if (optptr[2]+3 > optlen) { 2248 // pp_ptr = optptr + 2; 2249 // goto error; 2250 // } 2251 if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 { 2252 // Length is bad, not on integral number of slots. 2253 return &header.IPv4OptParameterProblem{ 2254 Pointer: header.IPv4OptionLengthOffset, 2255 NeedICMP: true, 2256 } 2257 } 2258 // If not length, the fault must be with the pointer. 2259 } 2260 return &header.IPv4OptParameterProblem{ 2261 Pointer: header.IPv4OptRRPointerOffset, 2262 NeedICMP: true, 2263 } 2264 } 2265 if usage.actions().recordRoute == optionVerify { 2266 return nil 2267 } 2268 rrOpt.StoreAddress(localAddress) 2269 return nil 2270 } 2271 2272 // handleRouterAlert performs sanity checks on a Router Alert option. 2273 func handleRouterAlert(raOpt header.IPv4OptionRouterAlert) *header.IPv4OptParameterProblem { 2274 // Only the zero value is acceptable, as per RFC 2113, section 2.1: 2275 // Value: A two octet code with the following values: 2276 // 0 - Router shall examine packet 2277 // 1-65535 - Reserved 2278 if raOpt.Value() != header.IPv4OptionRouterAlertValue { 2279 return &header.IPv4OptParameterProblem{ 2280 Pointer: header.IPv4OptionRouterAlertValueOffset, 2281 NeedICMP: true, 2282 } 2283 } 2284 return nil 2285 } 2286 2287 type optionTracker struct { 2288 timestamp bool 2289 recordRoute bool 2290 routerAlert bool 2291 } 2292 2293 // processIPOptions parses the IPv4 options and produces a new set of options 2294 // suitable for use in the next step of packet processing as informed by usage. 2295 // The original will not be touched. 2296 // 2297 // If there were no errors during parsing, the new set of options is returned as 2298 // a new buffer. 
2299 func (e *endpoint) processIPOptions(pkt *stack.PacketBuffer, opts header.IPv4Options, usage optionsUsage) (header.IPv4Options, optionTracker, *header.IPv4OptParameterProblem) { 2300 stats := e.stats.ip 2301 optIter := opts.MakeIterator() 2302 2303 // Except NOP, each option must only appear at most once (RFC 791 section 3.1, 2304 // at the definition of every type). 2305 // Keep track of each option we find to enable duplicate option detection. 2306 var seenOptions [math.MaxUint8 + 1]bool 2307 2308 // TODO(https://gvisor.dev/issue/4586): This will need tweaking when we start 2309 // really forwarding packets as we may need to get two addresses, for rx and 2310 // tx interfaces. We will also have to take usage into account. 2311 localAddress := e.MainAddress().Address 2312 if localAddress.BitLen() == 0 { 2313 h := header.IPv4(pkt.NetworkHeader().Slice()) 2314 dstAddr := h.DestinationAddress() 2315 if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(dstAddr) { 2316 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2317 NeedICMP: false, 2318 } 2319 } 2320 localAddress = dstAddr 2321 } 2322 2323 var optionsProcessed optionTracker 2324 for { 2325 option, done, optProblem := optIter.Next() 2326 if done || optProblem != nil { 2327 return optIter.Finalize(), optionsProcessed, optProblem 2328 } 2329 optType := option.Type() 2330 if optType == header.IPv4OptionNOPType { 2331 optIter.PushNOPOrEnd(optType) 2332 continue 2333 } 2334 if optType == header.IPv4OptionListEndType { 2335 optIter.PushNOPOrEnd(optType) 2336 return optIter.Finalize(), optionsProcessed, nil 2337 } 2338 2339 // check for repeating options (multiple NOPs are OK) 2340 if seenOptions[optType] { 2341 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2342 Pointer: optIter.ErrCursor, 2343 NeedICMP: true, 2344 } 2345 } 2346 seenOptions[optType] = true 2347 2348 optLen, optProblem := func() (int, *header.IPv4OptParameterProblem) { 2349 switch option := 
option.(type) { 2350 case *header.IPv4OptionTimestamp: 2351 stats.OptionTimestampReceived.Increment() 2352 optionsProcessed.timestamp = true 2353 if usage.actions().timestamp != optionRemove { 2354 clock := e.protocol.stack.Clock() 2355 newBuffer := optIter.InitReplacement(option) 2356 optProblem := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage) 2357 return len(newBuffer), optProblem 2358 } 2359 2360 case *header.IPv4OptionRecordRoute: 2361 stats.OptionRecordRouteReceived.Increment() 2362 optionsProcessed.recordRoute = true 2363 if usage.actions().recordRoute != optionRemove { 2364 newBuffer := optIter.InitReplacement(option) 2365 optProblem := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), localAddress, usage) 2366 return len(newBuffer), optProblem 2367 } 2368 2369 case *header.IPv4OptionRouterAlert: 2370 stats.OptionRouterAlertReceived.Increment() 2371 optionsProcessed.routerAlert = true 2372 if usage.actions().routerAlert != optionRemove { 2373 newBuffer := optIter.InitReplacement(option) 2374 optProblem := handleRouterAlert(header.IPv4OptionRouterAlert(newBuffer)) 2375 return len(newBuffer), optProblem 2376 } 2377 2378 default: 2379 stats.OptionUnknownReceived.Increment() 2380 if usage.actions().unknown == optionPass { 2381 return len(optIter.InitReplacement(option)), nil 2382 } 2383 } 2384 return 0, nil 2385 }() 2386 2387 if optProblem != nil { 2388 optProblem.Pointer += optIter.ErrCursor 2389 return nil, optionTracker{}, optProblem 2390 } 2391 optIter.ConsumeBuffer(optLen) 2392 } 2393 }