github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/network/ipv4/ipv4.go (about) 1 // Copyright 2021 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ipv4 contains the implementation of the ipv4 network protocol. 16 package ipv4 17 18 import ( 19 "fmt" 20 "math" 21 "reflect" 22 "time" 23 24 "github.com/metacubex/gvisor/pkg/atomicbitops" 25 "github.com/metacubex/gvisor/pkg/buffer" 26 "github.com/metacubex/gvisor/pkg/sync" 27 "github.com/metacubex/gvisor/pkg/tcpip" 28 "github.com/metacubex/gvisor/pkg/tcpip/header" 29 "github.com/metacubex/gvisor/pkg/tcpip/header/parse" 30 "github.com/metacubex/gvisor/pkg/tcpip/network/hash" 31 "github.com/metacubex/gvisor/pkg/tcpip/network/internal/fragmentation" 32 "github.com/metacubex/gvisor/pkg/tcpip/network/internal/ip" 33 "github.com/metacubex/gvisor/pkg/tcpip/network/internal/multicast" 34 "github.com/metacubex/gvisor/pkg/tcpip/stack" 35 ) 36 37 const ( 38 // ReassembleTimeout is the time a packet stays in the reassembly 39 // system before being evicted. 40 // As per RFC 791 section 3.2: 41 // The current recommendation for the initial timer setting is 15 seconds. 42 // This may be changed as experience with this protocol accumulates. 
43 // 44 // Considering that it is an old recommendation, we use the same reassembly 45 // timeout that linux defines, which is 30 seconds: 46 // https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138 47 ReassembleTimeout = 30 * time.Second 48 49 // ProtocolNumber is the ipv4 protocol number. 50 ProtocolNumber = header.IPv4ProtocolNumber 51 52 // MaxTotalSize is maximum size that can be encoded in the 16-bit 53 // TotalLength field of the ipv4 header. 54 MaxTotalSize = 0xffff 55 56 // DefaultTTL is the default time-to-live value for this endpoint. 57 DefaultTTL = 64 58 59 // buckets is the number of identifier buckets. 60 buckets = 2048 61 62 // The size of a fragment block, in bytes, as per RFC 791 section 3.1, 63 // page 14. 64 fragmentblockSize = 8 65 ) 66 67 const ( 68 forwardingDisabled = 0 69 forwardingEnabled = 1 70 ) 71 72 var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix() 73 74 var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil) 75 var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil) 76 var _ stack.MulticastForwardingNetworkEndpoint = (*endpoint)(nil) 77 var _ stack.GroupAddressableEndpoint = (*endpoint)(nil) 78 var _ stack.AddressableEndpoint = (*endpoint)(nil) 79 var _ stack.NetworkEndpoint = (*endpoint)(nil) 80 var _ IGMPEndpoint = (*endpoint)(nil) 81 82 type endpoint struct { 83 nic stack.NetworkInterface 84 dispatcher stack.TransportDispatcher 85 protocol *protocol 86 stats sharedStats 87 88 // enabled is set to 1 when the endpoint is enabled and 0 when it is 89 // disabled. 90 enabled atomicbitops.Uint32 91 92 // forwarding is set to forwardingEnabled when the endpoint has forwarding 93 // enabled and forwardingDisabled when it is disabled. 94 forwarding atomicbitops.Uint32 95 96 // multicastForwarding is set to forwardingEnabled when the endpoint has 97 // forwarding enabled and forwardingDisabled when it is disabled. 
98 // 99 // TODO(https://gvisor.dev/issue/7338): Implement support for multicast 100 //forwarding. Currently, setting this value to true is a no-op. 101 multicastForwarding atomicbitops.Uint32 102 103 // mu protects below. 104 mu sync.RWMutex 105 106 // +checklocks:mu 107 addressableEndpointState stack.AddressableEndpointState 108 109 // +checklocks:mu 110 igmp igmpState 111 } 112 113 // SetIGMPVersion implements IGMPEndpoint. 114 func (e *endpoint) SetIGMPVersion(v IGMPVersion) IGMPVersion { 115 e.mu.Lock() 116 defer e.mu.Unlock() 117 return e.setIGMPVersionLocked(v) 118 } 119 120 // GetIGMPVersion implements IGMPEndpoint. 121 func (e *endpoint) GetIGMPVersion() IGMPVersion { 122 e.mu.RLock() 123 defer e.mu.RUnlock() 124 return e.getIGMPVersionLocked() 125 } 126 127 // +checklocks:e.mu 128 // +checklocksalias:e.igmp.ep.mu=e.mu 129 func (e *endpoint) setIGMPVersionLocked(v IGMPVersion) IGMPVersion { 130 return e.igmp.setVersion(v) 131 } 132 133 // +checklocksread:e.mu 134 // +checklocksalias:e.igmp.ep.mu=e.mu 135 func (e *endpoint) getIGMPVersionLocked() IGMPVersion { 136 return e.igmp.getVersion() 137 } 138 139 // HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint. 140 func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { 141 // If we are operating as a router, return an ICMP error to the original 142 // packet's sender. 143 if pkt.NetworkPacketInfo.IsForwardedPacket { 144 // TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP 145 // errors to local endpoints. 146 e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt, false /* deliveredLocally */) 147 e.stats.ip.Forwarding.Errors.Increment() 148 e.stats.ip.Forwarding.HostUnreachable.Increment() 149 return 150 } 151 // handleControl expects the entire offending packet to be in the packet 152 // buffer's data field. 
153 pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{ 154 Payload: pkt.ToBuffer(), 155 }) 156 defer pkt.DecRef() 157 pkt.NICID = e.nic.ID() 158 pkt.NetworkProtocolNumber = ProtocolNumber 159 // Use the same control type as an ICMPv4 destination host unreachable error 160 // since the host is considered unreachable if we cannot resolve the link 161 // address to the next hop. 162 e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt) 163 } 164 165 // NewEndpoint creates a new ipv4 endpoint. 166 func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { 167 e := &endpoint{ 168 nic: nic, 169 dispatcher: dispatcher, 170 protocol: p, 171 } 172 e.mu.Lock() 173 e.addressableEndpointState.Init(e, stack.AddressableEndpointStateOptions{HiddenWhileDisabled: false}) 174 e.igmp.init(e) 175 e.mu.Unlock() 176 177 tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem()) 178 179 stackStats := p.stack.Stats() 180 e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP) 181 e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V4) 182 e.stats.igmp.init(&e.stats.localStats.IGMP, &stackStats.IGMP) 183 184 p.mu.Lock() 185 p.eps[nic.ID()] = e 186 p.mu.Unlock() 187 188 return e 189 } 190 191 func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint { 192 p.mu.RLock() 193 defer p.mu.RUnlock() 194 195 for _, e := range p.eps { 196 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 197 addressEndpoint.DecRef() 198 return e 199 } 200 } 201 202 return nil 203 } 204 205 func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) { 206 p.mu.RLock() 207 defer p.mu.RUnlock() 208 ep, ok := p.eps[id] 209 return ep, ok 210 } 211 212 func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { 213 p.mu.Lock() 214 defer p.mu.Unlock() 215 delete(p.eps, nicID) 216 } 217 218 // Forwarding implements 
stack.ForwardingNetworkEndpoint. 219 func (e *endpoint) Forwarding() bool { 220 return e.forwarding.Load() == forwardingEnabled 221 } 222 223 // setForwarding sets the forwarding status for the endpoint. 224 // 225 // Returns the previous forwarding status. 226 func (e *endpoint) setForwarding(v bool) bool { 227 forwarding := uint32(forwardingDisabled) 228 if v { 229 forwarding = forwardingEnabled 230 } 231 232 return e.forwarding.Swap(forwarding) != forwardingDisabled 233 } 234 235 // SetForwarding implements stack.ForwardingNetworkEndpoint. 236 func (e *endpoint) SetForwarding(forwarding bool) bool { 237 e.mu.Lock() 238 defer e.mu.Unlock() 239 240 prevForwarding := e.setForwarding(forwarding) 241 if prevForwarding == forwarding { 242 return prevForwarding 243 } 244 245 if forwarding { 246 // There does not seem to be an RFC requirement for a node to join the all 247 // routers multicast address but 248 // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml 249 // specifies the address as a group for all routers on a subnet so we join 250 // the group here. 251 if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil { 252 // joinGroupLocked only returns an error if the group address is not a 253 // valid IPv4 multicast address. 254 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 255 } 256 257 return prevForwarding 258 } 259 260 switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) { 261 case nil: 262 case *tcpip.ErrBadLocalAddress: 263 // The endpoint may have already left the multicast group. 264 default: 265 panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 266 } 267 268 return prevForwarding 269 } 270 271 // MulticastForwarding implements stack.MulticastForwardingNetworkEndpoint. 
272 func (e *endpoint) MulticastForwarding() bool { 273 return e.multicastForwarding.Load() == forwardingEnabled 274 } 275 276 // SetMulticastForwarding implements stack.MulticastForwardingNetworkEndpoint. 277 func (e *endpoint) SetMulticastForwarding(forwarding bool) bool { 278 updatedForwarding := uint32(forwardingDisabled) 279 if forwarding { 280 updatedForwarding = forwardingEnabled 281 } 282 283 return e.multicastForwarding.Swap(updatedForwarding) != forwardingDisabled 284 } 285 286 // Enable implements stack.NetworkEndpoint. 287 func (e *endpoint) Enable() tcpip.Error { 288 e.mu.Lock() 289 defer e.mu.Unlock() 290 return e.enableLocked() 291 } 292 293 // +checklocks:e.mu 294 // +checklocksalias:e.igmp.ep.mu=e.mu 295 func (e *endpoint) enableLocked() tcpip.Error { 296 // If the NIC is not enabled, the endpoint can't do anything meaningful so 297 // don't enable the endpoint. 298 if !e.nic.Enabled() { 299 return &tcpip.ErrNotPermitted{} 300 } 301 302 // If the endpoint is already enabled, there is nothing for it to do. 303 if !e.setEnabled(true) { 304 return nil 305 } 306 307 // Must be called after Enabled has already been set. 308 e.addressableEndpointState.OnNetworkEndpointEnabledChanged() 309 310 // Create an endpoint to receive broadcast packets on this interface. 311 ep, err := e.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.AddressProperties{PEB: stack.NeverPrimaryEndpoint}) 312 if err != nil { 313 return err 314 } 315 // We have no need for the address endpoint. 316 ep.DecRef() 317 318 // Groups may have been joined while the endpoint was disabled, or the 319 // endpoint may have left groups from the perspective of IGMP when the 320 // endpoint was disabled. Either way, we need to let routers know to 321 // send us multicast traffic. 322 e.igmp.initializeAll() 323 324 // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts 325 // multicast group. 
Note, the IANA calls the all-hosts multicast group the 326 // all-systems multicast group. 327 if err := e.joinGroupLocked(header.IPv4AllSystems); err != nil { 328 // joinGroupLocked only returns an error if the group address is not a valid 329 // IPv4 multicast address. 330 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllSystems, err)) 331 } 332 333 return nil 334 } 335 336 // Enabled implements stack.NetworkEndpoint. 337 func (e *endpoint) Enabled() bool { 338 return e.nic.Enabled() && e.isEnabled() 339 } 340 341 // isEnabled returns true if the endpoint is enabled, regardless of the 342 // enabled status of the NIC. 343 func (e *endpoint) isEnabled() bool { 344 return e.enabled.Load() == 1 345 } 346 347 // setEnabled sets the enabled status for the endpoint. 348 // 349 // Returns true if the enabled status was updated. 350 func (e *endpoint) setEnabled(v bool) bool { 351 if v { 352 return e.enabled.Swap(1) == 0 353 } 354 return e.enabled.Swap(0) == 1 355 } 356 357 // Disable implements stack.NetworkEndpoint. 358 func (e *endpoint) Disable() { 359 e.mu.Lock() 360 defer e.mu.Unlock() 361 e.disableLocked() 362 } 363 364 // +checklocks:e.mu 365 // +checklocksalias:e.igmp.ep.mu=e.mu 366 func (e *endpoint) disableLocked() { 367 if !e.isEnabled() { 368 return 369 } 370 371 // The endpoint may have already left the multicast group. 372 switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) { 373 case nil, *tcpip.ErrBadLocalAddress: 374 default: 375 panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err)) 376 } 377 378 // Leave groups from the perspective of IGMP so that routers know that 379 // we are no longer interested in the group. 380 e.igmp.softLeaveAll() 381 382 // The address may have already been removed. 
383 switch err := e.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err.(type) { 384 case nil, *tcpip.ErrBadLocalAddress: 385 default: 386 panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err)) 387 } 388 389 // Reset the IGMP V1 present flag. 390 // 391 // If the node comes back up on the same network, it will re-learn that it 392 // needs to perform IGMPv1. 393 e.igmp.resetV1Present() 394 395 if !e.setEnabled(false) { 396 panic("should have only done work to disable the endpoint if it was enabled") 397 } 398 399 // Must be called after Enabled has been set. 400 e.addressableEndpointState.OnNetworkEndpointEnabledChanged() 401 } 402 403 // emitMulticastEvent emits a multicast forwarding event using the provided 404 // generator if a valid event dispatcher exists. 405 func (e *endpoint) emitMulticastEvent(eventGenerator func(stack.MulticastForwardingEventDispatcher)) { 406 e.protocol.mu.RLock() 407 defer e.protocol.mu.RUnlock() 408 409 if mcastDisp := e.protocol.multicastForwardingDisp; mcastDisp != nil { 410 eventGenerator(mcastDisp) 411 } 412 } 413 414 // DefaultTTL is the default time-to-live value for this endpoint. 415 func (e *endpoint) DefaultTTL() uint8 { 416 return e.protocol.DefaultTTL() 417 } 418 419 // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the 420 // network layer max header length. 421 func (e *endpoint) MTU() uint32 { 422 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize) 423 if err != nil { 424 return 0 425 } 426 return networkMTU 427 } 428 429 // MaxHeaderLength returns the maximum length needed by ipv4 headers (and 430 // underlying protocols). 431 func (e *endpoint) MaxHeaderLength() uint16 { 432 return e.nic.MaxHeaderLength() + header.IPv4MaximumHeaderSize 433 } 434 435 // NetworkProtocolNumber implements stack.NetworkEndpoint. 
436 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { 437 return e.protocol.Number() 438 } 439 440 // getID returns a random uint16 number (other than zero) to be used as ID in 441 // the IPv4 header. 442 func (e *endpoint) getID() uint16 { 443 rng := e.protocol.stack.SecureRNG() 444 id := rng.Uint16() 445 for id == 0 { 446 id = rng.Uint16() 447 } 448 return id 449 } 450 451 func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { 452 hdrLen := header.IPv4MinimumSize 453 var optLen int 454 if options != nil { 455 optLen = int(options.Length()) 456 } 457 hdrLen += optLen 458 if hdrLen > header.IPv4MaximumHeaderSize { 459 return &tcpip.ErrMessageTooLong{} 460 } 461 ipH := header.IPv4(pkt.NetworkHeader().Push(hdrLen)) 462 length := pkt.Size() 463 if length > math.MaxUint16 { 464 return &tcpip.ErrMessageTooLong{} 465 } 466 // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic 467 // datagrams. Since the DF bit is never being set here, all datagrams 468 // are non-atomic and need an ID. 469 ipH.Encode(&header.IPv4Fields{ 470 TotalLength: uint16(length), 471 ID: e.getID(), 472 TTL: params.TTL, 473 TOS: params.TOS, 474 Protocol: uint8(params.Protocol), 475 SrcAddr: srcAddr, 476 DstAddr: dstAddr, 477 Options: options, 478 }) 479 ipH.SetChecksum(^ipH.CalculateChecksum()) 480 pkt.NetworkProtocolNumber = ProtocolNumber 481 return nil 482 } 483 484 // handleFragments fragments pkt and calls the handler function on each 485 // fragment. It returns the number of fragments handled and the number of 486 // fragments left to be processed. The IP header must already be present in the 487 // original packet. 488 func (e *endpoint) handleFragments(_ *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { 489 // Round the MTU down to align to 8 bytes. 
490 fragmentPayloadSize := networkMTU &^ 7 491 networkHeader := header.IPv4(pkt.NetworkHeader().Slice()) 492 pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader)) 493 defer pf.Release() 494 495 var n int 496 for { 497 fragPkt, more := buildNextFragment(&pf, networkHeader) 498 err := handler(fragPkt) 499 fragPkt.DecRef() 500 if err != nil { 501 return n, pf.RemainingFragmentCount() + 1, err 502 } 503 n++ 504 if !more { 505 return n, pf.RemainingFragmentCount(), nil 506 } 507 } 508 } 509 510 // WritePacket writes a packet to the given destination address and protocol. 511 func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { 512 if err := e.addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* options */); err != nil { 513 return err 514 } 515 516 return e.writePacket(r, pkt) 517 } 518 519 func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 520 netHeader := header.IPv4(pkt.NetworkHeader().Slice()) 521 dstAddr := netHeader.DestinationAddress() 522 523 // iptables filtering. All packets that reach here are locally 524 // generated. 525 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 526 if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { 527 // iptables is telling us to drop the packet. 528 e.stats.ip.IPTablesOutputDropped.Increment() 529 return nil 530 } 531 532 // If the packet is manipulated as per DNAT Output rules, handle packet 533 // based on destination address and do not send the packet to link 534 // layer. 535 // 536 // We should do this for every packet, rather than only DNATted packets, but 537 // removing this check short circuits broadcasts before they are sent out to 538 // other hosts. 
539 if newDstAddr := netHeader.DestinationAddress(); dstAddr != newDstAddr { 540 if ep := e.protocol.findEndpointWithAddress(newDstAddr); ep != nil { 541 // Since we rewrote the packet but it is being routed back to us, we 542 // can safely assume the checksum is valid. 543 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 544 return nil 545 } 546 } 547 548 return e.writePacketPostRouting(r, pkt, false /* headerIncluded */) 549 } 550 551 func (e *endpoint) writePacketPostRouting(r *stack.Route, pkt *stack.PacketBuffer, headerIncluded bool) tcpip.Error { 552 if r.Loop()&stack.PacketLoop != 0 { 553 // If the packet was generated by the stack (not a raw/packet endpoint 554 // where a packet may be written with the header included), then we can 555 // safely assume the checksum is valid. 556 e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) 557 } 558 if r.Loop()&stack.PacketOut == 0 { 559 return nil 560 } 561 562 // Postrouting NAT can only change the source address, and does not alter the 563 // route or outgoing interface of the packet. 564 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 565 if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { 566 // iptables is telling us to drop the packet. 567 e.stats.ip.IPTablesPostroutingDropped.Increment() 568 return nil 569 } 570 571 stats := e.stats.ip 572 573 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(len(pkt.NetworkHeader().Slice()))) 574 if err != nil { 575 stats.OutgoingPacketErrors.Increment() 576 return err 577 } 578 579 if packetMustBeFragmented(pkt, networkMTU) { 580 h := header.IPv4(pkt.NetworkHeader().Slice()) 581 if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { 582 // TODO(gvisor.dev/issue/5919): Handle error condition in which DontFragment 583 // is set but the packet must be fragmented for the non-forwarding case. 
584 return &tcpip.ErrMessageTooLong{} 585 } 586 sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { 587 // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each 588 // fragment one by one using WritePacket() (current strategy) or if we 589 // want to create a PacketBufferList from the fragments and feed it to 590 // WritePackets(). It'll be faster but cost more memory. 591 return e.nic.WritePacket(r, fragPkt) 592 }) 593 stats.PacketsSent.IncrementBy(uint64(sent)) 594 stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) 595 return err 596 } 597 598 if err := e.nic.WritePacket(r, pkt); err != nil { 599 stats.OutgoingPacketErrors.Increment() 600 return err 601 } 602 stats.PacketsSent.Increment() 603 return nil 604 } 605 606 // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. 607 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 608 // The packet already has an IP header, but there are a few required 609 // checks. 610 h, ok := pkt.Data().PullUp(header.IPv4MinimumSize) 611 if !ok { 612 return &tcpip.ErrMalformedHeader{} 613 } 614 615 hdrLen := header.IPv4(h).HeaderLength() 616 if hdrLen < header.IPv4MinimumSize { 617 return &tcpip.ErrMalformedHeader{} 618 } 619 620 h, ok = pkt.Data().PullUp(int(hdrLen)) 621 if !ok { 622 return &tcpip.ErrMalformedHeader{} 623 } 624 ipH := header.IPv4(h) 625 626 // Always set the total length. 627 pktSize := pkt.Data().Size() 628 ipH.SetTotalLength(uint16(pktSize)) 629 630 // Set the source address when zero. 631 if ipH.SourceAddress() == header.IPv4Any { 632 ipH.SetSourceAddress(r.LocalAddress()) 633 } 634 635 // Set the packet ID when zero. 636 if ipH.ID() == 0 { 637 // RFC 6864 section 4.3 mandates uniqueness of ID values for 638 // non-atomic datagrams, so assign an ID to all such datagrams 639 // according to the definition given in RFC 6864 section 4. 
640 if ipH.Flags()&header.IPv4FlagDontFragment == 0 || ipH.Flags()&header.IPv4FlagMoreFragments != 0 || ipH.FragmentOffset() > 0 { 641 ipH.SetID(e.getID()) 642 } 643 } 644 645 // Always set the checksum. 646 ipH.SetChecksum(0) 647 ipH.SetChecksum(^ipH.CalculateChecksum()) 648 649 // Populate the packet buffer's network header and don't allow an invalid 650 // packet to be sent. 651 // 652 // Note that parsing only makes sure that the packet is well formed as per the 653 // wire format. We also want to check if the header's fields are valid before 654 // sending the packet. 655 if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().Slice()).IsValid(pktSize) { 656 return &tcpip.ErrMalformedHeader{} 657 } 658 659 return e.writePacketPostRouting(r, pkt, true /* headerIncluded */) 660 } 661 662 // forwardPacketWithRoute emits the pkt using the provided route. 663 // 664 // If updateOptions is true, then the IP options will be updated in the copied 665 // pkt using the outgoing endpoint. Otherwise, the caller is responsible for 666 // updating the options. 667 // 668 // This method should be invoked by the endpoint that received the pkt. 669 func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt *stack.PacketBuffer, updateOptions bool) ip.ForwardingError { 670 h := header.IPv4(pkt.NetworkHeader().Slice()) 671 stk := e.protocol.stack 672 673 inNicName := stk.FindNICNameFromID(e.nic.ID()) 674 outNicName := stk.FindNICNameFromID(route.NICID()) 675 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 676 // iptables is telling us to drop the packet. 677 e.stats.ip.IPTablesForwardDropped.Increment() 678 return nil 679 } 680 681 // We need to do a deep copy of the IP packet because 682 // WriteHeaderIncludedPacket may modify the packet buffer, but we do 683 // not own it. 684 // 685 // TODO(https://gvisor.dev/issue/7473): For multicast, only create one deep 686 // copy and then clone. 
687 newPkt := pkt.DeepCopyForForwarding(int(route.MaxHeaderLength())) 688 newHdr := header.IPv4(newPkt.NetworkHeader().Slice()) 689 defer newPkt.DecRef() 690 691 forwardToEp, ok := e.protocol.getEndpointForNIC(route.NICID()) 692 if !ok { 693 return &ip.ErrUnknownOutputEndpoint{} 694 } 695 696 if updateOptions { 697 if err := forwardToEp.updateOptionsForForwarding(newPkt); err != nil { 698 return err 699 } 700 } 701 702 ttl := h.TTL() 703 // As per RFC 791 page 30, Time to Live, 704 // 705 // This field must be decreased at each point that the internet header 706 // is processed to reflect the time spent processing the datagram. 707 // Even if no local information is available on the time actually 708 // spent, the field must be decremented by 1. 709 newHdr.SetTTL(ttl - 1) 710 // We perform a full checksum as we may have updated options above. The IP 711 // header is relatively small so this is not expected to be an expensive 712 // operation. 713 newHdr.SetChecksum(0) 714 newHdr.SetChecksum(^newHdr.CalculateChecksum()) 715 716 switch err := forwardToEp.writePacketPostRouting(route, newPkt, true /* headerIncluded */); err.(type) { 717 case nil: 718 return nil 719 case *tcpip.ErrMessageTooLong: 720 // As per RFC 792, page 4, Destination Unreachable: 721 // 722 // Another case is when a datagram must be fragmented to be forwarded by a 723 // gateway yet the Don't Fragment flag is on. In this case the gateway must 724 // discard the datagram and may return a destination unreachable message. 725 // 726 // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit 727 // while sending the packet and returns this error iff fragmentation is 728 // necessary and the bit is also set. 
729 _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt, false /* deliveredLocally */) 730 return &ip.ErrMessageTooLong{} 731 case *tcpip.ErrNoBufferSpace: 732 return &ip.ErrOutgoingDeviceNoBufferSpace{} 733 default: 734 return &ip.ErrOther{Err: err} 735 } 736 } 737 738 // forwardUnicastPacket attempts to forward a packet to its final destination. 739 func (e *endpoint) forwardUnicastPacket(pkt *stack.PacketBuffer) ip.ForwardingError { 740 hView := pkt.NetworkHeader().View() 741 defer hView.Release() 742 h := header.IPv4(hView.AsSlice()) 743 744 dstAddr := h.DestinationAddress() 745 746 if err := validateAddressesForForwarding(h); err != nil { 747 return err 748 } 749 750 ttl := h.TTL() 751 if ttl == 0 { 752 // As per RFC 792 page 6, Time Exceeded Message, 753 // 754 // If the gateway processing a datagram finds the time to live field 755 // is zero it must discard the datagram. The gateway may also notify 756 // the source host via the time exceeded message. 757 // 758 // We return the original error rather than the result of returning 759 // the ICMP packet because the original error is more relevant to 760 // the caller. 761 _ = e.protocol.returnError(&icmpReasonTTLExceeded{}, pkt, false /* deliveredLocally */) 762 return &ip.ErrTTLExceeded{} 763 } 764 765 if err := e.updateOptionsForForwarding(pkt); err != nil { 766 return err 767 } 768 769 stk := e.protocol.stack 770 771 // Check if the destination is owned by the stack. 772 if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { 773 inNicName := stk.FindNICNameFromID(e.nic.ID()) 774 outNicName := stk.FindNICNameFromID(ep.nic.ID()) 775 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 776 // iptables is telling us to drop the packet. 777 e.stats.ip.IPTablesForwardDropped.Increment() 778 return nil 779 } 780 781 // The packet originally arrived on e so provide its NIC as the input NIC. 
782 ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 783 return nil 784 } 785 786 r, err := stk.FindRoute(0, tcpip.Address{}, dstAddr, ProtocolNumber, false /* multicastLoop */) 787 switch err.(type) { 788 case nil: 789 // TODO(https://gvisor.dev/issues/8105): We should not observe ErrHostUnreachable from route 790 // lookups. 791 case *tcpip.ErrHostUnreachable, *tcpip.ErrNetworkUnreachable: 792 // We return the original error rather than the result of returning 793 // the ICMP packet because the original error is more relevant to 794 // the caller. 795 _ = e.protocol.returnError(&icmpReasonNetworkUnreachable{}, pkt, false /* deliveredLocally */) 796 return &ip.ErrHostUnreachable{} 797 default: 798 return &ip.ErrOther{Err: err} 799 } 800 defer r.Release() 801 802 // TODO(https://gvisor.dev/issue/7472): Unicast IP options should be updated 803 // using the output endpoint (instead of the input endpoint). In particular, 804 // RFC 1812 section 5.2.1 states the following: 805 // 806 // Processing of certain IP options requires that the router insert its IP 807 // address into the option. As noted in Section [5.2.4], the address 808 // inserted MUST be the address of the logical interface on which the 809 // packet is sent or the router's router-id if the packet is sent over an 810 // unnumbered interface. Thus, processing of these options cannot be 811 // completed until after the output interface is chosen. 812 return e.forwardPacketWithRoute(r, pkt, false /* updateOptions */) 813 } 814 815 // HandlePacket is called by the link layer when new ipv4 packets arrive for 816 // this endpoint. 
817 func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { 818 stats := e.stats.ip 819 820 stats.PacketsReceived.Increment() 821 822 if !e.isEnabled() { 823 stats.DisabledPacketsReceived.Increment() 824 return 825 } 826 827 hView, ok := e.protocol.parseAndValidate(pkt) 828 if !ok { 829 stats.MalformedPacketsReceived.Increment() 830 return 831 } 832 h := header.IPv4(hView.AsSlice()) 833 defer hView.Release() 834 835 if !e.nic.IsLoopback() { 836 if !e.protocol.options.AllowExternalLoopbackTraffic { 837 if header.IsV4LoopbackAddress(h.SourceAddress()) { 838 stats.InvalidSourceAddressesReceived.Increment() 839 return 840 } 841 842 if header.IsV4LoopbackAddress(h.DestinationAddress()) { 843 stats.InvalidDestinationAddressesReceived.Increment() 844 return 845 } 846 } 847 848 if e.protocol.stack.HandleLocal() { 849 addressEndpoint := e.AcquireAssignedAddress(header.IPv4(pkt.NetworkHeader().Slice()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint) 850 if addressEndpoint != nil { 851 addressEndpoint.DecRef() 852 853 // The source address is one of our own, so we never should have gotten 854 // a packet like this unless HandleLocal is false or our NIC is the 855 // loopback interface. 856 stats.InvalidSourceAddressesReceived.Increment() 857 return 858 } 859 } 860 861 // Loopback traffic skips the prerouting chain. 862 inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 863 if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { 864 // iptables is telling us to drop the packet. 865 stats.IPTablesPreroutingDropped.Increment() 866 return 867 } 868 } 869 870 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 871 } 872 873 // handleLocalPacket is like HandlePacket except it does not perform the 874 // prerouting iptables hook or check for loopback traffic that originated from 875 // outside of the netstack (i.e. martian loopback packets). 
876 func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) { 877 stats := e.stats.ip 878 stats.PacketsReceived.Increment() 879 880 pkt = pkt.CloneToInbound() 881 defer pkt.DecRef() 882 pkt.RXChecksumValidated = canSkipRXChecksum 883 884 hView, ok := e.protocol.parseAndValidate(pkt) 885 if !ok { 886 stats.MalformedPacketsReceived.Increment() 887 return 888 } 889 h := header.IPv4(hView.AsSlice()) 890 defer hView.Release() 891 892 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 893 } 894 895 func validateAddressesForForwarding(h header.IPv4) ip.ForwardingError { 896 srcAddr := h.SourceAddress() 897 898 // As per RFC 5735 section 3, 899 // 900 // 0.0.0.0/8 - Addresses in this block refer to source hosts on "this" 901 // network. Address 0.0.0.0/32 may be used as a source address for this 902 // host on this network; other addresses within 0.0.0.0/8 may be used to 903 // refer to specified hosts on this network ([RFC1122], Section 3.2.1.3). 904 // 905 // And RFC 6890 section 2.2.2, 906 // 907 // +----------------------+----------------------------+ 908 // | Attribute | Value | 909 // +----------------------+----------------------------+ 910 // | Address Block | 0.0.0.0/8 | 911 // | Name | "This host on this network"| 912 // | RFC | [RFC1122], Section 3.2.1.3 | 913 // | Allocation Date | September 1981 | 914 // | Termination Date | N/A | 915 // | Source | True | 916 // | Destination | False | 917 // | Forwardable | False | 918 // | Global | False | 919 // | Reserved-by-Protocol | True | 920 // +----------------------+----------------------------+ 921 if header.IPv4CurrentNetworkSubnet.Contains(srcAddr) { 922 return &ip.ErrInitializingSourceAddress{} 923 } 924 925 // As per RFC 3927 section 7, 926 // 927 // A router MUST NOT forward a packet with an IPv4 Link-Local source or 928 // destination address, irrespective of the router's default route 929 // configuration or routes obtained from dynamic routing protocols. 
930 // 931 // A router which receives a packet with an IPv4 Link-Local source or 932 // destination address MUST NOT forward the packet. This prevents 933 // forwarding of packets back onto the network segment from which they 934 // originated, or to any other segment. 935 if header.IsV4LinkLocalUnicastAddress(srcAddr) { 936 return &ip.ErrLinkLocalSourceAddress{} 937 } 938 if dstAddr := h.DestinationAddress(); header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) { 939 return &ip.ErrLinkLocalDestinationAddress{} 940 } 941 return nil 942 } 943 944 // forwardMulticastPacket validates a multicast pkt and attempts to forward it. 945 // 946 // This method should be invoked for incoming multicast packets using the 947 // endpoint that received the packet. 948 func (e *endpoint) forwardMulticastPacket(h header.IPv4, pkt *stack.PacketBuffer) ip.ForwardingError { 949 if err := validateAddressesForForwarding(h); err != nil { 950 return err 951 } 952 953 if opts := h.Options(); len(opts) != 0 { 954 // Check if the options are valid, but don't mutate them. This corresponds 955 // to step 3 of RFC 1812 section 5.2.1.1. 956 if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil { 957 // Per RFC 1812 section 4.3.2.7, an ICMP error message should not be 958 // sent for: 959 // 960 // A packet destined to an IP broadcast or IP multicast address. 961 // 962 // Note that protocol.returnError also enforces this requirement. 963 // However, we intentionally omit it here since this path is multicast 964 // only. 965 return &ip.ErrParameterProblem{} 966 } 967 } 968 969 routeKey := stack.UnicastSourceAndMulticastDestination{ 970 Source: h.SourceAddress(), 971 Destination: h.DestinationAddress(), 972 } 973 974 // The pkt has been validated. Consequently, if a route is not found, then 975 // the pkt can safely be queued. 
976 result, hasBufferSpace := e.protocol.multicastRouteTable.GetRouteOrInsertPending(routeKey, pkt) 977 978 if !hasBufferSpace { 979 // Unable to queue the pkt. Silently drop it. 980 return &ip.ErrNoMulticastPendingQueueBufferSpace{} 981 } 982 983 switch result.GetRouteResultState { 984 case multicast.InstalledRouteFound: 985 // Attempt to forward the pkt using an existing route. 986 return e.forwardValidatedMulticastPacket(pkt, result.InstalledRoute) 987 case multicast.NoRouteFoundAndPendingInserted: 988 e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) { 989 disp.OnMissingRoute(stack.MulticastPacketContext{ 990 stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()}, 991 e.nic.ID(), 992 }) 993 }) 994 case multicast.PacketQueuedInPendingRoute: 995 default: 996 panic(fmt.Sprintf("unexpected GetRouteResultState: %s", result.GetRouteResultState)) 997 } 998 return &ip.ErrHostUnreachable{} 999 } 1000 1001 func (e *endpoint) updateOptionsForForwarding(pkt *stack.PacketBuffer) ip.ForwardingError { 1002 h := header.IPv4(pkt.NetworkHeader().Slice()) 1003 if opts := h.Options(); len(opts) != 0 { 1004 newOpts, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageForward{}) 1005 if optProblem != nil { 1006 if optProblem.NeedICMP { 1007 // Note that this will not emit an ICMP error if the destination is 1008 // multicast. 1009 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1010 pointer: optProblem.Pointer, 1011 }, pkt, false /* deliveredLocally */) 1012 } 1013 return &ip.ErrParameterProblem{} 1014 } 1015 copied := copy(opts, newOpts) 1016 if copied != len(newOpts) { 1017 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 1018 } 1019 // Since in forwarding we handle all options, including copying those we 1020 // do not recognise, the options region should remain the same size which 1021 // simplifies processing. 
As we MAY receive a packet with a lot of padded 1022 // bytes after the "end of options list" byte, make sure we copy 1023 // them as the legal padding value (0). 1024 for i := copied; i < len(opts); i++ { 1025 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 1026 opts[i] = byte(header.IPv4OptionListEndType) 1027 } 1028 } 1029 return nil 1030 } 1031 1032 // forwardValidatedMulticastPacket attempts to forward the pkt using the 1033 // provided installedRoute. 1034 // 1035 // This method should be invoked by the endpoint that received the pkt. 1036 func (e *endpoint) forwardValidatedMulticastPacket(pkt *stack.PacketBuffer, installedRoute *multicast.InstalledRoute) ip.ForwardingError { 1037 // Per RFC 1812 section 5.2.1.3, 1038 // 1039 // Based on the IP source and destination addresses found in the datagram 1040 // header, the router determines whether the datagram has been received 1041 // on the proper interface for forwarding. If not, the datagram is 1042 // dropped silently. 1043 if e.nic.ID() != installedRoute.ExpectedInputInterface { 1044 h := header.IPv4(pkt.NetworkHeader().Slice()) 1045 e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) { 1046 disp.OnUnexpectedInputInterface(stack.MulticastPacketContext{ 1047 stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()}, 1048 e.nic.ID(), 1049 }, installedRoute.ExpectedInputInterface) 1050 }) 1051 return &ip.ErrUnexpectedMulticastInputInterface{} 1052 } 1053 1054 for _, outgoingInterface := range installedRoute.OutgoingInterfaces { 1055 if err := e.forwardMulticastPacketForOutgoingInterface(pkt, outgoingInterface); err != nil { 1056 e.handleForwardingError(err) 1057 continue 1058 } 1059 // The pkt was successfully forwarded. Mark the route as used. 
1060 installedRoute.SetLastUsedTimestamp(e.protocol.stack.Clock().NowMonotonic()) 1061 } 1062 return nil 1063 } 1064 1065 // forwardMulticastPacketForOutgoingInterface attempts to forward the pkt out 1066 // of the provided outgoingInterface. 1067 // 1068 // This method should be invoked by the endpoint that received the pkt. 1069 func (e *endpoint) forwardMulticastPacketForOutgoingInterface(pkt *stack.PacketBuffer, outgoingInterface stack.MulticastRouteOutgoingInterface) ip.ForwardingError { 1070 h := header.IPv4(pkt.NetworkHeader().Slice()) 1071 1072 // Per RFC 1812 section 5.2.1.3, 1073 // 1074 // A copy of the multicast datagram is forwarded out each outgoing 1075 // interface whose minimum TTL value is less than or equal to the TTL 1076 // value in the datagram header. 1077 // 1078 // Copying of the packet is deferred to forwardPacketWithRoute since unicast 1079 // and multicast both require a copy. 1080 if outgoingInterface.MinTTL > h.TTL() { 1081 return &ip.ErrTTLExceeded{} 1082 } 1083 1084 route := e.protocol.stack.NewRouteForMulticast(outgoingInterface.ID, h.DestinationAddress(), e.NetworkProtocolNumber()) 1085 1086 if route == nil { 1087 // Failed to convert to a stack.Route. This likely means that the outgoing 1088 // endpoint no longer exists. 1089 return &ip.ErrHostUnreachable{} 1090 } 1091 defer route.Release() 1092 1093 return e.forwardPacketWithRoute(route, pkt, true /* updateOptions */) 1094 } 1095 1096 func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) { 1097 pkt.NICID = e.nic.ID() 1098 1099 // Raw socket packets are delivered based solely on the transport protocol 1100 // number. We only require that the packet be valid IPv4, and that they not 1101 // be fragmented. 
1102 if !h.More() && h.FragmentOffset() == 0 { 1103 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1104 } 1105 1106 stats := e.stats 1107 stats.ip.ValidPacketsReceived.Increment() 1108 1109 srcAddr := h.SourceAddress() 1110 dstAddr := h.DestinationAddress() 1111 1112 // As per RFC 1122 section 3.2.1.3: 1113 // When a host sends any datagram, the IP source address MUST 1114 // be one of its own IP addresses (but not a broadcast or 1115 // multicast address). 1116 if srcAddr == header.IPv4Broadcast || header.IsV4MulticastAddress(srcAddr) { 1117 stats.ip.InvalidSourceAddressesReceived.Increment() 1118 return 1119 } 1120 // Make sure the source address is not a subnet-local broadcast address. 1121 if addressEndpoint := e.AcquireAssignedAddress(srcAddr, false /* createTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 1122 subnet := addressEndpoint.Subnet() 1123 addressEndpoint.DecRef() 1124 if subnet.IsBroadcast(srcAddr) { 1125 stats.ip.InvalidSourceAddressesReceived.Increment() 1126 return 1127 } 1128 } 1129 1130 if header.IsV4MulticastAddress(dstAddr) { 1131 // Handle all packets destined to a multicast address separately. Unlike 1132 // unicast, these packets can be both delivered locally and forwarded. See 1133 // RFC 1812 section 5.2.3 for details regarding the forwarding/local 1134 // delivery decision. 1135 1136 multicastForwarding := e.MulticastForwarding() && e.protocol.multicastForwarding() 1137 1138 if multicastForwarding { 1139 e.handleForwardingError(e.forwardMulticastPacket(h, pkt)) 1140 } 1141 1142 if e.IsInGroup(dstAddr) { 1143 e.deliverPacketLocally(h, pkt, inNICName) 1144 return 1145 } 1146 1147 if !multicastForwarding { 1148 // Only consider the destination address invalid if we didn't attempt to 1149 // forward the pkt and it was not delivered locally. 
1150 stats.ip.InvalidDestinationAddressesReceived.Increment() 1151 } 1152 return 1153 } 1154 1155 // Before we do any processing, check if the packet was received as some 1156 // sort of broadcast. 1157 // 1158 // If the packet is destined for this device, then it should be delivered 1159 // locally. Otherwise, if forwarding is enabled, it should be forwarded. 1160 if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil { 1161 subnet := addressEndpoint.AddressWithPrefix().Subnet() 1162 addressEndpoint.DecRef() 1163 pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast 1164 e.deliverPacketLocally(h, pkt, inNICName) 1165 } else if e.Forwarding() { 1166 e.handleForwardingError(e.forwardUnicastPacket(pkt)) 1167 } else { 1168 stats.ip.InvalidDestinationAddressesReceived.Increment() 1169 } 1170 } 1171 1172 // handleForwardingError processes the provided err and increments any relevant 1173 // counters. 
1174 func (e *endpoint) handleForwardingError(err ip.ForwardingError) { 1175 stats := e.stats.ip 1176 switch err := err.(type) { 1177 case nil: 1178 return 1179 case *ip.ErrInitializingSourceAddress: 1180 stats.Forwarding.InitializingSource.Increment() 1181 case *ip.ErrLinkLocalSourceAddress: 1182 stats.Forwarding.LinkLocalSource.Increment() 1183 case *ip.ErrLinkLocalDestinationAddress: 1184 stats.Forwarding.LinkLocalDestination.Increment() 1185 case *ip.ErrTTLExceeded: 1186 stats.Forwarding.ExhaustedTTL.Increment() 1187 case *ip.ErrHostUnreachable: 1188 stats.Forwarding.Unrouteable.Increment() 1189 case *ip.ErrParameterProblem: 1190 stats.MalformedPacketsReceived.Increment() 1191 case *ip.ErrMessageTooLong: 1192 stats.Forwarding.PacketTooBig.Increment() 1193 case *ip.ErrNoMulticastPendingQueueBufferSpace: 1194 stats.Forwarding.NoMulticastPendingQueueBufferSpace.Increment() 1195 case *ip.ErrUnexpectedMulticastInputInterface: 1196 stats.Forwarding.UnexpectedMulticastInputInterface.Increment() 1197 case *ip.ErrUnknownOutputEndpoint: 1198 stats.Forwarding.UnknownOutputEndpoint.Increment() 1199 case *ip.ErrOutgoingDeviceNoBufferSpace: 1200 stats.Forwarding.OutgoingDeviceNoBufferSpace.Increment() 1201 default: 1202 panic(fmt.Sprintf("unrecognized forwarding error: %s", err)) 1203 } 1204 stats.Forwarding.Errors.Increment() 1205 } 1206 1207 func (e *endpoint) deliverPacketLocally(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) { 1208 stats := e.stats 1209 // iptables filtering. All packets that reach here are intended for 1210 // this machine and will not be forwarded. 1211 if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok { 1212 // iptables is telling us to drop the packet. 1213 stats.ip.IPTablesInputDropped.Increment() 1214 return 1215 } 1216 1217 if h.More() || h.FragmentOffset() != 0 { 1218 if pkt.Data().Size()+len(pkt.TransportHeader().Slice()) == 0 { 1219 // Drop the packet as it's marked as a fragment but has 1220 // no payload. 
1221 stats.ip.MalformedPacketsReceived.Increment() 1222 stats.ip.MalformedFragmentsReceived.Increment() 1223 return 1224 } 1225 if opts := h.Options(); len(opts) != 0 { 1226 // If there are options we need to check them before we do assembly 1227 // or we could be assembling errant packets. However we do not change the 1228 // options as that could lead to double processing later. 1229 if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil { 1230 if optProblem.NeedICMP { 1231 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1232 pointer: optProblem.Pointer, 1233 }, pkt, true /* deliveredLocally */) 1234 e.stats.ip.MalformedPacketsReceived.Increment() 1235 } 1236 return 1237 } 1238 } 1239 // The packet is a fragment, let's try to reassemble it. 1240 start := h.FragmentOffset() 1241 // Drop the fragment if the size of the reassembled payload would exceed the 1242 // maximum payload size. 1243 // 1244 // Note that this addition doesn't overflow even on 32bit architecture 1245 // because pkt.Data().Size() should not exceed 65535 (the max IP datagram 1246 // size). Otherwise the packet would've been rejected as invalid before 1247 // reaching here. 1248 if int(start)+pkt.Data().Size() > header.IPv4MaximumPayloadSize { 1249 stats.ip.MalformedPacketsReceived.Increment() 1250 stats.ip.MalformedFragmentsReceived.Increment() 1251 return 1252 } 1253 1254 proto := h.Protocol() 1255 resPkt, transProtoNum, ready, err := e.protocol.fragmentation.Process( 1256 // As per RFC 791 section 2.3, the identification value is unique 1257 // for a source-destination pair and protocol. 
1258 fragmentation.FragmentID{ 1259 Source: h.SourceAddress(), 1260 Destination: h.DestinationAddress(), 1261 ID: uint32(h.ID()), 1262 Protocol: proto, 1263 }, 1264 start, 1265 start+uint16(pkt.Data().Size())-1, 1266 h.More(), 1267 proto, 1268 pkt, 1269 ) 1270 if err != nil { 1271 stats.ip.MalformedPacketsReceived.Increment() 1272 stats.ip.MalformedFragmentsReceived.Increment() 1273 return 1274 } 1275 if !ready { 1276 return 1277 } 1278 defer resPkt.DecRef() 1279 pkt = resPkt 1280 h = header.IPv4(pkt.NetworkHeader().Slice()) 1281 1282 // The reassembler doesn't take care of fixing up the header, so we need 1283 // to do it here. 1284 h.SetTotalLength(uint16(pkt.Data().Size() + len(h))) 1285 h.SetFlagsFragmentOffset(0, 0) 1286 1287 e.protocol.parseTransport(pkt, tcpip.TransportProtocolNumber(transProtoNum)) 1288 1289 // Now that the packet is reassembled, it can be sent to raw sockets. 1290 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1291 } 1292 stats.ip.PacketsDelivered.Increment() 1293 1294 p := h.TransportProtocol() 1295 if p == header.ICMPv4ProtocolNumber { 1296 // TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport 1297 // headers, the setting of the transport number here should be 1298 // unnecessary and removed. 1299 pkt.TransportProtocolNumber = p 1300 e.handleICMP(pkt) 1301 return 1302 } 1303 // ICMP handles options itself but do it here for all remaining destinations. 
1304 var hasRouterAlertOption bool 1305 if opts := h.Options(); len(opts) != 0 { 1306 newOpts, processedOpts, optProblem := e.processIPOptions(pkt, opts, &optionUsageReceive{}) 1307 if optProblem != nil { 1308 if optProblem.NeedICMP { 1309 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1310 pointer: optProblem.Pointer, 1311 }, pkt, true /* deliveredLocally */) 1312 stats.ip.MalformedPacketsReceived.Increment() 1313 } 1314 return 1315 } 1316 hasRouterAlertOption = processedOpts.routerAlert 1317 copied := copy(opts, newOpts) 1318 if copied != len(newOpts) { 1319 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 1320 } 1321 for i := copied; i < len(opts); i++ { 1322 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 1323 opts[i] = byte(header.IPv4OptionListEndType) 1324 } 1325 } 1326 if p == header.IGMPProtocolNumber { 1327 e.mu.Lock() 1328 e.igmp.handleIGMP(pkt, hasRouterAlertOption) // +checklocksforce: e == e.igmp.ep. 1329 e.mu.Unlock() 1330 return 1331 } 1332 1333 switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res { 1334 case stack.TransportPacketHandled: 1335 case stack.TransportPacketDestinationPortUnreachable: 1336 // As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination 1337 // Unreachable messages with code: 1338 // 3 (Port Unreachable), when the designated transport protocol 1339 // (e.g., UDP) is unable to demultiplex the datagram but has no 1340 // protocol mechanism to inform the sender. 
1341 _ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */) 1342 case stack.TransportPacketProtocolUnreachable: 1343 // As per RFC: 1122 Section 3.2.2.1 1344 // A host SHOULD generate Destination Unreachable messages with code: 1345 // 2 (Protocol Unreachable), when the designated transport protocol 1346 // is not supported 1347 _ = e.protocol.returnError(&icmpReasonProtoUnreachable{}, pkt, true /* deliveredLocally */) 1348 default: 1349 panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res)) 1350 } 1351 } 1352 1353 // Close cleans up resources associated with the endpoint. 1354 func (e *endpoint) Close() { 1355 e.mu.Lock() 1356 e.disableLocked() 1357 e.addressableEndpointState.Cleanup() 1358 e.mu.Unlock() 1359 1360 e.protocol.forgetEndpoint(e.nic.ID()) 1361 } 1362 1363 // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. 1364 func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { 1365 e.mu.Lock() 1366 defer e.mu.Unlock() 1367 1368 ep, err := e.addressableEndpointState.AddAndAcquireAddress(addr, properties, stack.Permanent) 1369 if err == nil { 1370 e.sendQueuedReports() 1371 } 1372 return ep, err 1373 } 1374 1375 // sendQueuedReports sends queued igmp reports. 1376 // 1377 // +checklocks:e.mu 1378 // +checklocksalias:e.igmp.ep.mu=e.mu 1379 func (e *endpoint) sendQueuedReports() { 1380 e.igmp.sendQueuedReports() 1381 } 1382 1383 // RemovePermanentAddress implements stack.AddressableEndpoint. 1384 func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { 1385 e.mu.RLock() 1386 defer e.mu.RUnlock() 1387 return e.addressableEndpointState.RemovePermanentAddress(addr) 1388 } 1389 1390 // SetDeprecated implements stack.AddressableEndpoint. 
1391 func (e *endpoint) SetDeprecated(addr tcpip.Address, deprecated bool) tcpip.Error { 1392 e.mu.RLock() 1393 defer e.mu.RUnlock() 1394 return e.addressableEndpointState.SetDeprecated(addr, deprecated) 1395 } 1396 1397 // SetLifetimes implements stack.AddressableEndpoint. 1398 func (e *endpoint) SetLifetimes(addr tcpip.Address, lifetimes stack.AddressLifetimes) tcpip.Error { 1399 e.mu.RLock() 1400 defer e.mu.RUnlock() 1401 return e.addressableEndpointState.SetLifetimes(addr, lifetimes) 1402 } 1403 1404 // MainAddress implements stack.AddressableEndpoint. 1405 func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { 1406 e.mu.RLock() 1407 defer e.mu.RUnlock() 1408 return e.addressableEndpointState.MainAddress() 1409 } 1410 1411 // AcquireAssignedAddress implements stack.AddressableEndpoint. 1412 func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { 1413 e.mu.RLock() 1414 defer e.mu.RUnlock() 1415 1416 loopback := e.nic.IsLoopback() 1417 return e.addressableEndpointState.AcquireAssignedAddressOrMatching(localAddr, func(addressEndpoint stack.AddressEndpoint) bool { 1418 subnet := addressEndpoint.Subnet() 1419 // IPv4 has a notion of a subnet broadcast address and considers the 1420 // loopback interface bound to an address's whole subnet (on linux). 1421 return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr)) 1422 }, allowTemp, tempPEB) 1423 } 1424 1425 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. 
1426 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr, srcHint tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1427 e.mu.RLock() 1428 defer e.mu.RUnlock() 1429 return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, srcHint, allowExpired) 1430 } 1431 1432 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress 1433 // but with locking requirements 1434 // 1435 // +checklocksread:e.mu 1436 func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr, srcHint tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1437 return e.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, srcHint, allowExpired) 1438 } 1439 1440 // PrimaryAddresses implements stack.AddressableEndpoint. 1441 func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { 1442 e.mu.RLock() 1443 defer e.mu.RUnlock() 1444 return e.addressableEndpointState.PrimaryAddresses() 1445 } 1446 1447 // PermanentAddresses implements stack.AddressableEndpoint. 1448 func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { 1449 e.mu.RLock() 1450 defer e.mu.RUnlock() 1451 return e.addressableEndpointState.PermanentAddresses() 1452 } 1453 1454 // JoinGroup implements stack.GroupAddressableEndpoint. 1455 func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { 1456 e.mu.Lock() 1457 defer e.mu.Unlock() 1458 return e.joinGroupLocked(addr) 1459 } 1460 1461 // joinGroupLocked is like JoinGroup but with locking requirements. 1462 // 1463 // +checklocks:e.mu 1464 // +checklocksalias:e.igmp.ep.mu=e.mu 1465 func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { 1466 if !header.IsV4MulticastAddress(addr) { 1467 return &tcpip.ErrBadAddress{} 1468 } 1469 1470 e.igmp.joinGroup(addr) 1471 return nil 1472 } 1473 1474 // LeaveGroup implements stack.GroupAddressableEndpoint. 
1475 func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { 1476 e.mu.Lock() 1477 defer e.mu.Unlock() 1478 return e.leaveGroupLocked(addr) 1479 } 1480 1481 // leaveGroupLocked is like LeaveGroup but with locking requirements. 1482 // 1483 // +checklocks:e.mu 1484 // +checklocksalias:e.igmp.ep.mu=e.mu 1485 func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { 1486 return e.igmp.leaveGroup(addr) 1487 } 1488 1489 // IsInGroup implements stack.GroupAddressableEndpoint. 1490 func (e *endpoint) IsInGroup(addr tcpip.Address) bool { 1491 e.mu.RLock() 1492 defer e.mu.RUnlock() 1493 return e.igmp.isInGroup(addr) // +checklocksforce: e.mu==e.igmp.ep.mu. 1494 } 1495 1496 // Stats implements stack.NetworkEndpoint. 1497 func (e *endpoint) Stats() stack.NetworkEndpointStats { 1498 return &e.stats.localStats 1499 } 1500 1501 var _ stack.NetworkProtocol = (*protocol)(nil) 1502 var _ stack.MulticastForwardingNetworkProtocol = (*protocol)(nil) 1503 var _ stack.RejectIPv4WithHandler = (*protocol)(nil) 1504 var _ fragmentation.TimeoutHandler = (*protocol)(nil) 1505 1506 type protocol struct { 1507 stack *stack.Stack 1508 1509 // mu protects annotated fields below. 1510 mu sync.RWMutex 1511 1512 // eps is keyed by NICID to allow protocol methods to retrieve an endpoint 1513 // when handling a packet, by looking at which NIC handled the packet. 1514 // +checklocks:mu 1515 eps map[tcpip.NICID]*endpoint 1516 1517 // ICMP types for which the stack's global rate limiting must apply. 1518 // +checklocks:mu 1519 icmpRateLimitedTypes map[header.ICMPv4Type]struct{} 1520 1521 // defaultTTL is the current default TTL for the protocol. Only the 1522 // uint8 portion of it is meaningful. 1523 defaultTTL atomicbitops.Uint32 1524 1525 ids []atomicbitops.Uint32 1526 hashIV uint32 1527 // idTS is the unix timestamp in milliseconds 'ids' was last accessed. 
1528 idTS atomicbitops.Int64 1529 1530 fragmentation *fragmentation.Fragmentation 1531 1532 options Options 1533 1534 multicastRouteTable multicast.RouteTable 1535 // multicastForwardingDisp is the multicast forwarding event dispatcher that 1536 // an integrator can provide to receive multicast forwarding events. Note 1537 // that multicast packets will only be forwarded if this is non-nil. 1538 // +checklocks:mu 1539 multicastForwardingDisp stack.MulticastForwardingEventDispatcher 1540 } 1541 1542 // Number returns the ipv4 protocol number. 1543 func (p *protocol) Number() tcpip.NetworkProtocolNumber { 1544 return ProtocolNumber 1545 } 1546 1547 // MinimumPacketSize returns the minimum valid ipv4 packet size. 1548 func (p *protocol) MinimumPacketSize() int { 1549 return header.IPv4MinimumSize 1550 } 1551 1552 // ParseAddresses implements stack.NetworkProtocol. 1553 func (*protocol) ParseAddresses(v []byte) (src, dst tcpip.Address) { 1554 h := header.IPv4(v) 1555 return h.SourceAddress(), h.DestinationAddress() 1556 } 1557 1558 // SetOption implements stack.NetworkProtocol. 1559 func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { 1560 switch v := option.(type) { 1561 case *tcpip.DefaultTTLOption: 1562 p.SetDefaultTTL(uint8(*v)) 1563 return nil 1564 default: 1565 return &tcpip.ErrUnknownProtocolOption{} 1566 } 1567 } 1568 1569 // Option implements stack.NetworkProtocol. 1570 func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { 1571 switch v := option.(type) { 1572 case *tcpip.DefaultTTLOption: 1573 *v = tcpip.DefaultTTLOption(p.DefaultTTL()) 1574 return nil 1575 default: 1576 return &tcpip.ErrUnknownProtocolOption{} 1577 } 1578 } 1579 1580 // SetDefaultTTL sets the default TTL for endpoints created with this protocol. 1581 func (p *protocol) SetDefaultTTL(ttl uint8) { 1582 p.defaultTTL.Store(uint32(ttl)) 1583 } 1584 1585 // DefaultTTL returns the default TTL for endpoints created with this protocol. 
1586 func (p *protocol) DefaultTTL() uint8 { 1587 return uint8(p.defaultTTL.Load()) 1588 } 1589 1590 // Close implements stack.TransportProtocol. 1591 func (p *protocol) Close() { 1592 p.fragmentation.Release() 1593 p.multicastRouteTable.Close() 1594 } 1595 1596 // Wait implements stack.TransportProtocol. 1597 func (*protocol) Wait() {} 1598 1599 func (p *protocol) validateUnicastSourceAndMulticastDestination(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1600 if !p.isUnicastAddress(addresses.Source) || header.IsV4LinkLocalUnicastAddress(addresses.Source) { 1601 return &tcpip.ErrBadAddress{} 1602 } 1603 1604 if !header.IsV4MulticastAddress(addresses.Destination) || header.IsV4LinkLocalMulticastAddress(addresses.Destination) { 1605 return &tcpip.ErrBadAddress{} 1606 } 1607 1608 return nil 1609 } 1610 1611 func (p *protocol) multicastForwarding() bool { 1612 p.mu.RLock() 1613 defer p.mu.RUnlock() 1614 return p.multicastForwardingDisp != nil 1615 } 1616 1617 func (p *protocol) newInstalledRoute(route stack.MulticastRoute) (*multicast.InstalledRoute, tcpip.Error) { 1618 if len(route.OutgoingInterfaces) == 0 { 1619 return nil, &tcpip.ErrMissingRequiredFields{} 1620 } 1621 1622 if !p.stack.HasNIC(route.ExpectedInputInterface) { 1623 return nil, &tcpip.ErrUnknownNICID{} 1624 } 1625 1626 for _, outgoingInterface := range route.OutgoingInterfaces { 1627 if route.ExpectedInputInterface == outgoingInterface.ID { 1628 return nil, &tcpip.ErrMulticastInputCannotBeOutput{} 1629 } 1630 1631 if !p.stack.HasNIC(outgoingInterface.ID) { 1632 return nil, &tcpip.ErrUnknownNICID{} 1633 } 1634 } 1635 return p.multicastRouteTable.NewInstalledRoute(route), nil 1636 } 1637 1638 // AddMulticastRoute implements stack.MulticastForwardingNetworkProtocol. 
1639 func (p *protocol) AddMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination, route stack.MulticastRoute) tcpip.Error { 1640 if !p.multicastForwarding() { 1641 return &tcpip.ErrNotPermitted{} 1642 } 1643 1644 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1645 return err 1646 } 1647 1648 installedRoute, err := p.newInstalledRoute(route) 1649 if err != nil { 1650 return err 1651 } 1652 1653 pendingPackets := p.multicastRouteTable.AddInstalledRoute(addresses, installedRoute) 1654 1655 for _, pkt := range pendingPackets { 1656 p.forwardPendingMulticastPacket(pkt, installedRoute) 1657 } 1658 return nil 1659 } 1660 1661 // RemoveMulticastRoute implements 1662 // stack.MulticastForwardingNetworkProtocol.RemoveMulticastRoute. 1663 func (p *protocol) RemoveMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1664 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1665 return err 1666 } 1667 1668 if removed := p.multicastRouteTable.RemoveInstalledRoute(addresses); !removed { 1669 return &tcpip.ErrHostUnreachable{} 1670 } 1671 1672 return nil 1673 } 1674 1675 // EnableMulticastForwarding implements 1676 // stack.MulticastForwardingNetworkProtocol.EnableMulticastForwarding. 1677 func (p *protocol) EnableMulticastForwarding(disp stack.MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 1678 p.mu.Lock() 1679 defer p.mu.Unlock() 1680 1681 if p.multicastForwardingDisp != nil { 1682 return true, nil 1683 } 1684 1685 if disp == nil { 1686 return false, &tcpip.ErrInvalidOptionValue{} 1687 } 1688 1689 p.multicastForwardingDisp = disp 1690 return false, nil 1691 } 1692 1693 // DisableMulticastForwarding implements 1694 // stack.MulticastForwardingNetworkProtocol.DisableMulticastForwarding. 
1695 func (p *protocol) DisableMulticastForwarding() { 1696 p.mu.Lock() 1697 defer p.mu.Unlock() 1698 1699 p.multicastForwardingDisp = nil 1700 p.multicastRouteTable.RemoveAllInstalledRoutes() 1701 } 1702 1703 // MulticastRouteLastUsedTime implements 1704 // stack.MulticastForwardingNetworkProtocol. 1705 func (p *protocol) MulticastRouteLastUsedTime(addresses stack.UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 1706 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1707 return tcpip.MonotonicTime{}, err 1708 } 1709 1710 timestamp, found := p.multicastRouteTable.GetLastUsedTimestamp(addresses) 1711 1712 if !found { 1713 return tcpip.MonotonicTime{}, &tcpip.ErrHostUnreachable{} 1714 } 1715 1716 return timestamp, nil 1717 } 1718 1719 func (p *protocol) forwardPendingMulticastPacket(pkt *stack.PacketBuffer, installedRoute *multicast.InstalledRoute) { 1720 defer pkt.DecRef() 1721 1722 // Attempt to forward the packet using the endpoint that it originally 1723 // arrived on. This ensures that the packet is only forwarded if it 1724 // matches the route's expected input interface (see 5a of RFC 1812 section 1725 // 5.2.1.3). 1726 ep, ok := p.getEndpointForNIC(pkt.NICID) 1727 1728 if !ok { 1729 // The endpoint that the packet arrived on no longer exists. Silently 1730 // drop the pkt. 
1731 return 1732 } 1733 1734 if !ep.MulticastForwarding() { 1735 return 1736 } 1737 1738 ep.handleForwardingError(ep.forwardValidatedMulticastPacket(pkt, installedRoute)) 1739 } 1740 1741 func (p *protocol) isUnicastAddress(addr tcpip.Address) bool { 1742 if addr.BitLen() != header.IPv4AddressSizeBits { 1743 return false 1744 } 1745 1746 if addr == header.IPv4Any || addr == header.IPv4Broadcast { 1747 return false 1748 } 1749 1750 if p.isSubnetLocalBroadcastAddress(addr) { 1751 return false 1752 } 1753 return !header.IsV4MulticastAddress(addr) 1754 } 1755 1756 func (p *protocol) isSubnetLocalBroadcastAddress(addr tcpip.Address) bool { 1757 p.mu.RLock() 1758 defer p.mu.RUnlock() 1759 1760 for _, e := range p.eps { 1761 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* createTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 1762 subnet := addressEndpoint.Subnet() 1763 addressEndpoint.DecRef() 1764 if subnet.IsBroadcast(addr) { 1765 return true 1766 } 1767 } 1768 } 1769 return false 1770 } 1771 1772 // parseAndValidate parses the packet (including its transport layer header) and 1773 // returns the parsed IP header. 1774 // 1775 // Returns true if the IP header was successfully parsed. 1776 func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (*buffer.View, bool) { 1777 transProtoNum, hasTransportHdr, ok := p.Parse(pkt) 1778 if !ok { 1779 return nil, false 1780 } 1781 1782 h := header.IPv4(pkt.NetworkHeader().Slice()) 1783 // Do not include the link header's size when calculating the size of the IP 1784 // packet. 
1785 if !h.IsValid(pkt.Size() - len(pkt.LinkHeader().Slice())) { 1786 return nil, false 1787 } 1788 1789 if !pkt.RXChecksumValidated && !h.IsChecksumValid() { 1790 return nil, false 1791 } 1792 1793 if hasTransportHdr { 1794 p.parseTransport(pkt, transProtoNum) 1795 } 1796 1797 return pkt.NetworkHeader().View(), true 1798 } 1799 1800 func (p *protocol) parseTransport(pkt *stack.PacketBuffer, transProtoNum tcpip.TransportProtocolNumber) { 1801 if transProtoNum == header.ICMPv4ProtocolNumber { 1802 // The transport layer will handle transport layer parsing errors. 1803 _ = parse.ICMPv4(pkt) 1804 return 1805 } 1806 1807 switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { 1808 case stack.ParsedOK: 1809 case stack.UnknownTransportProtocol, stack.TransportLayerParseError: 1810 // The transport layer will handle unknown protocols and transport layer 1811 // parsing errors. 1812 default: 1813 panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) 1814 } 1815 } 1816 1817 // Parse implements stack.NetworkProtocol. 1818 func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { 1819 if ok := parse.IPv4(pkt); !ok { 1820 return 0, false, false 1821 } 1822 1823 ipHdr := header.IPv4(pkt.NetworkHeader().Slice()) 1824 return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true 1825 } 1826 1827 // allowICMPReply reports whether an ICMP reply with provided type and code may 1828 // be sent following the rate mask options and global ICMP rate limiter. 1829 func (p *protocol) allowICMPReply(icmpType header.ICMPv4Type, code header.ICMPv4Code) bool { 1830 // Mimic linux and never rate limit for PMTU discovery. 
1831 // https://github.com/torvalds/linux/blob/9e9fb7655ed585da8f468e29221f0ba194a5f613/net/ipv4/icmp.c#L288 1832 if icmpType == header.ICMPv4DstUnreachable && code == header.ICMPv4FragmentationNeeded { 1833 return true 1834 } 1835 p.mu.RLock() 1836 defer p.mu.RUnlock() 1837 1838 if _, ok := p.icmpRateLimitedTypes[icmpType]; ok { 1839 return p.stack.AllowICMPMessage() 1840 } 1841 return true 1842 } 1843 1844 // SendRejectionError implements stack.RejectIPv4WithHandler. 1845 func (p *protocol) SendRejectionError(pkt *stack.PacketBuffer, rejectWith stack.RejectIPv4WithICMPType, inputHook bool) tcpip.Error { 1846 switch rejectWith { 1847 case stack.RejectIPv4WithICMPNetUnreachable: 1848 return p.returnError(&icmpReasonNetworkUnreachable{}, pkt, inputHook) 1849 case stack.RejectIPv4WithICMPHostUnreachable: 1850 return p.returnError(&icmpReasonHostUnreachable{}, pkt, inputHook) 1851 case stack.RejectIPv4WithICMPPortUnreachable: 1852 return p.returnError(&icmpReasonPortUnreachable{}, pkt, inputHook) 1853 case stack.RejectIPv4WithICMPNetProhibited: 1854 return p.returnError(&icmpReasonNetworkProhibited{}, pkt, inputHook) 1855 case stack.RejectIPv4WithICMPHostProhibited: 1856 return p.returnError(&icmpReasonHostProhibited{}, pkt, inputHook) 1857 case stack.RejectIPv4WithICMPAdminProhibited: 1858 return p.returnError(&icmpReasonAdministrativelyProhibited{}, pkt, inputHook) 1859 default: 1860 panic(fmt.Sprintf("unhandled %[1]T = %[1]d", rejectWith)) 1861 } 1862 } 1863 1864 // calculateNetworkMTU calculates the network-layer payload MTU based on the 1865 // link-layer payload mtu. 
1866 func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, tcpip.Error) { 1867 if linkMTU < header.IPv4MinimumMTU { 1868 return 0, &tcpip.ErrInvalidEndpointState{} 1869 } 1870 1871 // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in 1872 // length: 1873 // The maximal internet header is 60 octets, and a typical internet header 1874 // is 20 octets, allowing a margin for headers of higher level protocols. 1875 if networkHeaderSize > header.IPv4MaximumHeaderSize { 1876 return 0, &tcpip.ErrMalformedHeader{} 1877 } 1878 1879 networkMTU := linkMTU 1880 if networkMTU > MaxTotalSize { 1881 networkMTU = MaxTotalSize 1882 } 1883 1884 return networkMTU - networkHeaderSize, nil 1885 } 1886 1887 func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool { 1888 payload := len(pkt.TransportHeader().Slice()) + pkt.Data().Size() 1889 return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU 1890 } 1891 1892 // addressToUint32 translates an IPv4 address into its little endian uint32 1893 // representation. 1894 // 1895 // This function does the same thing as binary.LittleEndian.Uint32 but operates 1896 // on a tcpip.Address (a string) without the need to convert it to a byte slice, 1897 // which would cause an allocation. 1898 func addressToUint32(addr tcpip.Address) uint32 { 1899 addrBytes := addr.As4() 1900 _ = addrBytes[3] // bounds check hint to compiler 1901 return uint32(addrBytes[0]) | uint32(addrBytes[1])<<8 | uint32(addrBytes[2])<<16 | uint32(addrBytes[3])<<24 1902 } 1903 1904 // hashRoute calculates a hash value for the given source/destination pair using 1905 // the addresses, transport protocol number and a 32-bit number to generate the 1906 // hash. 
1907 func hashRoute(srcAddr, dstAddr tcpip.Address, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 { 1908 a := addressToUint32(srcAddr) 1909 b := addressToUint32(dstAddr) 1910 return hash.Hash3Words(a, b, uint32(protocol), hashIV) 1911 } 1912 1913 // Options holds options to configure a new protocol. 1914 type Options struct { 1915 // IGMP holds options for IGMP. 1916 IGMP IGMPOptions 1917 1918 // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. 1919 // martian loopback packets) should be accepted. 1920 AllowExternalLoopbackTraffic bool 1921 } 1922 1923 // NewProtocolWithOptions returns an IPv4 network protocol. 1924 func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { 1925 ids := make([]atomicbitops.Uint32, buckets) 1926 1927 // Randomly initialize hashIV and the ids. 1928 r := hash.RandN32(1 + buckets) 1929 for i := range ids { 1930 ids[i] = atomicbitops.FromUint32(r[i]) 1931 } 1932 hashIV := r[buckets] 1933 1934 return func(s *stack.Stack) stack.NetworkProtocol { 1935 p := &protocol{ 1936 stack: s, 1937 ids: ids, 1938 hashIV: hashIV, 1939 defaultTTL: atomicbitops.FromUint32(DefaultTTL), 1940 options: opts, 1941 } 1942 p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) 1943 p.eps = make(map[tcpip.NICID]*endpoint) 1944 // Set ICMP rate limiting to Linux defaults. 1945 // See https://man7.org/linux/man-pages/man7/icmp.7.html. 
1946 p.icmpRateLimitedTypes = map[header.ICMPv4Type]struct{}{ 1947 header.ICMPv4DstUnreachable: {}, 1948 header.ICMPv4SrcQuench: {}, 1949 header.ICMPv4TimeExceeded: {}, 1950 header.ICMPv4ParamProblem: {}, 1951 } 1952 if err := p.multicastRouteTable.Init(multicast.DefaultConfig(s.Clock())); err != nil { 1953 panic(fmt.Sprintf("p.multicastRouteTable.Init(_): %s", err)) 1954 } 1955 return p 1956 } 1957 } 1958 1959 // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. 1960 func NewProtocol(s *stack.Stack) stack.NetworkProtocol { 1961 return NewProtocolWithOptions(Options{})(s) 1962 } 1963 1964 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) { 1965 fragPkt, offset, copied, more := pf.BuildNextFragment() 1966 fragPkt.NetworkProtocolNumber = ProtocolNumber 1967 1968 originalIPHeaderLength := len(originalIPHeader) 1969 nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength)) 1970 fragPkt.NetworkProtocolNumber = ProtocolNumber 1971 1972 if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) { 1973 panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength)) 1974 } 1975 1976 flags := originalIPHeader.Flags() 1977 if more { 1978 flags |= header.IPv4FlagMoreFragments 1979 } 1980 nextFragIPHeader.SetFlagsFragmentOffset(flags, uint16(offset)) 1981 nextFragIPHeader.SetTotalLength(uint16(nextFragIPHeader.HeaderLength()) + uint16(copied)) 1982 nextFragIPHeader.SetChecksum(0) 1983 nextFragIPHeader.SetChecksum(^nextFragIPHeader.CalculateChecksum()) 1984 1985 return fragPkt, more 1986 } 1987 1988 // optionAction describes possible actions that may be taken on an option 1989 // while processing it. 1990 type optionAction uint8 1991 1992 const ( 1993 // optionRemove says that the option should not be in the output option set. 
1994 optionRemove optionAction = iota 1995 1996 // optionProcess says that the option should be fully processed. 1997 optionProcess 1998 1999 // optionVerify says the option should be checked and passed unchanged. 2000 optionVerify 2001 2002 // optionPass says to pass the output set without checking. 2003 optionPass 2004 ) 2005 2006 // optionActions list what to do for each option in a given scenario. 2007 type optionActions struct { 2008 // timestamp controls what to do with a Timestamp option. 2009 timestamp optionAction 2010 2011 // recordRoute controls what to do with a Record Route option. 2012 recordRoute optionAction 2013 2014 // routerAlert controls what to do with a Router Alert option. 2015 routerAlert optionAction 2016 2017 // unknown controls what to do with an unknown option. 2018 unknown optionAction 2019 } 2020 2021 // optionsUsage specifies the ways options may be operated upon for a given 2022 // scenario during packet processing. 2023 type optionsUsage interface { 2024 actions() optionActions 2025 } 2026 2027 // optionUsageVerify implements optionsUsage for when we just want to check 2028 // fragments. Don't change anything, just check and reject if bad. No 2029 // replacement options are generated. 2030 type optionUsageVerify struct{} 2031 2032 // actions implements optionsUsage. 2033 func (*optionUsageVerify) actions() optionActions { 2034 return optionActions{ 2035 timestamp: optionVerify, 2036 recordRoute: optionVerify, 2037 routerAlert: optionVerify, 2038 unknown: optionRemove, 2039 } 2040 } 2041 2042 // optionUsageReceive implements optionsUsage for packets we will pass 2043 // to the transport layer (with the exception of Echo requests). 2044 type optionUsageReceive struct{} 2045 2046 // actions implements optionsUsage. 
2047 func (*optionUsageReceive) actions() optionActions { 2048 return optionActions{ 2049 timestamp: optionProcess, 2050 recordRoute: optionProcess, 2051 routerAlert: optionVerify, 2052 unknown: optionPass, 2053 } 2054 } 2055 2056 // optionUsageForward implements optionsUsage for packets about to be forwarded. 2057 // All options are passed on regardless of whether we recognise them, however 2058 // we do process the Timestamp and Record Route options. 2059 type optionUsageForward struct{} 2060 2061 // actions implements optionsUsage. 2062 func (*optionUsageForward) actions() optionActions { 2063 return optionActions{ 2064 timestamp: optionProcess, 2065 recordRoute: optionProcess, 2066 routerAlert: optionVerify, 2067 unknown: optionPass, 2068 } 2069 } 2070 2071 // optionUsageEcho implements optionsUsage for echo packet processing. 2072 // Only Timestamp and RecordRoute are processed and sent back. 2073 type optionUsageEcho struct{} 2074 2075 // actions implements optionsUsage. 2076 func (*optionUsageEcho) actions() optionActions { 2077 return optionActions{ 2078 timestamp: optionProcess, 2079 recordRoute: optionProcess, 2080 routerAlert: optionVerify, 2081 unknown: optionRemove, 2082 } 2083 } 2084 2085 // handleTimestamp does any required processing on a Timestamp option 2086 // in place. 
func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) *header.IPv4OptParameterProblem {
	// The flags select the size of one entry in the option's data area; an
	// unrecognised flags value is reported against the overflow/flags byte.
	flags := tsOpt.Flags()
	var entrySize uint8
	switch flags {
	case header.IPv4OptionTimestampOnlyFlag:
		entrySize = header.IPv4OptionTimestampSize
	case
		header.IPv4OptionTimestampWithIPFlag,
		header.IPv4OptionTimestampWithPredefinedIPFlag:
		entrySize = header.IPv4OptionTimestampWithAddrSize
	default:
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSOFLWAndFLGOffset,
			NeedICMP: true,
		}
	}

	pointer := tsOpt.Pointer()
	// RFC 791 page 22 states: "The smallest legal value is 5."
	// Since the pointer is 1 based, and the header is 4 bytes long the
	// pointer must point beyond the header therefore 4 or less is bad.
	if pointer <= header.IPv4OptionTimestampHdrLength {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSPointerOffset,
			NeedICMP: true,
		}
	}
	// To simplify processing below, base further work on the array of timestamps
	// beyond the header, rather than on the whole option. Also to aid
	// calculations set 'nextSlot' to be 0 based as in the packet it is 1 based.
	nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1)
	optLen := tsOpt.Size()
	dataLength := optLen - header.IPv4OptionTimestampHdrLength

	// In the section below, we verify the pointer, length and overflow counter
	// fields of the option. The distinction is in which byte you return as being
	// in error in the ICMP packet. Offsets 1 (length), 2 (pointer)
	// or 3 (overflowed counter).
	//
	// The following RFC sections cover this section:
	//
	// RFC 791 (page 22):
	//    If there is some room but not enough room for a full timestamp
	//    to be inserted, or the overflow count itself overflows, the
	//    original datagram is considered to be in error and is discarded.
	//    In either case an ICMP parameter problem message may be sent to
	//    the source host [3].
	//
	// You can get this situation in two ways. Firstly if the data area is not
	// a multiple of the entry size or secondly, if the pointer is not at a
	// multiple of the entry size. The wording of the RFC suggests that
	// this is not an error until you actually run out of space.
	if pointer > optLen {
		// RFC 791 (page 22) says we should switch to using the overflow count.
		//    If the timestamp data area is already full (the pointer exceeds
		//    the length) the datagram is forwarded without inserting the
		//    timestamp, but the overflow count is incremented by one.
		if flags == header.IPv4OptionTimestampWithPredefinedIPFlag {
			// By definition we have nothing to do.
			return nil
		}

		// IncOverflow mutates the option. A non-zero result is accepted; a
		// zero result is treated as the overflow counter itself having
		// overflowed.
		if tsOpt.IncOverflow() != 0 {
			return nil
		}
		// The overflow count is also full.
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSOFLWAndFLGOffset,
			NeedICMP: true,
		}
	}
	if nextSlot+entrySize > dataLength {
		// The data area isn't full but there isn't room for a new entry.
		// Either Length or Pointer could be bad.
		//
		// The `if false` body below is intentionally unreachable: it records
		// the check that would tell a bad length from a bad pointer, which is
		// skipped for Linux compatibility.
		if false {
			// We must select Pointer for Linux compatibility, even if
			// only the length is bad.
			// The Linux code is at (in October 2020)
			// https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370
			//		if (optptr[2]+3 > optlen) {
			//			pp_ptr = optptr + 2;
			//			goto error;
			//		}
			// which doesn't distinguish between which of optptr[2] or optlen
			// is wrong, but just arbitrarily decides on optptr+2.
			if dataLength%entrySize != 0 {
				// The Data section size should be a multiple of the expected
				// timestamp entry size.
				return &header.IPv4OptParameterProblem{
					Pointer:  header.IPv4OptionLengthOffset,
					NeedICMP: false,
				}
			}
			// If the size is OK, the pointer must be corrupted.
		}
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptTSPointerOffset,
			NeedICMP: true,
		}
	}

	// All checks passed. The option is only written to when the usage asks
	// for the timestamp to be processed; otherwise it is left as validated.
	if usage.actions().timestamp == optionProcess {
		tsOpt.UpdateTimestamp(localAddress, clock)
	}
	return nil
}

// handleRecordRoute checks and processes a Record route option. It is much
// like the timestamp type 1 option, but without timestamps. The passed in
// address is stored in the option in the correct spot if possible.
//
// A nil result means the option was accepted (whether or not an address was
// stored); otherwise the returned problem identifies the offending byte.
func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) *header.IPv4OptParameterProblem {
	optlen := rrOpt.Size()

	// The option must be long enough for its header plus at least one address.
	if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptionLengthOffset,
			NeedICMP: true,
		}
	}

	pointer := rrOpt.Pointer()
	// RFC 791 page 20 states:
	//      The pointer is relative to this option, and the
	//      smallest legal value for the pointer is 4.
	// Since the pointer is 1 based, and the header is 3 bytes long the
	// pointer must point beyond the header therefore 3 or less is bad.
	if pointer <= header.IPv4OptionRecordRouteHdrLength {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptRRPointerOffset,
			NeedICMP: true,
		}
	}

	// RFC 791 page 21 says
	//       If the route data area is already full (the pointer exceeds the
	//       length) the datagram is forwarded without inserting the address
	//       into the recorded route. If there is some room but not enough
	//       room for a full address to be inserted, the original datagram is
	//       considered to be in error and is discarded.  In either case an
	//       ICMP parameter problem message may be sent to the source
	//       host.
	// The use of the words "In either case" suggests that a 'full' RR option
	// could generate an ICMP at every hop after it fills up. We chose to not
	// do this (as do most implementations). It is probable that the inclusion
	// of these words is a copy/paste error from the timestamp option where
	// there are two failure reasons given.
	if pointer > optlen {
		return nil
	}

	// The data area isn't full but there isn't room for a new entry.
	// Either Length or Pointer could be bad. We must select Pointer for Linux
	// compatibility, even if only the length is bad. NB. pointer is 1 based.
	if pointer+header.IPv4AddressSize > optlen+1 {
		// As in handleTimestamp, the `if false` body is deliberately
		// unreachable and kept only to record the stricter check.
		if false {
			// This is what we would do if we were not being Linux compatible.
			// Check for bad pointer or length value. Must be a multiple of 4 after
			// accounting for the 3 byte header and not within that header.
			// RFC 791, page 20 says:
			//       The pointer is relative to this option, and the
			//       smallest legal value for the pointer is 4.
			//
			//       A recorded route is composed of a series of internet addresses.
			//       Each internet address is 32 bits or 4 octets.
			// Linux skips this test so we must too.  See Linux code at:
			// https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341
			//    if (optptr[2]+3 > optlen) {
			//      pp_ptr = optptr + 2;
			//      goto error;
			//    }
			if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 {
				// Length is bad, not on integral number of slots.
				return &header.IPv4OptParameterProblem{
					Pointer:  header.IPv4OptionLengthOffset,
					NeedICMP: true,
				}
			}
			// If not length, the fault must be with the pointer.
		}
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptRRPointerOffset,
			NeedICMP: true,
		}
	}
	// In verify-only mode the option has been checked and is left untouched.
	if usage.actions().recordRoute == optionVerify {
		return nil
	}
	rrOpt.StoreAddress(localAddress)
	return nil
}

// handleRouterAlert performs sanity checks on a Router Alert option.
//
// It returns nil when the option's value is the accepted one, and otherwise a
// parameter problem pointing at the option's value field.
func handleRouterAlert(raOpt header.IPv4OptionRouterAlert) *header.IPv4OptParameterProblem {
	// Only the zero value is acceptable, as per RFC 2113, section 2.1:
	//   Value:  A two octet code with the following values:
	//     0 - Router shall examine packet
	//     1-65535 - Reserved
	if raOpt.Value() != header.IPv4OptionRouterAlertValue {
		return &header.IPv4OptParameterProblem{
			Pointer:  header.IPv4OptionRouterAlertValueOffset,
			NeedICMP: true,
		}
	}
	return nil
}

// optionTracker records which of the known option types were encountered
// while processing a packet's IPv4 options.
type optionTracker struct {
	// timestamp is set when a Timestamp option was seen.
	timestamp bool
	// recordRoute is set when a Record Route option was seen.
	recordRoute bool
	// routerAlert is set when a Router Alert option was seen.
	routerAlert bool
}

// processIPOptions parses the IPv4 options and produces a new set of options
// suitable for use in the next step of packet processing as informed by usage.
// The original will not be touched.
//
// If there were no errors during parsing, the new set of options is returned as
// a new buffer.
2305 func (e *endpoint) processIPOptions(pkt *stack.PacketBuffer, opts header.IPv4Options, usage optionsUsage) (header.IPv4Options, optionTracker, *header.IPv4OptParameterProblem) { 2306 stats := e.stats.ip 2307 optIter := opts.MakeIterator() 2308 2309 // Except NOP, each option must only appear at most once (RFC 791 section 3.1, 2310 // at the definition of every type). 2311 // Keep track of each option we find to enable duplicate option detection. 2312 var seenOptions [math.MaxUint8 + 1]bool 2313 2314 // TODO(https://gvisor.dev/issue/4586): This will need tweaking when we start 2315 // really forwarding packets as we may need to get two addresses, for rx and 2316 // tx interfaces. We will also have to take usage into account. 2317 localAddress := e.MainAddress().Address 2318 if localAddress.BitLen() == 0 { 2319 h := header.IPv4(pkt.NetworkHeader().Slice()) 2320 dstAddr := h.DestinationAddress() 2321 if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(dstAddr) { 2322 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2323 NeedICMP: false, 2324 } 2325 } 2326 localAddress = dstAddr 2327 } 2328 2329 var optionsProcessed optionTracker 2330 for { 2331 option, done, optProblem := optIter.Next() 2332 if done || optProblem != nil { 2333 return optIter.Finalize(), optionsProcessed, optProblem 2334 } 2335 optType := option.Type() 2336 if optType == header.IPv4OptionNOPType { 2337 optIter.PushNOPOrEnd(optType) 2338 continue 2339 } 2340 if optType == header.IPv4OptionListEndType { 2341 optIter.PushNOPOrEnd(optType) 2342 return optIter.Finalize(), optionsProcessed, nil 2343 } 2344 2345 // check for repeating options (multiple NOPs are OK) 2346 if seenOptions[optType] { 2347 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2348 Pointer: optIter.ErrCursor, 2349 NeedICMP: true, 2350 } 2351 } 2352 seenOptions[optType] = true 2353 2354 optLen, optProblem := func() (int, *header.IPv4OptParameterProblem) { 2355 switch option := 
option.(type) { 2356 case *header.IPv4OptionTimestamp: 2357 stats.OptionTimestampReceived.Increment() 2358 optionsProcessed.timestamp = true 2359 if usage.actions().timestamp != optionRemove { 2360 clock := e.protocol.stack.Clock() 2361 newBuffer := optIter.InitReplacement(option) 2362 optProblem := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage) 2363 return len(newBuffer), optProblem 2364 } 2365 2366 case *header.IPv4OptionRecordRoute: 2367 stats.OptionRecordRouteReceived.Increment() 2368 optionsProcessed.recordRoute = true 2369 if usage.actions().recordRoute != optionRemove { 2370 newBuffer := optIter.InitReplacement(option) 2371 optProblem := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), localAddress, usage) 2372 return len(newBuffer), optProblem 2373 } 2374 2375 case *header.IPv4OptionRouterAlert: 2376 stats.OptionRouterAlertReceived.Increment() 2377 optionsProcessed.routerAlert = true 2378 if usage.actions().routerAlert != optionRemove { 2379 newBuffer := optIter.InitReplacement(option) 2380 optProblem := handleRouterAlert(header.IPv4OptionRouterAlert(newBuffer)) 2381 return len(newBuffer), optProblem 2382 } 2383 2384 default: 2385 stats.OptionUnknownReceived.Increment() 2386 if usage.actions().unknown == optionPass { 2387 return len(optIter.InitReplacement(option)), nil 2388 } 2389 } 2390 return 0, nil 2391 }() 2392 2393 if optProblem != nil { 2394 optProblem.Pointer += optIter.ErrCursor 2395 return nil, optionTracker{}, optProblem 2396 } 2397 optIter.ConsumeBuffer(optLen) 2398 } 2399 }