// Source: github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/tcpip/network/ipv4/ipv4.go

// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package ipv4 contains the implementation of the ipv4 network protocol.
package ipv4

import (
	"fmt"
	"math"
	"reflect"
	"time"

	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
	"github.com/MerlinKodo/gvisor/pkg/buffer"
	"github.com/MerlinKodo/gvisor/pkg/sync"
	"github.com/MerlinKodo/gvisor/pkg/tcpip"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/header"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/header/parse"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/hash"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/internal/fragmentation"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/internal/ip"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/network/internal/multicast"
	"github.com/MerlinKodo/gvisor/pkg/tcpip/stack"
)

const (
	// ReassembleTimeout is the time a packet stays in the reassembly
	// system before being evicted.
	// As per RFC 791 section 3.2:
	//   The current recommendation for the initial timer setting is 15 seconds.
	//   This may be changed as experience with this protocol accumulates.
	//
	// Considering that it is an old recommendation, we use the same reassembly
	// timeout that linux defines, which is 30 seconds:
	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138
	ReassembleTimeout = 30 * time.Second

	// ProtocolNumber is the ipv4 protocol number.
	ProtocolNumber = header.IPv4ProtocolNumber

	// MaxTotalSize is maximum size that can be encoded in the 16-bit
	// TotalLength field of the ipv4 header.
	MaxTotalSize = 0xffff

	// DefaultTTL is the default time-to-live value for this endpoint.
	DefaultTTL = 64

	// buckets is the number of identifier buckets.
	buckets = 2048

	// The size of a fragment block, in bytes, as per RFC 791 section 3.1,
	// page 14.
	fragmentblockSize = 8
)

// Values stored in the endpoint's forwarding and multicastForwarding flags.
const (
	forwardingDisabled = 0
	forwardingEnabled  = 1
)

// ipv4BroadcastAddr is the IPv4 broadcast address with its prefix attached.
var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix()

// Compile-time checks that *endpoint satisfies the interfaces it is used as.
var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil)
var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil)
var _ stack.MulticastForwardingNetworkEndpoint = (*endpoint)(nil)
var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
var _ stack.AddressableEndpoint = (*endpoint)(nil)
var _ stack.NetworkEndpoint = (*endpoint)(nil)
var _ IGMPEndpoint = (*endpoint)(nil)

// endpoint is the per-NIC IPv4 network endpoint. One is created by
// protocol.NewEndpoint for each NIC and registered in protocol.eps.
type endpoint struct {
	nic        stack.NetworkInterface
	dispatcher stack.TransportDispatcher
	protocol   *protocol
	stats      sharedStats

	// enabled is set to 1 when the endpoint is enabled and 0 when it is
	// disabled.
	enabled atomicbitops.Uint32

	// forwarding is set to forwardingEnabled when the endpoint has forwarding
	// enabled and forwardingDisabled when it is disabled.
	forwarding atomicbitops.Uint32

	// multicastForwarding is set to forwardingEnabled when the endpoint has
	// forwarding enabled and forwardingDisabled when it is disabled.
	//
	// TODO(https://gvisor.dev/issue/7338): Implement support for multicast
	// forwarding. Currently, setting this value to true is a no-op.
	multicastForwarding atomicbitops.Uint32

	// mu protects below.
	mu sync.RWMutex

	// +checklocks:mu
	addressableEndpointState stack.AddressableEndpointState

	// +checklocks:mu
	igmp igmpState
}

// SetIGMPVersion implements IGMPEndpoint.
func (e *endpoint) SetIGMPVersion(v IGMPVersion) IGMPVersion {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.setIGMPVersionLocked(v)
}

// GetIGMPVersion implements IGMPEndpoint.
func (e *endpoint) GetIGMPVersion() IGMPVersion {
	e.mu.RLock()
	defer e.mu.RUnlock()
	return e.getIGMPVersionLocked()
}

// setIGMPVersionLocked forwards v to the IGMP state and returns whatever
// igmpState.setVersion returns.
//
// +checklocks:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) setIGMPVersionLocked(v IGMPVersion) IGMPVersion {
	return e.igmp.setVersion(v)
}

// getIGMPVersionLocked returns the version reported by the IGMP state.
//
// +checklocksread:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) getIGMPVersionLocked() IGMPVersion {
	return e.igmp.getVersion()
}

// HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint.
func (e *endpoint) HandleLinkResolutionFailure(pkt stack.PacketBufferPtr) {
	// If we are operating as a router, return an ICMP error to the original
	// packet's sender.
	if pkt.NetworkPacketInfo.IsForwardedPacket {
		// TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP
		// errors to local endpoints.
		e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt, false /* deliveredLocally */)
		e.stats.ip.Forwarding.Errors.Increment()
		e.stats.ip.Forwarding.HostUnreachable.Increment()
		return
	}
	// handleControl expects the entire offending packet to be in the packet
	// buffer's data field.
	pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{
		Payload: pkt.ToBuffer(),
	})
	defer pkt.DecRef()
	pkt.NICID = e.nic.ID()
	pkt.NetworkProtocolNumber = ProtocolNumber
	// Use the same control type as an ICMPv4 destination host unreachable error
	// since the host is considered unreachable if we cannot resolve the link
	// address to the next hop.
	e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt)
}

// NewEndpoint creates a new ipv4 endpoint.
//
// The endpoint's addressable-endpoint and IGMP state are initialized under
// e.mu, its stat counters are wired to the stack-wide counters, and it is
// registered in p.eps under the NIC's ID.
func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
	e := &endpoint{
		nic:        nic,
		dispatcher: dispatcher,
		protocol:   p,
	}
	e.mu.Lock()
	e.addressableEndpointState.Init(e, stack.AddressableEndpointStateOptions{HiddenWhileDisabled: false})
	e.igmp.init(e)
	e.mu.Unlock()

	tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem())

	stackStats := p.stack.Stats()
	e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP)
	e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V4)
	e.stats.igmp.init(&e.stats.localStats.IGMP, &stackStats.IGMP)

	p.mu.Lock()
	p.eps[nic.ID()] = e
	p.mu.Unlock()

	return e
}

// findEndpointWithAddress returns an endpoint registered with p that has addr
// assigned (temporary addresses are not considered), or nil if there is none.
//
// Endpoints are visited in map iteration order, so if several endpoints own
// addr the one returned is unspecified.
func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint {
	p.mu.RLock()
	defer p.mu.RUnlock()

	for _, e := range p.eps {
		if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil {
			// Only ownership is being tested; drop the reference right away.
			addressEndpoint.DecRef()
			return e
		}
	}

	return nil
}

// getEndpointForNIC returns the endpoint registered for the NIC with the given
// ID and whether such an endpoint exists.
func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) {
	p.mu.RLock()
	defer p.mu.RUnlock()
	ep, ok := p.eps[id]
	return ep, ok
}

// forgetEndpoint removes the endpoint registered for nicID, if any.
func (p *protocol) forgetEndpoint(nicID tcpip.NICID) {
	p.mu.Lock()
	defer p.mu.Unlock()
	delete(p.eps, nicID)
}

// Forwarding implements
stack.ForwardingNetworkEndpoint. 219 func (e *endpoint) Forwarding() bool { 220 return e.forwarding.Load() == forwardingEnabled 221 } 222 223 // setForwarding sets the forwarding status for the endpoint. 224 // 225 // Returns the previous forwarding status. 226 func (e *endpoint) setForwarding(v bool) bool { 227 forwarding := uint32(forwardingDisabled) 228 if v { 229 forwarding = forwardingEnabled 230 } 231 232 return e.forwarding.Swap(forwarding) != forwardingDisabled 233 } 234 235 // SetForwarding implements stack.ForwardingNetworkEndpoint. 236 func (e *endpoint) SetForwarding(forwarding bool) bool { 237 e.mu.Lock() 238 defer e.mu.Unlock() 239 240 prevForwarding := e.setForwarding(forwarding) 241 if prevForwarding == forwarding { 242 return prevForwarding 243 } 244 245 if forwarding { 246 // There does not seem to be an RFC requirement for a node to join the all 247 // routers multicast address but 248 // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml 249 // specifies the address as a group for all routers on a subnet so we join 250 // the group here. 251 if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil { 252 // joinGroupLocked only returns an error if the group address is not a 253 // valid IPv4 multicast address. 254 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 255 } 256 257 return prevForwarding 258 } 259 260 switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) { 261 case nil: 262 case *tcpip.ErrBadLocalAddress: 263 // The endpoint may have already left the multicast group. 264 default: 265 panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 266 } 267 268 return prevForwarding 269 } 270 271 // MulticastForwarding implements stack.MulticastForwardingNetworkEndpoint. 
func (e *endpoint) MulticastForwarding() bool {
	return e.multicastForwarding.Load() == forwardingEnabled
}

// SetMulticastForwarding implements stack.MulticastForwardingNetworkEndpoint.
//
// It atomically stores the new state and returns the state that was
// previously stored.
func (e *endpoint) SetMulticastForwarding(forwarding bool) bool {
	updatedForwarding := uint32(forwardingDisabled)
	if forwarding {
		updatedForwarding = forwardingEnabled
	}

	return e.multicastForwarding.Swap(updatedForwarding) != forwardingDisabled
}

// Enable implements stack.NetworkEndpoint.
func (e *endpoint) Enable() tcpip.Error {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.enableLocked()
}

// enableLocked enables the endpoint: it adds the broadcast address,
// (re)initializes IGMP for already joined groups and joins the all-systems
// multicast group.
//
// It returns ErrNotPermitted if the NIC is disabled, nil if the endpoint was
// already enabled, and the error from adding the broadcast address if that
// fails.
//
// +checklocks:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) enableLocked() tcpip.Error {
	// If the NIC is not enabled, the endpoint can't do anything meaningful so
	// don't enable the endpoint.
	if !e.nic.Enabled() {
		return &tcpip.ErrNotPermitted{}
	}

	// If the endpoint is already enabled, there is nothing for it to do.
	if !e.setEnabled(true) {
		return nil
	}

	// Must be called after Enabled has already been set.
	e.addressableEndpointState.OnNetworkEndpointEnabledChanged()

	// Create an endpoint to receive broadcast packets on this interface.
	ep, err := e.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.AddressProperties{PEB: stack.NeverPrimaryEndpoint})
	if err != nil {
		// NOTE(review): the enabled flag stays set on this path; confirm that
		// callers treat a failed Enable accordingly.
		return err
	}
	// We have no need for the address endpoint.
	ep.DecRef()

	// Groups may have been joined while the endpoint was disabled, or the
	// endpoint may have left groups from the perspective of IGMP when the
	// endpoint was disabled. Either way, we need to let routers know to
	// send us multicast traffic.
	e.igmp.initializeAll()

	// As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
	// multicast group. Note, the IANA calls the all-hosts multicast group the
	// all-systems multicast group.
	if err := e.joinGroupLocked(header.IPv4AllSystems); err != nil {
		// joinGroupLocked only returns an error if the group address is not a valid
		// IPv4 multicast address.
		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllSystems, err))
	}

	return nil
}

// Enabled implements stack.NetworkEndpoint.
//
// It is true only when both the NIC and the endpoint itself are enabled.
func (e *endpoint) Enabled() bool {
	return e.nic.Enabled() && e.isEnabled()
}

// isEnabled returns true if the endpoint is enabled, regardless of the
// enabled status of the NIC.
func (e *endpoint) isEnabled() bool {
	return e.enabled.Load() == 1
}

// setEnabled sets the enabled status for the endpoint.
//
// Returns true if the enabled status was updated.
func (e *endpoint) setEnabled(v bool) bool {
	if v {
		return e.enabled.Swap(1) == 0
	}
	return e.enabled.Swap(0) == 1
}

// Disable implements stack.NetworkEndpoint.
func (e *endpoint) Disable() {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.disableLocked()
}

// disableLocked undoes enableLocked: it leaves the all-systems group, soft
// leaves all IGMP groups, removes the broadcast address, resets the IGMPv1
// present flag and finally clears the enabled flag. It is a no-op if the
// endpoint is not enabled.
//
// +checklocks:e.mu
// +checklocksalias:e.igmp.ep.mu=e.mu
func (e *endpoint) disableLocked() {
	if !e.isEnabled() {
		return
	}

	// The endpoint may have already left the multicast group.
	switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) {
	case nil, *tcpip.ErrBadLocalAddress:
	default:
		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err))
	}

	// Leave groups from the perspective of IGMP so that routers know that
	// we are no longer interested in the group.
	e.igmp.softLeaveAll()

	// The address may have already been removed.
	switch err := e.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err.(type) {
	case nil, *tcpip.ErrBadLocalAddress:
	default:
		panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err))
	}

	// Reset the IGMP V1 present flag.
	//
	// If the node comes back up on the same network, it will re-learn that it
	// needs to perform IGMPv1.
	e.igmp.resetV1Present()

	if !e.setEnabled(false) {
		panic("should have only done work to disable the endpoint if it was enabled")
	}

	// Must be called after Enabled has been set.
	e.addressableEndpointState.OnNetworkEndpointEnabledChanged()
}

// emitMulticastEvent emits a multicast forwarding event using the provided
// generator if a valid event dispatcher exists.
//
// The generator is invoked while the protocol's read lock is held.
func (e *endpoint) emitMulticastEvent(eventGenerator func(stack.MulticastForwardingEventDispatcher)) {
	e.protocol.mu.RLock()
	defer e.protocol.mu.RUnlock()

	if mcastDisp := e.protocol.multicastForwardingDisp; mcastDisp != nil {
		eventGenerator(mcastDisp)
	}
}

// DefaultTTL is the default time-to-live value for this endpoint.
//
// The value is obtained from the protocol the endpoint belongs to.
func (e *endpoint) DefaultTTL() uint8 {
	return e.protocol.DefaultTTL()
}

// MTU implements stack.NetworkEndpoint. It returns the network MTU that
// calculateNetworkMTU derives from the link-layer MTU and the minimum IPv4
// header size (header.IPv4MinimumSize), or 0 if that calculation fails.
func (e *endpoint) MTU() uint32 {
	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize)
	if err != nil {
		return 0
	}
	return networkMTU
}

// MaxHeaderLength returns the maximum length needed by ipv4 headers (and
// underlying protocols).
func (e *endpoint) MaxHeaderLength() uint16 {
	return e.nic.MaxHeaderLength() + header.IPv4MaximumHeaderSize
}

// NetworkProtocolNumber implements stack.NetworkEndpoint.
436 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { 437 return e.protocol.Number() 438 } 439 440 func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt stack.PacketBufferPtr, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { 441 hdrLen := header.IPv4MinimumSize 442 var optLen int 443 if options != nil { 444 optLen = int(options.Length()) 445 } 446 hdrLen += optLen 447 if hdrLen > header.IPv4MaximumHeaderSize { 448 return &tcpip.ErrMessageTooLong{} 449 } 450 ipH := header.IPv4(pkt.NetworkHeader().Push(hdrLen)) 451 length := pkt.Size() 452 if length > math.MaxUint16 { 453 return &tcpip.ErrMessageTooLong{} 454 } 455 // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic 456 // datagrams. Since the DF bit is never being set here, all datagrams 457 // are non-atomic and need an ID. 458 id := e.protocol.ids[hashRoute(srcAddr, dstAddr, params.Protocol, e.protocol.hashIV)%buckets].Add(1) 459 ipH.Encode(&header.IPv4Fields{ 460 TotalLength: uint16(length), 461 ID: uint16(id), 462 TTL: params.TTL, 463 TOS: params.TOS, 464 Protocol: uint8(params.Protocol), 465 SrcAddr: srcAddr, 466 DstAddr: dstAddr, 467 Options: options, 468 }) 469 ipH.SetChecksum(^ipH.CalculateChecksum()) 470 pkt.NetworkProtocolNumber = ProtocolNumber 471 return nil 472 } 473 474 // handleFragments fragments pkt and calls the handler function on each 475 // fragment. It returns the number of fragments handled and the number of 476 // fragments left to be processed. The IP header must already be present in the 477 // original packet. 478 func (e *endpoint) handleFragments(_ *stack.Route, networkMTU uint32, pkt stack.PacketBufferPtr, handler func(stack.PacketBufferPtr) tcpip.Error) (int, int, tcpip.Error) { 479 // Round the MTU down to align to 8 bytes. 
480 fragmentPayloadSize := networkMTU &^ 7 481 networkHeader := header.IPv4(pkt.NetworkHeader().Slice()) 482 pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader)) 483 defer pf.Release() 484 485 var n int 486 for { 487 fragPkt, more := buildNextFragment(&pf, networkHeader) 488 err := handler(fragPkt) 489 fragPkt.DecRef() 490 if err != nil { 491 return n, pf.RemainingFragmentCount() + 1, err 492 } 493 n++ 494 if !more { 495 return n, pf.RemainingFragmentCount(), nil 496 } 497 } 498 } 499 500 // WritePacket writes a packet to the given destination address and protocol. 501 func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt stack.PacketBufferPtr) tcpip.Error { 502 if err := e.addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* options */); err != nil { 503 return err 504 } 505 506 return e.writePacket(r, pkt) 507 } 508 509 func (e *endpoint) writePacket(r *stack.Route, pkt stack.PacketBufferPtr) tcpip.Error { 510 netHeader := header.IPv4(pkt.NetworkHeader().Slice()) 511 dstAddr := netHeader.DestinationAddress() 512 513 // iptables filtering. All packets that reach here are locally 514 // generated. 515 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 516 if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { 517 // iptables is telling us to drop the packet. 518 e.stats.ip.IPTablesOutputDropped.Increment() 519 return nil 520 } 521 522 // If the packet is manipulated as per DNAT Output rules, handle packet 523 // based on destination address and do not send the packet to link 524 // layer. 525 // 526 // We should do this for every packet, rather than only DNATted packets, but 527 // removing this check short circuits broadcasts before they are sent out to 528 // other hosts. 
529 if newDstAddr := netHeader.DestinationAddress(); dstAddr != newDstAddr { 530 if ep := e.protocol.findEndpointWithAddress(newDstAddr); ep != nil { 531 // Since we rewrote the packet but it is being routed back to us, we 532 // can safely assume the checksum is valid. 533 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 534 return nil 535 } 536 } 537 538 return e.writePacketPostRouting(r, pkt, false /* headerIncluded */) 539 } 540 541 func (e *endpoint) writePacketPostRouting(r *stack.Route, pkt stack.PacketBufferPtr, headerIncluded bool) tcpip.Error { 542 if r.Loop()&stack.PacketLoop != 0 { 543 // If the packet was generated by the stack (not a raw/packet endpoint 544 // where a packet may be written with the header included), then we can 545 // safely assume the checksum is valid. 546 e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) 547 } 548 if r.Loop()&stack.PacketOut == 0 { 549 return nil 550 } 551 552 // Postrouting NAT can only change the source address, and does not alter the 553 // route or outgoing interface of the packet. 554 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 555 if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { 556 // iptables is telling us to drop the packet. 557 e.stats.ip.IPTablesPostroutingDropped.Increment() 558 return nil 559 } 560 561 stats := e.stats.ip 562 563 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(len(pkt.NetworkHeader().Slice()))) 564 if err != nil { 565 stats.OutgoingPacketErrors.Increment() 566 return err 567 } 568 569 if packetMustBeFragmented(pkt, networkMTU) { 570 h := header.IPv4(pkt.NetworkHeader().Slice()) 571 if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { 572 // TODO(gvisor.dev/issue/5919): Handle error condition in which DontFragment 573 // is set but the packet must be fragmented for the non-forwarding case. 
574 return &tcpip.ErrMessageTooLong{} 575 } 576 sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt stack.PacketBufferPtr) tcpip.Error { 577 // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each 578 // fragment one by one using WritePacket() (current strategy) or if we 579 // want to create a PacketBufferList from the fragments and feed it to 580 // WritePackets(). It'll be faster but cost more memory. 581 return e.nic.WritePacket(r, fragPkt) 582 }) 583 stats.PacketsSent.IncrementBy(uint64(sent)) 584 stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) 585 return err 586 } 587 588 if err := e.nic.WritePacket(r, pkt); err != nil { 589 stats.OutgoingPacketErrors.Increment() 590 return err 591 } 592 stats.PacketsSent.Increment() 593 return nil 594 } 595 596 // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. 597 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBufferPtr) tcpip.Error { 598 // The packet already has an IP header, but there are a few required 599 // checks. 600 h, ok := pkt.Data().PullUp(header.IPv4MinimumSize) 601 if !ok { 602 return &tcpip.ErrMalformedHeader{} 603 } 604 605 hdrLen := header.IPv4(h).HeaderLength() 606 if hdrLen < header.IPv4MinimumSize { 607 return &tcpip.ErrMalformedHeader{} 608 } 609 610 h, ok = pkt.Data().PullUp(int(hdrLen)) 611 if !ok { 612 return &tcpip.ErrMalformedHeader{} 613 } 614 ipH := header.IPv4(h) 615 616 // Always set the total length. 617 pktSize := pkt.Data().Size() 618 ipH.SetTotalLength(uint16(pktSize)) 619 620 // Set the source address when zero. 621 if ipH.SourceAddress() == header.IPv4Any { 622 ipH.SetSourceAddress(r.LocalAddress()) 623 } 624 625 // Set the packet ID when zero. 626 if ipH.ID() == 0 { 627 // RFC 6864 section 4.3 mandates uniqueness of ID values for 628 // non-atomic datagrams, so assign an ID to all such datagrams 629 // according to the definition given in RFC 6864 section 4. 
630 if ipH.Flags()&header.IPv4FlagDontFragment == 0 || ipH.Flags()&header.IPv4FlagMoreFragments != 0 || ipH.FragmentOffset() > 0 { 631 ipH.SetID(uint16(e.protocol.ids[hashRoute(r.LocalAddress(), r.RemoteAddress(), 0 /* protocol */, e.protocol.hashIV)%buckets].Add(1))) 632 } 633 } 634 635 // Always set the checksum. 636 ipH.SetChecksum(0) 637 ipH.SetChecksum(^ipH.CalculateChecksum()) 638 639 // Populate the packet buffer's network header and don't allow an invalid 640 // packet to be sent. 641 // 642 // Note that parsing only makes sure that the packet is well formed as per the 643 // wire format. We also want to check if the header's fields are valid before 644 // sending the packet. 645 if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().Slice()).IsValid(pktSize) { 646 return &tcpip.ErrMalformedHeader{} 647 } 648 649 return e.writePacketPostRouting(r, pkt, true /* headerIncluded */) 650 } 651 652 // forwardPacketWithRoute emits the pkt using the provided route. 653 // 654 // If updateOptions is true, then the IP options will be updated in the copied 655 // pkt using the outgoing endpoint. Otherwise, the caller is responsible for 656 // updating the options. 657 // 658 // This method should be invoked by the endpoint that received the pkt. 659 func (e *endpoint) forwardPacketWithRoute(route *stack.Route, pkt stack.PacketBufferPtr, updateOptions bool) ip.ForwardingError { 660 h := header.IPv4(pkt.NetworkHeader().Slice()) 661 stk := e.protocol.stack 662 663 inNicName := stk.FindNICNameFromID(e.nic.ID()) 664 outNicName := stk.FindNICNameFromID(route.NICID()) 665 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 666 // iptables is telling us to drop the packet. 667 e.stats.ip.IPTablesForwardDropped.Increment() 668 return nil 669 } 670 671 // We need to do a deep copy of the IP packet because 672 // WriteHeaderIncludedPacket may modify the packet buffer, but we do 673 // not own it. 
674 // 675 // TODO(https://gvisor.dev/issue/7473): For multicast, only create one deep 676 // copy and then clone. 677 newPkt := pkt.DeepCopyForForwarding(int(route.MaxHeaderLength())) 678 newHdr := header.IPv4(newPkt.NetworkHeader().Slice()) 679 defer newPkt.DecRef() 680 681 forwardToEp, ok := e.protocol.getEndpointForNIC(route.NICID()) 682 if !ok { 683 return &ip.ErrUnknownOutputEndpoint{} 684 } 685 686 if updateOptions { 687 if err := forwardToEp.updateOptionsForForwarding(newPkt); err != nil { 688 return err 689 } 690 } 691 692 ttl := h.TTL() 693 // As per RFC 791 page 30, Time to Live, 694 // 695 // This field must be decreased at each point that the internet header 696 // is processed to reflect the time spent processing the datagram. 697 // Even if no local information is available on the time actually 698 // spent, the field must be decremented by 1. 699 newHdr.SetTTL(ttl - 1) 700 // We perform a full checksum as we may have updated options above. The IP 701 // header is relatively small so this is not expected to be an expensive 702 // operation. 703 newHdr.SetChecksum(0) 704 newHdr.SetChecksum(^newHdr.CalculateChecksum()) 705 706 switch err := forwardToEp.writePacketPostRouting(route, newPkt, true /* headerIncluded */); err.(type) { 707 case nil: 708 return nil 709 case *tcpip.ErrMessageTooLong: 710 // As per RFC 792, page 4, Destination Unreachable: 711 // 712 // Another case is when a datagram must be fragmented to be forwarded by a 713 // gateway yet the Don't Fragment flag is on. In this case the gateway must 714 // discard the datagram and may return a destination unreachable message. 715 // 716 // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit 717 // while sending the packet and returns this error iff fragmentation is 718 // necessary and the bit is also set. 
719 _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt, false /* deliveredLocally */) 720 return &ip.ErrMessageTooLong{} 721 case *tcpip.ErrNoBufferSpace: 722 return &ip.ErrOutgoingDeviceNoBufferSpace{} 723 default: 724 return &ip.ErrOther{Err: err} 725 } 726 } 727 728 // forwardUnicastPacket attempts to forward a packet to its final destination. 729 func (e *endpoint) forwardUnicastPacket(pkt stack.PacketBufferPtr) ip.ForwardingError { 730 hView := pkt.NetworkHeader().View() 731 defer hView.Release() 732 h := header.IPv4(hView.AsSlice()) 733 734 dstAddr := h.DestinationAddress() 735 736 if err := validateAddressesForForwarding(h); err != nil { 737 return err 738 } 739 740 ttl := h.TTL() 741 if ttl == 0 { 742 // As per RFC 792 page 6, Time Exceeded Message, 743 // 744 // If the gateway processing a datagram finds the time to live field 745 // is zero it must discard the datagram. The gateway may also notify 746 // the source host via the time exceeded message. 747 // 748 // We return the original error rather than the result of returning 749 // the ICMP packet because the original error is more relevant to 750 // the caller. 751 _ = e.protocol.returnError(&icmpReasonTTLExceeded{}, pkt, false /* deliveredLocally */) 752 return &ip.ErrTTLExceeded{} 753 } 754 755 if err := e.updateOptionsForForwarding(pkt); err != nil { 756 return err 757 } 758 759 stk := e.protocol.stack 760 761 // Check if the destination is owned by the stack. 762 if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { 763 inNicName := stk.FindNICNameFromID(e.nic.ID()) 764 outNicName := stk.FindNICNameFromID(ep.nic.ID()) 765 if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok { 766 // iptables is telling us to drop the packet. 767 e.stats.ip.IPTablesForwardDropped.Increment() 768 return nil 769 } 770 771 // The packet originally arrived on e so provide its NIC as the input NIC. 
772 ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 773 return nil 774 } 775 776 r, err := stk.FindRoute(0, tcpip.Address{}, dstAddr, ProtocolNumber, false /* multicastLoop */) 777 switch err.(type) { 778 case nil: 779 // TODO(https://gvisor.dev/issues/8105): We should not observe ErrHostUnreachable from route 780 // lookups. 781 case *tcpip.ErrHostUnreachable, *tcpip.ErrNetworkUnreachable: 782 // We return the original error rather than the result of returning 783 // the ICMP packet because the original error is more relevant to 784 // the caller. 785 _ = e.protocol.returnError(&icmpReasonNetworkUnreachable{}, pkt, false /* deliveredLocally */) 786 return &ip.ErrHostUnreachable{} 787 default: 788 return &ip.ErrOther{Err: err} 789 } 790 defer r.Release() 791 792 // TODO(https://gvisor.dev/issue/7472): Unicast IP options should be updated 793 // using the output endpoint (instead of the input endpoint). In particular, 794 // RFC 1812 section 5.2.1 states the following: 795 // 796 // Processing of certain IP options requires that the router insert its IP 797 // address into the option. As noted in Section [5.2.4], the address 798 // inserted MUST be the address of the logical interface on which the 799 // packet is sent or the router's router-id if the packet is sent over an 800 // unnumbered interface. Thus, processing of these options cannot be 801 // completed until after the output interface is chosen. 802 return e.forwardPacketWithRoute(r, pkt, false /* updateOptions */) 803 } 804 805 // HandlePacket is called by the link layer when new ipv4 packets arrive for 806 // this endpoint. 
807 func (e *endpoint) HandlePacket(pkt stack.PacketBufferPtr) { 808 stats := e.stats.ip 809 810 stats.PacketsReceived.Increment() 811 812 if !e.isEnabled() { 813 stats.DisabledPacketsReceived.Increment() 814 return 815 } 816 817 hView, ok := e.protocol.parseAndValidate(pkt) 818 if !ok { 819 stats.MalformedPacketsReceived.Increment() 820 return 821 } 822 h := header.IPv4(hView.AsSlice()) 823 defer hView.Release() 824 825 if !e.nic.IsLoopback() { 826 if !e.protocol.options.AllowExternalLoopbackTraffic { 827 if header.IsV4LoopbackAddress(h.SourceAddress()) { 828 stats.InvalidSourceAddressesReceived.Increment() 829 return 830 } 831 832 if header.IsV4LoopbackAddress(h.DestinationAddress()) { 833 stats.InvalidDestinationAddressesReceived.Increment() 834 return 835 } 836 } 837 838 if e.protocol.stack.HandleLocal() { 839 addressEndpoint := e.AcquireAssignedAddress(header.IPv4(pkt.NetworkHeader().Slice()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint) 840 if addressEndpoint != nil { 841 addressEndpoint.DecRef() 842 843 // The source address is one of our own, so we never should have gotten 844 // a packet like this unless HandleLocal is false or our NIC is the 845 // loopback interface. 846 stats.InvalidSourceAddressesReceived.Increment() 847 return 848 } 849 } 850 851 // Loopback traffic skips the prerouting chain. 852 inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 853 if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { 854 // iptables is telling us to drop the packet. 855 stats.IPTablesPreroutingDropped.Increment() 856 return 857 } 858 } 859 860 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 861 } 862 863 // handleLocalPacket is like HandlePacket except it does not perform the 864 // prerouting iptables hook or check for loopback traffic that originated from 865 // outside of the netstack (i.e. martian loopback packets). 
func (e *endpoint) handleLocalPacket(pkt stack.PacketBufferPtr, canSkipRXChecksum bool) {
	stats := e.stats.ip
	stats.PacketsReceived.Increment()

	// Work on an inbound clone so the caller's packet buffer is left alone; the
	// clone is released when this function returns.
	pkt = pkt.CloneToInbound()
	defer pkt.DecRef()
	pkt.RXChecksumValidated = canSkipRXChecksum

	hView, ok := e.protocol.parseAndValidate(pkt)
	if !ok {
		stats.MalformedPacketsReceived.Increment()
		return
	}
	h := header.IPv4(hView.AsSlice())
	defer hView.Release()

	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
}

// validateAddressesForForwarding checks that the source and destination
// addresses in h permit the packet to be forwarded.
//
// It returns a non-nil error if the source address is in 0.0.0.0/8, if the
// source address is link-local unicast, or if the destination address is
// link-local unicast or link-local multicast.
func validateAddressesForForwarding(h header.IPv4) ip.ForwardingError {
	srcAddr := h.SourceAddress()

	// As per RFC 5735 section 3,
	//
	//   0.0.0.0/8 - Addresses in this block refer to source hosts on "this"
	//   network.  Address 0.0.0.0/32 may be used as a source address for this
	//   host on this network; other addresses within 0.0.0.0/8 may be used to
	//   refer to specified hosts on this network ([RFC1122], Section 3.2.1.3).
	//
	// And RFC 6890 section 2.2.2,
	//
	//   +----------------------+----------------------------+
	//   | Attribute            | Value                      |
	//   +----------------------+----------------------------+
	//   | Address Block        | 0.0.0.0/8                  |
	//   | Name                 | "This host on this network"|
	//   | RFC                  | [RFC1122], Section 3.2.1.3 |
	//   | Allocation Date      | September 1981             |
	//   | Termination Date     | N/A                        |
	//   | Source               | True                       |
	//   | Destination          | False                      |
	//   | Forwardable          | False                      |
	//   | Global               | False                      |
	//   | Reserved-by-Protocol | True                       |
	//   +----------------------+----------------------------+
	if header.IPv4CurrentNetworkSubnet.Contains(srcAddr) {
		return &ip.ErrInitializingSourceAddress{}
	}

	// As per RFC 3927 section 7,
	//
	//   A router MUST NOT forward a packet with an IPv4 Link-Local source or
	//   destination address, irrespective of the router's default route
	//   configuration or routes obtained from dynamic routing protocols.
	//
	//   A router which receives a packet with an IPv4 Link-Local source or
	//   destination address MUST NOT forward the packet.  This prevents
	//   forwarding of packets back onto the network segment from which they
	//   originated, or to any other segment.
	if header.IsV4LinkLocalUnicastAddress(srcAddr) {
		return &ip.ErrLinkLocalSourceAddress{}
	}
	if dstAddr := h.DestinationAddress(); header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) {
		return &ip.ErrLinkLocalDestinationAddress{}
	}
	return nil
}

// forwardMulticastPacket validates a multicast pkt and attempts to forward it.
//
// This method should be invoked for incoming multicast packets using the
// endpoint that received the packet.
//
// When no installed route exists the packet is queued in the multicast route
// table and ErrHostUnreachable is returned; a missing-route event is emitted
// only when this packet caused the pending route entry to be inserted.
func (e *endpoint) forwardMulticastPacket(h header.IPv4, pkt stack.PacketBufferPtr) ip.ForwardingError {
	if err := validateAddressesForForwarding(h); err != nil {
		return err
	}

	if opts := h.Options(); len(opts) != 0 {
		// Check if the options are valid, but don't mutate them. This corresponds
		// to step 3 of RFC 1812 section 5.2.1.1.
		if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil {
			// Per RFC 1812 section 4.3.2.7, an ICMP error message should not be
			// sent for:
			//
			//   A packet destined to an IP broadcast or IP multicast address.
			//
			// Note that protocol.returnError also enforces this requirement.
			// However, we intentionally omit it here since this path is multicast
			// only.
			return &ip.ErrParameterProblem{}
		}
	}

	routeKey := stack.UnicastSourceAndMulticastDestination{
		Source:      h.SourceAddress(),
		Destination: h.DestinationAddress(),
	}

	// The pkt has been validated. Consequently, if a route is not found, then
	// the pkt can safely be queued.
	result, hasBufferSpace := e.protocol.multicastRouteTable.GetRouteOrInsertPending(routeKey, pkt)

	if !hasBufferSpace {
		// Unable to queue the pkt. Silently drop it.
		return &ip.ErrNoMulticastPendingQueueBufferSpace{}
	}

	switch result.GetRouteResultState {
	case multicast.InstalledRouteFound:
		// Attempt to forward the pkt using an existing route.
		return e.forwardValidatedMulticastPacket(pkt, result.InstalledRoute)
	case multicast.NoRouteFoundAndPendingInserted:
		// Report the missing route together with the NIC the packet arrived on.
		e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) {
			disp.OnMissingRoute(stack.MulticastPacketContext{
				stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()},
				e.nic.ID(),
			})
		})
	case multicast.PacketQueuedInPendingRoute:
	default:
		panic(fmt.Sprintf("unexpected GetRouteResultState: %s", result.GetRouteResultState))
	}
	return &ip.ErrHostUnreachable{}
}

func (e *endpoint) updateOptionsForForwarding(pkt stack.PacketBufferPtr) ip.ForwardingError {
	h := header.IPv4(pkt.NetworkHeader().Slice())
	if opts := h.Options(); len(opts) != 0 {
		newOpts, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageForward{})
		if optProblem != nil {
			if optProblem.NeedICMP {
				// Note that this will not emit an ICMP error if the destination is
				// multicast.
				_ = e.protocol.returnError(&icmpReasonParamProblem{
					pointer: optProblem.Pointer,
				}, pkt, false /* deliveredLocally */)
			}
			return &ip.ErrParameterProblem{}
		}
		copied := copy(opts, newOpts)
		if copied != len(newOpts) {
			panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts)))
		}
		// Since in forwarding we handle all options, including copying those we
		// do not recognise, the options region should remain the same size which
		// simplifies processing.
As we MAY receive a packet with a lot of padded 1012 // bytes after the "end of options list" byte, make sure we copy 1013 // them as the legal padding value (0). 1014 for i := copied; i < len(opts); i++ { 1015 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 1016 opts[i] = byte(header.IPv4OptionListEndType) 1017 } 1018 } 1019 return nil 1020 } 1021 1022 // forwardValidatedMulticastPacket attempts to forward the pkt using the 1023 // provided installedRoute. 1024 // 1025 // This method should be invoked by the endpoint that received the pkt. 1026 func (e *endpoint) forwardValidatedMulticastPacket(pkt stack.PacketBufferPtr, installedRoute *multicast.InstalledRoute) ip.ForwardingError { 1027 // Per RFC 1812 section 5.2.1.3, 1028 // 1029 // Based on the IP source and destination addresses found in the datagram 1030 // header, the router determines whether the datagram has been received 1031 // on the proper interface for forwarding. If not, the datagram is 1032 // dropped silently. 1033 if e.nic.ID() != installedRoute.ExpectedInputInterface { 1034 h := header.IPv4(pkt.NetworkHeader().Slice()) 1035 e.emitMulticastEvent(func(disp stack.MulticastForwardingEventDispatcher) { 1036 disp.OnUnexpectedInputInterface(stack.MulticastPacketContext{ 1037 stack.UnicastSourceAndMulticastDestination{h.SourceAddress(), h.DestinationAddress()}, 1038 e.nic.ID(), 1039 }, installedRoute.ExpectedInputInterface) 1040 }) 1041 return &ip.ErrUnexpectedMulticastInputInterface{} 1042 } 1043 1044 for _, outgoingInterface := range installedRoute.OutgoingInterfaces { 1045 if err := e.forwardMulticastPacketForOutgoingInterface(pkt, outgoingInterface); err != nil { 1046 e.handleForwardingError(err) 1047 continue 1048 } 1049 // The pkt was successfully forwarded. Mark the route as used. 
1050 installedRoute.SetLastUsedTimestamp(e.protocol.stack.Clock().NowMonotonic()) 1051 } 1052 return nil 1053 } 1054 1055 // forwardMulticastPacketForOutgoingInterface attempts to forward the pkt out 1056 // of the provided outgoingInterface. 1057 // 1058 // This method should be invoked by the endpoint that received the pkt. 1059 func (e *endpoint) forwardMulticastPacketForOutgoingInterface(pkt stack.PacketBufferPtr, outgoingInterface stack.MulticastRouteOutgoingInterface) ip.ForwardingError { 1060 h := header.IPv4(pkt.NetworkHeader().Slice()) 1061 1062 // Per RFC 1812 section 5.2.1.3, 1063 // 1064 // A copy of the multicast datagram is forwarded out each outgoing 1065 // interface whose minimum TTL value is less than or equal to the TTL 1066 // value in the datagram header. 1067 // 1068 // Copying of the packet is deferred to forwardPacketWithRoute since unicast 1069 // and multicast both require a copy. 1070 if outgoingInterface.MinTTL > h.TTL() { 1071 return &ip.ErrTTLExceeded{} 1072 } 1073 1074 route := e.protocol.stack.NewRouteForMulticast(outgoingInterface.ID, h.DestinationAddress(), e.NetworkProtocolNumber()) 1075 1076 if route == nil { 1077 // Failed to convert to a stack.Route. This likely means that the outgoing 1078 // endpoint no longer exists. 1079 return &ip.ErrHostUnreachable{} 1080 } 1081 defer route.Release() 1082 1083 return e.forwardPacketWithRoute(route, pkt, true /* updateOptions */) 1084 } 1085 1086 func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt stack.PacketBufferPtr, inNICName string) { 1087 pkt.NICID = e.nic.ID() 1088 1089 // Raw socket packets are delivered based solely on the transport protocol 1090 // number. We only require that the packet be valid IPv4, and that they not 1091 // be fragmented. 
1092 if !h.More() && h.FragmentOffset() == 0 { 1093 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1094 } 1095 1096 stats := e.stats 1097 stats.ip.ValidPacketsReceived.Increment() 1098 1099 srcAddr := h.SourceAddress() 1100 dstAddr := h.DestinationAddress() 1101 1102 // As per RFC 1122 section 3.2.1.3: 1103 // When a host sends any datagram, the IP source address MUST 1104 // be one of its own IP addresses (but not a broadcast or 1105 // multicast address). 1106 if srcAddr == header.IPv4Broadcast || header.IsV4MulticastAddress(srcAddr) { 1107 stats.ip.InvalidSourceAddressesReceived.Increment() 1108 return 1109 } 1110 // Make sure the source address is not a subnet-local broadcast address. 1111 if addressEndpoint := e.AcquireAssignedAddress(srcAddr, false /* createTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 1112 subnet := addressEndpoint.Subnet() 1113 addressEndpoint.DecRef() 1114 if subnet.IsBroadcast(srcAddr) { 1115 stats.ip.InvalidSourceAddressesReceived.Increment() 1116 return 1117 } 1118 } 1119 1120 if header.IsV4MulticastAddress(dstAddr) { 1121 // Handle all packets destined to a multicast address separately. Unlike 1122 // unicast, these packets can be both delivered locally and forwarded. See 1123 // RFC 1812 section 5.2.3 for details regarding the forwarding/local 1124 // delivery decision. 1125 1126 multicastForwarding := e.MulticastForwarding() && e.protocol.multicastForwarding() 1127 1128 if multicastForwarding { 1129 e.handleForwardingError(e.forwardMulticastPacket(h, pkt)) 1130 } 1131 1132 if e.IsInGroup(dstAddr) { 1133 e.deliverPacketLocally(h, pkt, inNICName) 1134 return 1135 } 1136 1137 if !multicastForwarding { 1138 // Only consider the destination address invalid if we didn't attempt to 1139 // forward the pkt and it was not delivered locally. 
1140 stats.ip.InvalidDestinationAddressesReceived.Increment() 1141 } 1142 return 1143 } 1144 1145 // Before we do any processing, check if the packet was received as some 1146 // sort of broadcast. 1147 // 1148 // If the packet is destined for this device, then it should be delivered 1149 // locally. Otherwise, if forwarding is enabled, it should be forwarded. 1150 if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil { 1151 subnet := addressEndpoint.AddressWithPrefix().Subnet() 1152 addressEndpoint.DecRef() 1153 pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast 1154 e.deliverPacketLocally(h, pkt, inNICName) 1155 } else if e.Forwarding() { 1156 e.handleForwardingError(e.forwardUnicastPacket(pkt)) 1157 } else { 1158 stats.ip.InvalidDestinationAddressesReceived.Increment() 1159 } 1160 } 1161 1162 // handleForwardingError processes the provided err and increments any relevant 1163 // counters. 
1164 func (e *endpoint) handleForwardingError(err ip.ForwardingError) { 1165 stats := e.stats.ip 1166 switch err := err.(type) { 1167 case nil: 1168 return 1169 case *ip.ErrInitializingSourceAddress: 1170 stats.Forwarding.InitializingSource.Increment() 1171 case *ip.ErrLinkLocalSourceAddress: 1172 stats.Forwarding.LinkLocalSource.Increment() 1173 case *ip.ErrLinkLocalDestinationAddress: 1174 stats.Forwarding.LinkLocalDestination.Increment() 1175 case *ip.ErrTTLExceeded: 1176 stats.Forwarding.ExhaustedTTL.Increment() 1177 case *ip.ErrHostUnreachable: 1178 stats.Forwarding.Unrouteable.Increment() 1179 case *ip.ErrParameterProblem: 1180 stats.MalformedPacketsReceived.Increment() 1181 case *ip.ErrMessageTooLong: 1182 stats.Forwarding.PacketTooBig.Increment() 1183 case *ip.ErrNoMulticastPendingQueueBufferSpace: 1184 stats.Forwarding.NoMulticastPendingQueueBufferSpace.Increment() 1185 case *ip.ErrUnexpectedMulticastInputInterface: 1186 stats.Forwarding.UnexpectedMulticastInputInterface.Increment() 1187 case *ip.ErrUnknownOutputEndpoint: 1188 stats.Forwarding.UnknownOutputEndpoint.Increment() 1189 case *ip.ErrOutgoingDeviceNoBufferSpace: 1190 stats.Forwarding.OutgoingDeviceNoBufferSpace.Increment() 1191 default: 1192 panic(fmt.Sprintf("unrecognized forwarding error: %s", err)) 1193 } 1194 stats.Forwarding.Errors.Increment() 1195 } 1196 1197 func (e *endpoint) deliverPacketLocally(h header.IPv4, pkt stack.PacketBufferPtr, inNICName string) { 1198 stats := e.stats 1199 // iptables filtering. All packets that reach here are intended for 1200 // this machine and will not be forwarded. 1201 if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok { 1202 // iptables is telling us to drop the packet. 1203 stats.ip.IPTablesInputDropped.Increment() 1204 return 1205 } 1206 1207 if h.More() || h.FragmentOffset() != 0 { 1208 if pkt.Data().Size()+len(pkt.TransportHeader().Slice()) == 0 { 1209 // Drop the packet as it's marked as a fragment but has 1210 // no payload. 
1211 stats.ip.MalformedPacketsReceived.Increment() 1212 stats.ip.MalformedFragmentsReceived.Increment() 1213 return 1214 } 1215 if opts := h.Options(); len(opts) != 0 { 1216 // If there are options we need to check them before we do assembly 1217 // or we could be assembling errant packets. However we do not change the 1218 // options as that could lead to double processing later. 1219 if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil { 1220 if optProblem.NeedICMP { 1221 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1222 pointer: optProblem.Pointer, 1223 }, pkt, true /* deliveredLocally */) 1224 e.stats.ip.MalformedPacketsReceived.Increment() 1225 } 1226 return 1227 } 1228 } 1229 // The packet is a fragment, let's try to reassemble it. 1230 start := h.FragmentOffset() 1231 // Drop the fragment if the size of the reassembled payload would exceed the 1232 // maximum payload size. 1233 // 1234 // Note that this addition doesn't overflow even on 32bit architecture 1235 // because pkt.Data().Size() should not exceed 65535 (the max IP datagram 1236 // size). Otherwise the packet would've been rejected as invalid before 1237 // reaching here. 1238 if int(start)+pkt.Data().Size() > header.IPv4MaximumPayloadSize { 1239 stats.ip.MalformedPacketsReceived.Increment() 1240 stats.ip.MalformedFragmentsReceived.Increment() 1241 return 1242 } 1243 1244 proto := h.Protocol() 1245 resPkt, transProtoNum, ready, err := e.protocol.fragmentation.Process( 1246 // As per RFC 791 section 2.3, the identification value is unique 1247 // for a source-destination pair and protocol. 
1248 fragmentation.FragmentID{ 1249 Source: h.SourceAddress(), 1250 Destination: h.DestinationAddress(), 1251 ID: uint32(h.ID()), 1252 Protocol: proto, 1253 }, 1254 start, 1255 start+uint16(pkt.Data().Size())-1, 1256 h.More(), 1257 proto, 1258 pkt, 1259 ) 1260 if err != nil { 1261 stats.ip.MalformedPacketsReceived.Increment() 1262 stats.ip.MalformedFragmentsReceived.Increment() 1263 return 1264 } 1265 if !ready { 1266 return 1267 } 1268 defer resPkt.DecRef() 1269 pkt = resPkt 1270 h = header.IPv4(pkt.NetworkHeader().Slice()) 1271 1272 // The reassembler doesn't take care of fixing up the header, so we need 1273 // to do it here. 1274 h.SetTotalLength(uint16(pkt.Data().Size() + len(h))) 1275 h.SetFlagsFragmentOffset(0, 0) 1276 1277 e.protocol.parseTransport(pkt, tcpip.TransportProtocolNumber(transProtoNum)) 1278 1279 // Now that the packet is reassembled, it can be sent to raw sockets. 1280 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1281 } 1282 stats.ip.PacketsDelivered.Increment() 1283 1284 p := h.TransportProtocol() 1285 if p == header.ICMPv4ProtocolNumber { 1286 // TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport 1287 // headers, the setting of the transport number here should be 1288 // unnecessary and removed. 1289 pkt.TransportProtocolNumber = p 1290 e.handleICMP(pkt) 1291 return 1292 } 1293 // ICMP handles options itself but do it here for all remaining destinations. 
1294 var hasRouterAlertOption bool 1295 if opts := h.Options(); len(opts) != 0 { 1296 newOpts, processedOpts, optProblem := e.processIPOptions(pkt, opts, &optionUsageReceive{}) 1297 if optProblem != nil { 1298 if optProblem.NeedICMP { 1299 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1300 pointer: optProblem.Pointer, 1301 }, pkt, true /* deliveredLocally */) 1302 stats.ip.MalformedPacketsReceived.Increment() 1303 } 1304 return 1305 } 1306 hasRouterAlertOption = processedOpts.routerAlert 1307 copied := copy(opts, newOpts) 1308 if copied != len(newOpts) { 1309 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 1310 } 1311 for i := copied; i < len(opts); i++ { 1312 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 1313 opts[i] = byte(header.IPv4OptionListEndType) 1314 } 1315 } 1316 if p == header.IGMPProtocolNumber { 1317 e.mu.Lock() 1318 e.igmp.handleIGMP(pkt, hasRouterAlertOption) // +checklocksforce: e == e.igmp.ep. 1319 e.mu.Unlock() 1320 return 1321 } 1322 1323 switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res { 1324 case stack.TransportPacketHandled: 1325 case stack.TransportPacketDestinationPortUnreachable: 1326 // As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination 1327 // Unreachable messages with code: 1328 // 3 (Port Unreachable), when the designated transport protocol 1329 // (e.g., UDP) is unable to demultiplex the datagram but has no 1330 // protocol mechanism to inform the sender. 
1331 _ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt, true /* deliveredLocally */) 1332 case stack.TransportPacketProtocolUnreachable: 1333 // As per RFC: 1122 Section 3.2.2.1 1334 // A host SHOULD generate Destination Unreachable messages with code: 1335 // 2 (Protocol Unreachable), when the designated transport protocol 1336 // is not supported 1337 _ = e.protocol.returnError(&icmpReasonProtoUnreachable{}, pkt, true /* deliveredLocally */) 1338 default: 1339 panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res)) 1340 } 1341 } 1342 1343 // Close cleans up resources associated with the endpoint. 1344 func (e *endpoint) Close() { 1345 e.mu.Lock() 1346 e.disableLocked() 1347 e.addressableEndpointState.Cleanup() 1348 e.mu.Unlock() 1349 1350 e.protocol.forgetEndpoint(e.nic.ID()) 1351 } 1352 1353 // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. 1354 func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { 1355 e.mu.RLock() 1356 defer e.mu.RUnlock() 1357 1358 ep, err := e.addressableEndpointState.AddAndAcquireAddress(addr, properties, stack.Permanent) 1359 if err == nil { 1360 e.sendQueuedReports() 1361 } 1362 return ep, err 1363 } 1364 1365 // sendQueuedReports sends queued igmp reports. 1366 // 1367 // +checklocksread:e.mu 1368 // +checklocksalias:e.igmp.ep.mu=e.mu 1369 func (e *endpoint) sendQueuedReports() { 1370 e.igmp.sendQueuedReports() 1371 } 1372 1373 // RemovePermanentAddress implements stack.AddressableEndpoint. 1374 func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { 1375 e.mu.RLock() 1376 defer e.mu.RUnlock() 1377 return e.addressableEndpointState.RemovePermanentAddress(addr) 1378 } 1379 1380 // SetDeprecated implements stack.AddressableEndpoint. 
1381 func (e *endpoint) SetDeprecated(addr tcpip.Address, deprecated bool) tcpip.Error { 1382 e.mu.RLock() 1383 defer e.mu.RUnlock() 1384 return e.addressableEndpointState.SetDeprecated(addr, deprecated) 1385 } 1386 1387 // SetLifetimes implements stack.AddressableEndpoint. 1388 func (e *endpoint) SetLifetimes(addr tcpip.Address, lifetimes stack.AddressLifetimes) tcpip.Error { 1389 e.mu.RLock() 1390 defer e.mu.RUnlock() 1391 return e.addressableEndpointState.SetLifetimes(addr, lifetimes) 1392 } 1393 1394 // MainAddress implements stack.AddressableEndpoint. 1395 func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { 1396 e.mu.RLock() 1397 defer e.mu.RUnlock() 1398 return e.addressableEndpointState.MainAddress() 1399 } 1400 1401 // AcquireAssignedAddress implements stack.AddressableEndpoint. 1402 func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { 1403 e.mu.RLock() 1404 defer e.mu.RUnlock() 1405 1406 loopback := e.nic.IsLoopback() 1407 return e.addressableEndpointState.AcquireAssignedAddressOrMatching(localAddr, func(addressEndpoint stack.AddressEndpoint) bool { 1408 subnet := addressEndpoint.Subnet() 1409 // IPv4 has a notion of a subnet broadcast address and considers the 1410 // loopback interface bound to an address's whole subnet (on linux). 1411 return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr)) 1412 }, allowTemp, tempPEB) 1413 } 1414 1415 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. 
1416 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1417 e.mu.RLock() 1418 defer e.mu.RUnlock() 1419 return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired) 1420 } 1421 1422 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress 1423 // but with locking requirements 1424 // 1425 // +checklocksread:e.mu 1426 func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1427 return e.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired) 1428 } 1429 1430 // PrimaryAddresses implements stack.AddressableEndpoint. 1431 func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { 1432 e.mu.RLock() 1433 defer e.mu.RUnlock() 1434 return e.addressableEndpointState.PrimaryAddresses() 1435 } 1436 1437 // PermanentAddresses implements stack.AddressableEndpoint. 1438 func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { 1439 e.mu.RLock() 1440 defer e.mu.RUnlock() 1441 return e.addressableEndpointState.PermanentAddresses() 1442 } 1443 1444 // JoinGroup implements stack.GroupAddressableEndpoint. 1445 func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { 1446 e.mu.Lock() 1447 defer e.mu.Unlock() 1448 return e.joinGroupLocked(addr) 1449 } 1450 1451 // joinGroupLocked is like JoinGroup but with locking requirements. 1452 // 1453 // +checklocks:e.mu 1454 // +checklocksalias:e.igmp.ep.mu=e.mu 1455 func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { 1456 if !header.IsV4MulticastAddress(addr) { 1457 return &tcpip.ErrBadAddress{} 1458 } 1459 1460 e.igmp.joinGroup(addr) 1461 return nil 1462 } 1463 1464 // LeaveGroup implements stack.GroupAddressableEndpoint. 
1465 func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { 1466 e.mu.Lock() 1467 defer e.mu.Unlock() 1468 return e.leaveGroupLocked(addr) 1469 } 1470 1471 // leaveGroupLocked is like LeaveGroup but with locking requirements. 1472 // 1473 // +checklocks:e.mu 1474 // +checklocksalias:e.igmp.ep.mu=e.mu 1475 func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { 1476 return e.igmp.leaveGroup(addr) 1477 } 1478 1479 // IsInGroup implements stack.GroupAddressableEndpoint. 1480 func (e *endpoint) IsInGroup(addr tcpip.Address) bool { 1481 e.mu.RLock() 1482 defer e.mu.RUnlock() 1483 return e.igmp.isInGroup(addr) // +checklocksforce: e.mu==e.igmp.ep.mu. 1484 } 1485 1486 // Stats implements stack.NetworkEndpoint. 1487 func (e *endpoint) Stats() stack.NetworkEndpointStats { 1488 return &e.stats.localStats 1489 } 1490 1491 var _ stack.NetworkProtocol = (*protocol)(nil) 1492 var _ stack.MulticastForwardingNetworkProtocol = (*protocol)(nil) 1493 var _ stack.RejectIPv4WithHandler = (*protocol)(nil) 1494 var _ fragmentation.TimeoutHandler = (*protocol)(nil) 1495 1496 type protocol struct { 1497 stack *stack.Stack 1498 1499 // mu protects annotated fields below. 1500 mu sync.RWMutex 1501 1502 // eps is keyed by NICID to allow protocol methods to retrieve an endpoint 1503 // when handling a packet, by looking at which NIC handled the packet. 1504 // +checklocks:mu 1505 eps map[tcpip.NICID]*endpoint 1506 1507 // ICMP types for which the stack's global rate limiting must apply. 1508 // +checklocks:mu 1509 icmpRateLimitedTypes map[header.ICMPv4Type]struct{} 1510 1511 // defaultTTL is the current default TTL for the protocol. Only the 1512 // uint8 portion of it is meaningful. 
1513 defaultTTL atomicbitops.Uint32 1514 1515 ids []atomicbitops.Uint32 1516 hashIV uint32 1517 1518 fragmentation *fragmentation.Fragmentation 1519 1520 options Options 1521 1522 multicastRouteTable multicast.RouteTable 1523 // multicastForwardingDisp is the multicast forwarding event dispatcher that 1524 // an integrator can provide to receive multicast forwarding events. Note 1525 // that multicast packets will only be forwarded if this is non-nil. 1526 // +checklocks:mu 1527 multicastForwardingDisp stack.MulticastForwardingEventDispatcher 1528 } 1529 1530 // Number returns the ipv4 protocol number. 1531 func (p *protocol) Number() tcpip.NetworkProtocolNumber { 1532 return ProtocolNumber 1533 } 1534 1535 // MinimumPacketSize returns the minimum valid ipv4 packet size. 1536 func (p *protocol) MinimumPacketSize() int { 1537 return header.IPv4MinimumSize 1538 } 1539 1540 // ParseAddresses implements stack.NetworkProtocol. 1541 func (*protocol) ParseAddresses(v []byte) (src, dst tcpip.Address) { 1542 h := header.IPv4(v) 1543 return h.SourceAddress(), h.DestinationAddress() 1544 } 1545 1546 // SetOption implements stack.NetworkProtocol. 1547 func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { 1548 switch v := option.(type) { 1549 case *tcpip.DefaultTTLOption: 1550 p.SetDefaultTTL(uint8(*v)) 1551 return nil 1552 default: 1553 return &tcpip.ErrUnknownProtocolOption{} 1554 } 1555 } 1556 1557 // Option implements stack.NetworkProtocol. 1558 func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { 1559 switch v := option.(type) { 1560 case *tcpip.DefaultTTLOption: 1561 *v = tcpip.DefaultTTLOption(p.DefaultTTL()) 1562 return nil 1563 default: 1564 return &tcpip.ErrUnknownProtocolOption{} 1565 } 1566 } 1567 1568 // SetDefaultTTL sets the default TTL for endpoints created with this protocol. 
1569 func (p *protocol) SetDefaultTTL(ttl uint8) { 1570 p.defaultTTL.Store(uint32(ttl)) 1571 } 1572 1573 // DefaultTTL returns the default TTL for endpoints created with this protocol. 1574 func (p *protocol) DefaultTTL() uint8 { 1575 return uint8(p.defaultTTL.Load()) 1576 } 1577 1578 // Close implements stack.TransportProtocol. 1579 func (p *protocol) Close() { 1580 p.fragmentation.Release() 1581 p.multicastRouteTable.Close() 1582 } 1583 1584 // Wait implements stack.TransportProtocol. 1585 func (*protocol) Wait() {} 1586 1587 func (p *protocol) validateUnicastSourceAndMulticastDestination(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1588 if !p.isUnicastAddress(addresses.Source) || header.IsV4LinkLocalUnicastAddress(addresses.Source) { 1589 return &tcpip.ErrBadAddress{} 1590 } 1591 1592 if !header.IsV4MulticastAddress(addresses.Destination) || header.IsV4LinkLocalMulticastAddress(addresses.Destination) { 1593 return &tcpip.ErrBadAddress{} 1594 } 1595 1596 return nil 1597 } 1598 1599 func (p *protocol) multicastForwarding() bool { 1600 p.mu.RLock() 1601 defer p.mu.RUnlock() 1602 return p.multicastForwardingDisp != nil 1603 } 1604 1605 func (p *protocol) newInstalledRoute(route stack.MulticastRoute) (*multicast.InstalledRoute, tcpip.Error) { 1606 if len(route.OutgoingInterfaces) == 0 { 1607 return nil, &tcpip.ErrMissingRequiredFields{} 1608 } 1609 1610 if !p.stack.HasNIC(route.ExpectedInputInterface) { 1611 return nil, &tcpip.ErrUnknownNICID{} 1612 } 1613 1614 for _, outgoingInterface := range route.OutgoingInterfaces { 1615 if route.ExpectedInputInterface == outgoingInterface.ID { 1616 return nil, &tcpip.ErrMulticastInputCannotBeOutput{} 1617 } 1618 1619 if !p.stack.HasNIC(outgoingInterface.ID) { 1620 return nil, &tcpip.ErrUnknownNICID{} 1621 } 1622 } 1623 return p.multicastRouteTable.NewInstalledRoute(route), nil 1624 } 1625 1626 // AddMulticastRoute implements stack.MulticastForwardingNetworkProtocol. 
1627 func (p *protocol) AddMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination, route stack.MulticastRoute) tcpip.Error { 1628 if !p.multicastForwarding() { 1629 return &tcpip.ErrNotPermitted{} 1630 } 1631 1632 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1633 return err 1634 } 1635 1636 installedRoute, err := p.newInstalledRoute(route) 1637 if err != nil { 1638 return err 1639 } 1640 1641 pendingPackets := p.multicastRouteTable.AddInstalledRoute(addresses, installedRoute) 1642 1643 for _, pkt := range pendingPackets { 1644 p.forwardPendingMulticastPacket(pkt, installedRoute) 1645 } 1646 return nil 1647 } 1648 1649 // RemoveMulticastRoute implements 1650 // stack.MulticastForwardingNetworkProtocol.RemoveMulticastRoute. 1651 func (p *protocol) RemoveMulticastRoute(addresses stack.UnicastSourceAndMulticastDestination) tcpip.Error { 1652 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1653 return err 1654 } 1655 1656 if removed := p.multicastRouteTable.RemoveInstalledRoute(addresses); !removed { 1657 return &tcpip.ErrHostUnreachable{} 1658 } 1659 1660 return nil 1661 } 1662 1663 // EnableMulticastForwarding implements 1664 // stack.MulticastForwardingNetworkProtocol.EnableMulticastForwarding. 1665 func (p *protocol) EnableMulticastForwarding(disp stack.MulticastForwardingEventDispatcher) (bool, tcpip.Error) { 1666 p.mu.Lock() 1667 defer p.mu.Unlock() 1668 1669 if p.multicastForwardingDisp != nil { 1670 return true, nil 1671 } 1672 1673 if disp == nil { 1674 return false, &tcpip.ErrInvalidOptionValue{} 1675 } 1676 1677 p.multicastForwardingDisp = disp 1678 return false, nil 1679 } 1680 1681 // DisableMulticastForwarding implements 1682 // stack.MulticastForwardingNetworkProtocol.DisableMulticastForwarding. 
1683 func (p *protocol) DisableMulticastForwarding() { 1684 p.mu.Lock() 1685 defer p.mu.Unlock() 1686 1687 p.multicastForwardingDisp = nil 1688 p.multicastRouteTable.RemoveAllInstalledRoutes() 1689 } 1690 1691 // MulticastRouteLastUsedTime implements 1692 // stack.MulticastForwardingNetworkProtocol. 1693 func (p *protocol) MulticastRouteLastUsedTime(addresses stack.UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) { 1694 if err := p.validateUnicastSourceAndMulticastDestination(addresses); err != nil { 1695 return tcpip.MonotonicTime{}, err 1696 } 1697 1698 timestamp, found := p.multicastRouteTable.GetLastUsedTimestamp(addresses) 1699 1700 if !found { 1701 return tcpip.MonotonicTime{}, &tcpip.ErrHostUnreachable{} 1702 } 1703 1704 return timestamp, nil 1705 } 1706 1707 func (p *protocol) forwardPendingMulticastPacket(pkt stack.PacketBufferPtr, installedRoute *multicast.InstalledRoute) { 1708 defer pkt.DecRef() 1709 1710 // Attempt to forward the packet using the endpoint that it originally 1711 // arrived on. This ensures that the packet is only forwarded if it 1712 // matches the route's expected input interface (see 5a of RFC 1812 section 1713 // 5.2.1.3). 1714 ep, ok := p.getEndpointForNIC(pkt.NICID) 1715 1716 if !ok { 1717 // The endpoint that the packet arrived on no longer exists. Silently 1718 // drop the pkt. 
1719 return 1720 } 1721 1722 if !ep.MulticastForwarding() { 1723 return 1724 } 1725 1726 ep.handleForwardingError(ep.forwardValidatedMulticastPacket(pkt, installedRoute)) 1727 } 1728 1729 func (p *protocol) isUnicastAddress(addr tcpip.Address) bool { 1730 if addr.BitLen() != header.IPv4AddressSizeBits { 1731 return false 1732 } 1733 1734 if addr == header.IPv4Any || addr == header.IPv4Broadcast { 1735 return false 1736 } 1737 1738 if p.isSubnetLocalBroadcastAddress(addr) { 1739 return false 1740 } 1741 return !header.IsV4MulticastAddress(addr) 1742 } 1743 1744 func (p *protocol) isSubnetLocalBroadcastAddress(addr tcpip.Address) bool { 1745 p.mu.RLock() 1746 defer p.mu.RUnlock() 1747 1748 for _, e := range p.eps { 1749 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* createTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 1750 subnet := addressEndpoint.Subnet() 1751 addressEndpoint.DecRef() 1752 if subnet.IsBroadcast(addr) { 1753 return true 1754 } 1755 } 1756 } 1757 return false 1758 } 1759 1760 // parseAndValidate parses the packet (including its transport layer header) and 1761 // returns the parsed IP header. 1762 // 1763 // Returns true if the IP header was successfully parsed. 1764 func (p *protocol) parseAndValidate(pkt stack.PacketBufferPtr) (*buffer.View, bool) { 1765 transProtoNum, hasTransportHdr, ok := p.Parse(pkt) 1766 if !ok { 1767 return nil, false 1768 } 1769 1770 h := header.IPv4(pkt.NetworkHeader().Slice()) 1771 // Do not include the link header's size when calculating the size of the IP 1772 // packet. 
1773 if !h.IsValid(pkt.Size() - len(pkt.LinkHeader().Slice())) { 1774 return nil, false 1775 } 1776 1777 if !pkt.RXChecksumValidated && !h.IsChecksumValid() { 1778 return nil, false 1779 } 1780 1781 if hasTransportHdr { 1782 p.parseTransport(pkt, transProtoNum) 1783 } 1784 1785 return pkt.NetworkHeader().View(), true 1786 } 1787 1788 func (p *protocol) parseTransport(pkt stack.PacketBufferPtr, transProtoNum tcpip.TransportProtocolNumber) { 1789 if transProtoNum == header.ICMPv4ProtocolNumber { 1790 // The transport layer will handle transport layer parsing errors. 1791 _ = parse.ICMPv4(pkt) 1792 return 1793 } 1794 1795 switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { 1796 case stack.ParsedOK: 1797 case stack.UnknownTransportProtocol, stack.TransportLayerParseError: 1798 // The transport layer will handle unknown protocols and transport layer 1799 // parsing errors. 1800 default: 1801 panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) 1802 } 1803 } 1804 1805 // Parse implements stack.NetworkProtocol. 1806 func (*protocol) Parse(pkt stack.PacketBufferPtr) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { 1807 if ok := parse.IPv4(pkt); !ok { 1808 return 0, false, false 1809 } 1810 1811 ipHdr := header.IPv4(pkt.NetworkHeader().Slice()) 1812 return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true 1813 } 1814 1815 // allowICMPReply reports whether an ICMP reply with provided type and code may 1816 // be sent following the rate mask options and global ICMP rate limiter. 1817 func (p *protocol) allowICMPReply(icmpType header.ICMPv4Type, code header.ICMPv4Code) bool { 1818 // Mimic linux and never rate limit for PMTU discovery. 
1819 // https://github.com/torvalds/linux/blob/9e9fb7655ed585da8f468e29221f0ba194a5f613/net/ipv4/icmp.c#L288 1820 if icmpType == header.ICMPv4DstUnreachable && code == header.ICMPv4FragmentationNeeded { 1821 return true 1822 } 1823 p.mu.RLock() 1824 defer p.mu.RUnlock() 1825 1826 if _, ok := p.icmpRateLimitedTypes[icmpType]; ok { 1827 return p.stack.AllowICMPMessage() 1828 } 1829 return true 1830 } 1831 1832 // SendRejectionError implements stack.RejectIPv4WithHandler. 1833 func (p *protocol) SendRejectionError(pkt stack.PacketBufferPtr, rejectWith stack.RejectIPv4WithICMPType, inputHook bool) tcpip.Error { 1834 switch rejectWith { 1835 case stack.RejectIPv4WithICMPNetUnreachable: 1836 return p.returnError(&icmpReasonNetworkUnreachable{}, pkt, inputHook) 1837 case stack.RejectIPv4WithICMPHostUnreachable: 1838 return p.returnError(&icmpReasonHostUnreachable{}, pkt, inputHook) 1839 case stack.RejectIPv4WithICMPPortUnreachable: 1840 return p.returnError(&icmpReasonPortUnreachable{}, pkt, inputHook) 1841 case stack.RejectIPv4WithICMPNetProhibited: 1842 return p.returnError(&icmpReasonNetworkProhibited{}, pkt, inputHook) 1843 case stack.RejectIPv4WithICMPHostProhibited: 1844 return p.returnError(&icmpReasonHostProhibited{}, pkt, inputHook) 1845 case stack.RejectIPv4WithICMPAdminProhibited: 1846 return p.returnError(&icmpReasonAdministrativelyProhibited{}, pkt, inputHook) 1847 default: 1848 panic(fmt.Sprintf("unhandled %[1]T = %[1]d", rejectWith)) 1849 } 1850 } 1851 1852 // calculateNetworkMTU calculates the network-layer payload MTU based on the 1853 // link-layer payload mtu. 
1854 func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, tcpip.Error) { 1855 if linkMTU < header.IPv4MinimumMTU { 1856 return 0, &tcpip.ErrInvalidEndpointState{} 1857 } 1858 1859 // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in 1860 // length: 1861 // The maximal internet header is 60 octets, and a typical internet header 1862 // is 20 octets, allowing a margin for headers of higher level protocols. 1863 if networkHeaderSize > header.IPv4MaximumHeaderSize { 1864 return 0, &tcpip.ErrMalformedHeader{} 1865 } 1866 1867 networkMTU := linkMTU 1868 if networkMTU > MaxTotalSize { 1869 networkMTU = MaxTotalSize 1870 } 1871 1872 return networkMTU - networkHeaderSize, nil 1873 } 1874 1875 func packetMustBeFragmented(pkt stack.PacketBufferPtr, networkMTU uint32) bool { 1876 payload := len(pkt.TransportHeader().Slice()) + pkt.Data().Size() 1877 return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU 1878 } 1879 1880 // addressToUint32 translates an IPv4 address into its little endian uint32 1881 // representation. 1882 // 1883 // This function does the same thing as binary.LittleEndian.Uint32 but operates 1884 // on a tcpip.Address (a string) without the need to convert it to a byte slice, 1885 // which would cause an allocation. 1886 func addressToUint32(addr tcpip.Address) uint32 { 1887 addrBytes := addr.As4() 1888 _ = addrBytes[3] // bounds check hint to compiler 1889 return uint32(addrBytes[0]) | uint32(addrBytes[1])<<8 | uint32(addrBytes[2])<<16 | uint32(addrBytes[3])<<24 1890 } 1891 1892 // hashRoute calculates a hash value for the given source/destination pair using 1893 // the addresses, transport protocol number and a 32-bit number to generate the 1894 // hash. 
1895 func hashRoute(srcAddr, dstAddr tcpip.Address, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 { 1896 a := addressToUint32(srcAddr) 1897 b := addressToUint32(dstAddr) 1898 return hash.Hash3Words(a, b, uint32(protocol), hashIV) 1899 } 1900 1901 // Options holds options to configure a new protocol. 1902 type Options struct { 1903 // IGMP holds options for IGMP. 1904 IGMP IGMPOptions 1905 1906 // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. 1907 // martian loopback packets) should be accepted. 1908 AllowExternalLoopbackTraffic bool 1909 } 1910 1911 // NewProtocolWithOptions returns an IPv4 network protocol. 1912 func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { 1913 ids := make([]atomicbitops.Uint32, buckets) 1914 1915 // Randomly initialize hashIV and the ids. 1916 r := hash.RandN32(1 + buckets) 1917 for i := range ids { 1918 ids[i] = atomicbitops.FromUint32(r[i]) 1919 } 1920 hashIV := r[buckets] 1921 1922 return func(s *stack.Stack) stack.NetworkProtocol { 1923 p := &protocol{ 1924 stack: s, 1925 ids: ids, 1926 hashIV: hashIV, 1927 defaultTTL: atomicbitops.FromUint32(DefaultTTL), 1928 options: opts, 1929 } 1930 p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) 1931 p.eps = make(map[tcpip.NICID]*endpoint) 1932 // Set ICMP rate limiting to Linux defaults. 1933 // See https://man7.org/linux/man-pages/man7/icmp.7.html. 
1934 p.icmpRateLimitedTypes = map[header.ICMPv4Type]struct{}{ 1935 header.ICMPv4DstUnreachable: {}, 1936 header.ICMPv4SrcQuench: {}, 1937 header.ICMPv4TimeExceeded: {}, 1938 header.ICMPv4ParamProblem: {}, 1939 } 1940 if err := p.multicastRouteTable.Init(multicast.DefaultConfig(s.Clock())); err != nil { 1941 panic(fmt.Sprintf("p.multicastRouteTable.Init(_): %s", err)) 1942 } 1943 return p 1944 } 1945 } 1946 1947 // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. 1948 func NewProtocol(s *stack.Stack) stack.NetworkProtocol { 1949 return NewProtocolWithOptions(Options{})(s) 1950 } 1951 1952 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (stack.PacketBufferPtr, bool) { 1953 fragPkt, offset, copied, more := pf.BuildNextFragment() 1954 fragPkt.NetworkProtocolNumber = ProtocolNumber 1955 1956 originalIPHeaderLength := len(originalIPHeader) 1957 nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength)) 1958 fragPkt.NetworkProtocolNumber = ProtocolNumber 1959 1960 if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) { 1961 panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength)) 1962 } 1963 1964 flags := originalIPHeader.Flags() 1965 if more { 1966 flags |= header.IPv4FlagMoreFragments 1967 } 1968 nextFragIPHeader.SetFlagsFragmentOffset(flags, uint16(offset)) 1969 nextFragIPHeader.SetTotalLength(uint16(nextFragIPHeader.HeaderLength()) + uint16(copied)) 1970 nextFragIPHeader.SetChecksum(0) 1971 nextFragIPHeader.SetChecksum(^nextFragIPHeader.CalculateChecksum()) 1972 1973 return fragPkt, more 1974 } 1975 1976 // optionAction describes possible actions that may be taken on an option 1977 // while processing it. 1978 type optionAction uint8 1979 1980 const ( 1981 // optionRemove says that the option should not be in the output option set. 
1982 optionRemove optionAction = iota 1983 1984 // optionProcess says that the option should be fully processed. 1985 optionProcess 1986 1987 // optionVerify says the option should be checked and passed unchanged. 1988 optionVerify 1989 1990 // optionPass says to pass the output set without checking. 1991 optionPass 1992 ) 1993 1994 // optionActions list what to do for each option in a given scenario. 1995 type optionActions struct { 1996 // timestamp controls what to do with a Timestamp option. 1997 timestamp optionAction 1998 1999 // recordRoute controls what to do with a Record Route option. 2000 recordRoute optionAction 2001 2002 // routerAlert controls what to do with a Router Alert option. 2003 routerAlert optionAction 2004 2005 // unknown controls what to do with an unknown option. 2006 unknown optionAction 2007 } 2008 2009 // optionsUsage specifies the ways options may be operated upon for a given 2010 // scenario during packet processing. 2011 type optionsUsage interface { 2012 actions() optionActions 2013 } 2014 2015 // optionUsageVerify implements optionsUsage for when we just want to check 2016 // fragments. Don't change anything, just check and reject if bad. No 2017 // replacement options are generated. 2018 type optionUsageVerify struct{} 2019 2020 // actions implements optionsUsage. 2021 func (*optionUsageVerify) actions() optionActions { 2022 return optionActions{ 2023 timestamp: optionVerify, 2024 recordRoute: optionVerify, 2025 routerAlert: optionVerify, 2026 unknown: optionRemove, 2027 } 2028 } 2029 2030 // optionUsageReceive implements optionsUsage for packets we will pass 2031 // to the transport layer (with the exception of Echo requests). 2032 type optionUsageReceive struct{} 2033 2034 // actions implements optionsUsage. 
2035 func (*optionUsageReceive) actions() optionActions { 2036 return optionActions{ 2037 timestamp: optionProcess, 2038 recordRoute: optionProcess, 2039 routerAlert: optionVerify, 2040 unknown: optionPass, 2041 } 2042 } 2043 2044 // optionUsageForward implements optionsUsage for packets about to be forwarded. 2045 // All options are passed on regardless of whether we recognise them, however 2046 // we do process the Timestamp and Record Route options. 2047 type optionUsageForward struct{} 2048 2049 // actions implements optionsUsage. 2050 func (*optionUsageForward) actions() optionActions { 2051 return optionActions{ 2052 timestamp: optionProcess, 2053 recordRoute: optionProcess, 2054 routerAlert: optionVerify, 2055 unknown: optionPass, 2056 } 2057 } 2058 2059 // optionUsageEcho implements optionsUsage for echo packet processing. 2060 // Only Timestamp and RecordRoute are processed and sent back. 2061 type optionUsageEcho struct{} 2062 2063 // actions implements optionsUsage. 2064 func (*optionUsageEcho) actions() optionActions { 2065 return optionActions{ 2066 timestamp: optionProcess, 2067 recordRoute: optionProcess, 2068 routerAlert: optionVerify, 2069 unknown: optionRemove, 2070 } 2071 } 2072 2073 // handleTimestamp does any required processing on a Timestamp option 2074 // in place. 
2075 func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) *header.IPv4OptParameterProblem { 2076 flags := tsOpt.Flags() 2077 var entrySize uint8 2078 switch flags { 2079 case header.IPv4OptionTimestampOnlyFlag: 2080 entrySize = header.IPv4OptionTimestampSize 2081 case 2082 header.IPv4OptionTimestampWithIPFlag, 2083 header.IPv4OptionTimestampWithPredefinedIPFlag: 2084 entrySize = header.IPv4OptionTimestampWithAddrSize 2085 default: 2086 return &header.IPv4OptParameterProblem{ 2087 Pointer: header.IPv4OptTSOFLWAndFLGOffset, 2088 NeedICMP: true, 2089 } 2090 } 2091 2092 pointer := tsOpt.Pointer() 2093 // RFC 791 page 22 states: "The smallest legal value is 5." 2094 // Since the pointer is 1 based, and the header is 4 bytes long the 2095 // pointer must point beyond the header therefore 4 or less is bad. 2096 if pointer <= header.IPv4OptionTimestampHdrLength { 2097 return &header.IPv4OptParameterProblem{ 2098 Pointer: header.IPv4OptTSPointerOffset, 2099 NeedICMP: true, 2100 } 2101 } 2102 // To simplify processing below, base further work on the array of timestamps 2103 // beyond the header, rather than on the whole option. Also to aid 2104 // calculations set 'nextSlot' to be 0 based as in the packet it is 1 based. 2105 nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1) 2106 optLen := tsOpt.Size() 2107 dataLength := optLen - header.IPv4OptionTimestampHdrLength 2108 2109 // In the section below, we verify the pointer, length and overflow counter 2110 // fields of the option. The distinction is in which byte you return as being 2111 // in error in the ICMP packet. Offsets 1 (length), 2 pointer) 2112 // or 3 (overflowed counter). 
2113 // 2114 // The following RFC sections cover this section: 2115 // 2116 // RFC 791 (page 22): 2117 // If there is some room but not enough room for a full timestamp 2118 // to be inserted, or the overflow count itself overflows, the 2119 // original datagram is considered to be in error and is discarded. 2120 // In either case an ICMP parameter problem message may be sent to 2121 // the source host [3]. 2122 // 2123 // You can get this situation in two ways. Firstly if the data area is not 2124 // a multiple of the entry size or secondly, if the pointer is not at a 2125 // multiple of the entry size. The wording of the RFC suggests that 2126 // this is not an error until you actually run out of space. 2127 if pointer > optLen { 2128 // RFC 791 (page 22) says we should switch to using the overflow count. 2129 // If the timestamp data area is already full (the pointer exceeds 2130 // the length) the datagram is forwarded without inserting the 2131 // timestamp, but the overflow count is incremented by one. 2132 if flags == header.IPv4OptionTimestampWithPredefinedIPFlag { 2133 // By definition we have nothing to do. 2134 return nil 2135 } 2136 2137 if tsOpt.IncOverflow() != 0 { 2138 return nil 2139 } 2140 // The overflow count is also full. 2141 return &header.IPv4OptParameterProblem{ 2142 Pointer: header.IPv4OptTSOFLWAndFLGOffset, 2143 NeedICMP: true, 2144 } 2145 } 2146 if nextSlot+entrySize > dataLength { 2147 // The data area isn't full but there isn't room for a new entry. 2148 // Either Length or Pointer could be bad. 2149 if false { 2150 // We must select Pointer for Linux compatibility, even if 2151 // only the length is bad. 
2152 // The Linux code is at (in October 2020) 2153 // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370 2154 // if (optptr[2]+3 > optlen) { 2155 // pp_ptr = optptr + 2; 2156 // goto error; 2157 // } 2158 // which doesn't distinguish between which of optptr[2] or optlen 2159 // is wrong, but just arbitrarily decides on optptr+2. 2160 if dataLength%entrySize != 0 { 2161 // The Data section size should be a multiple of the expected 2162 // timestamp entry size. 2163 return &header.IPv4OptParameterProblem{ 2164 Pointer: header.IPv4OptionLengthOffset, 2165 NeedICMP: false, 2166 } 2167 } 2168 // If the size is OK, the pointer must be corrupted. 2169 } 2170 return &header.IPv4OptParameterProblem{ 2171 Pointer: header.IPv4OptTSPointerOffset, 2172 NeedICMP: true, 2173 } 2174 } 2175 2176 if usage.actions().timestamp == optionProcess { 2177 tsOpt.UpdateTimestamp(localAddress, clock) 2178 } 2179 return nil 2180 } 2181 2182 // handleRecordRoute checks and processes a Record route option. It is much 2183 // like the timestamp type 1 option, but without timestamps. The passed in 2184 // address is stored in the option in the correct spot if possible. 2185 func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) *header.IPv4OptParameterProblem { 2186 optlen := rrOpt.Size() 2187 2188 if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength { 2189 return &header.IPv4OptParameterProblem{ 2190 Pointer: header.IPv4OptionLengthOffset, 2191 NeedICMP: true, 2192 } 2193 } 2194 2195 pointer := rrOpt.Pointer() 2196 // RFC 791 page 20 states: 2197 // The pointer is relative to this option, and the 2198 // smallest legal value for the pointer is 4. 2199 // Since the pointer is 1 based, and the header is 3 bytes long the 2200 // pointer must point beyond the header therefore 3 or less is bad. 
2201 if pointer <= header.IPv4OptionRecordRouteHdrLength { 2202 return &header.IPv4OptParameterProblem{ 2203 Pointer: header.IPv4OptRRPointerOffset, 2204 NeedICMP: true, 2205 } 2206 } 2207 2208 // RFC 791 page 21 says 2209 // If the route data area is already full (the pointer exceeds the 2210 // length) the datagram is forwarded without inserting the address 2211 // into the recorded route. If there is some room but not enough 2212 // room for a full address to be inserted, the original datagram is 2213 // considered to be in error and is discarded. In either case an 2214 // ICMP parameter problem message may be sent to the source 2215 // host. 2216 // The use of the words "In either case" suggests that a 'full' RR option 2217 // could generate an ICMP at every hop after it fills up. We chose to not 2218 // do this (as do most implementations). It is probable that the inclusion 2219 // of these words is a copy/paste error from the timestamp option where 2220 // there are two failure reasons given. 2221 if pointer > optlen { 2222 return nil 2223 } 2224 2225 // The data area isn't full but there isn't room for a new entry. 2226 // Either Length or Pointer could be bad. We must select Pointer for Linux 2227 // compatibility, even if only the length is bad. NB. pointer is 1 based. 2228 if pointer+header.IPv4AddressSize > optlen+1 { 2229 if false { 2230 // This is what we would do if we were not being Linux compatible. 2231 // Check for bad pointer or length value. Must be a multiple of 4 after 2232 // accounting for the 3 byte header and not within that header. 2233 // RFC 791, page 20 says: 2234 // The pointer is relative to this option, and the 2235 // smallest legal value for the pointer is 4. 2236 // 2237 // A recorded route is composed of a series of internet addresses. 2238 // Each internet address is 32 bits or 4 octets. 2239 // Linux skips this test so we must too. 
See Linux code at: 2240 // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341 2241 // if (optptr[2]+3 > optlen) { 2242 // pp_ptr = optptr + 2; 2243 // goto error; 2244 // } 2245 if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 { 2246 // Length is bad, not on integral number of slots. 2247 return &header.IPv4OptParameterProblem{ 2248 Pointer: header.IPv4OptionLengthOffset, 2249 NeedICMP: true, 2250 } 2251 } 2252 // If not length, the fault must be with the pointer. 2253 } 2254 return &header.IPv4OptParameterProblem{ 2255 Pointer: header.IPv4OptRRPointerOffset, 2256 NeedICMP: true, 2257 } 2258 } 2259 if usage.actions().recordRoute == optionVerify { 2260 return nil 2261 } 2262 rrOpt.StoreAddress(localAddress) 2263 return nil 2264 } 2265 2266 // handleRouterAlert performs sanity checks on a Router Alert option. 2267 func handleRouterAlert(raOpt header.IPv4OptionRouterAlert) *header.IPv4OptParameterProblem { 2268 // Only the zero value is acceptable, as per RFC 2113, section 2.1: 2269 // Value: A two octet code with the following values: 2270 // 0 - Router shall examine packet 2271 // 1-65535 - Reserved 2272 if raOpt.Value() != header.IPv4OptionRouterAlertValue { 2273 return &header.IPv4OptParameterProblem{ 2274 Pointer: header.IPv4OptionRouterAlertValueOffset, 2275 NeedICMP: true, 2276 } 2277 } 2278 return nil 2279 } 2280 2281 type optionTracker struct { 2282 timestamp bool 2283 recordRoute bool 2284 routerAlert bool 2285 } 2286 2287 // processIPOptions parses the IPv4 options and produces a new set of options 2288 // suitable for use in the next step of packet processing as informed by usage. 2289 // The original will not be touched. 2290 // 2291 // If there were no errors during parsing, the new set of options is returned as 2292 // a new buffer. 
2293 func (e *endpoint) processIPOptions(pkt stack.PacketBufferPtr, opts header.IPv4Options, usage optionsUsage) (header.IPv4Options, optionTracker, *header.IPv4OptParameterProblem) { 2294 stats := e.stats.ip 2295 optIter := opts.MakeIterator() 2296 2297 // Except NOP, each option must only appear at most once (RFC 791 section 3.1, 2298 // at the definition of every type). 2299 // Keep track of each option we find to enable duplicate option detection. 2300 var seenOptions [math.MaxUint8 + 1]bool 2301 2302 // TODO(https://gvisor.dev/issue/4586): This will need tweaking when we start 2303 // really forwarding packets as we may need to get two addresses, for rx and 2304 // tx interfaces. We will also have to take usage into account. 2305 localAddress := e.MainAddress().Address 2306 if localAddress.BitLen() == 0 { 2307 h := header.IPv4(pkt.NetworkHeader().Slice()) 2308 dstAddr := h.DestinationAddress() 2309 if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(dstAddr) { 2310 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2311 NeedICMP: false, 2312 } 2313 } 2314 localAddress = dstAddr 2315 } 2316 2317 var optionsProcessed optionTracker 2318 for { 2319 option, done, optProblem := optIter.Next() 2320 if done || optProblem != nil { 2321 return optIter.Finalize(), optionsProcessed, optProblem 2322 } 2323 optType := option.Type() 2324 if optType == header.IPv4OptionNOPType { 2325 optIter.PushNOPOrEnd(optType) 2326 continue 2327 } 2328 if optType == header.IPv4OptionListEndType { 2329 optIter.PushNOPOrEnd(optType) 2330 return optIter.Finalize(), optionsProcessed, nil 2331 } 2332 2333 // check for repeating options (multiple NOPs are OK) 2334 if seenOptions[optType] { 2335 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 2336 Pointer: optIter.ErrCursor, 2337 NeedICMP: true, 2338 } 2339 } 2340 seenOptions[optType] = true 2341 2342 optLen, optProblem := func() (int, *header.IPv4OptParameterProblem) { 2343 switch option := 
option.(type) { 2344 case *header.IPv4OptionTimestamp: 2345 stats.OptionTimestampReceived.Increment() 2346 optionsProcessed.timestamp = true 2347 if usage.actions().timestamp != optionRemove { 2348 clock := e.protocol.stack.Clock() 2349 newBuffer := optIter.InitReplacement(option) 2350 optProblem := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage) 2351 return len(newBuffer), optProblem 2352 } 2353 2354 case *header.IPv4OptionRecordRoute: 2355 stats.OptionRecordRouteReceived.Increment() 2356 optionsProcessed.recordRoute = true 2357 if usage.actions().recordRoute != optionRemove { 2358 newBuffer := optIter.InitReplacement(option) 2359 optProblem := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), localAddress, usage) 2360 return len(newBuffer), optProblem 2361 } 2362 2363 case *header.IPv4OptionRouterAlert: 2364 stats.OptionRouterAlertReceived.Increment() 2365 optionsProcessed.routerAlert = true 2366 if usage.actions().routerAlert != optionRemove { 2367 newBuffer := optIter.InitReplacement(option) 2368 optProblem := handleRouterAlert(header.IPv4OptionRouterAlert(newBuffer)) 2369 return len(newBuffer), optProblem 2370 } 2371 2372 default: 2373 stats.OptionUnknownReceived.Increment() 2374 if usage.actions().unknown == optionPass { 2375 return len(optIter.InitReplacement(option)), nil 2376 } 2377 } 2378 return 0, nil 2379 }() 2380 2381 if optProblem != nil { 2382 optProblem.Pointer += optIter.ErrCursor 2383 return nil, optionTracker{}, optProblem 2384 } 2385 optIter.ConsumeBuffer(optLen) 2386 } 2387 }